Examples
This page provides practical examples of using liburlparser in Python for various URL parsing tasks.
Basic URL Parsing
from liburlparser import Url
# Construct a Url from a string and read its parsed components as attributes.
# The trailing comment on each print() is the output documented for that line.
# Parse a simple URL
url = Url("https://www.example.com")
print(f"Domain: {url.domain}") # Domain: example
print(f"Suffix: {url.suffix}") # Suffix: com
print(f"Subdomain: {url.subdomain}") # Subdomain: www
# Parse a more complex URL
# NOTE(review): the path (/path/to/page) is not displayed by any of the
# prints below.
url = Url("https://user:pass@mail.google.com:8080/path/to/page?q=test&lang=en#section")
print(f"Protocol: {url.protocol}") # Protocol: https
print(f"Userinfo: {url.userinfo}") # Userinfo: user:pass
print(f"Domain: {url.domain}") # Domain: google
print(f"Subdomain: {url.subdomain}") # Subdomain: mail
print(f"Port: {url.port}") # Port: 8080
print(f"Query: {url.query}") # Query: q=test&lang=en
print(f"Fragment: {url.fragment}") # Fragment: section
Host Parsing
from liburlparser import Host
# Host is built from a bare hostname (no scheme, port or path) and exposes
# the same subdomain / domain / suffix attributes used on Url above.
# Parse a simple hostname
host = Host("example.com")
print(f"Domain: {host.domain}") # Domain: example
print(f"Suffix: {host.suffix}") # Suffix: com
print(f"Subdomain: {host.subdomain}") # Subdomain: (empty string)
# Parse a hostname with subdomain
# The suffix here has two labels ("co.uk"), so the domain is the label before it.
host = Host("blog.example.co.uk")
print(f"Domain: {host.domain}") # Domain: example
print(f"Suffix: {host.suffix}") # Suffix: co.uk
print(f"Subdomain: {host.subdomain}") # Subdomain: blog
# Parse a hostname with multiple subdomain levels
# All labels to the left of the domain are reported together as one string.
host = Host("a.b.c.example.com")
print(f"Domain: {host.domain}") # Domain: example
print(f"Suffix: {host.suffix}") # Suffix: com
print(f"Subdomain: {host.subdomain}") # Subdomain: a.b.c
Extracting Host from URL
from liburlparser import Url, Host
# Three ways to get the host of a URL: as an object via Url.host or
# Host.from_url, or as a plain string via Url.extract_host.
# Method 1: Get the host from a URL object
url = Url("https://mail.google.com/about")
host = url.host
print(f"Host: {host}") # Host: <Host :'mail.google.com'>
# Method 2: Use the Host.from_url static method
# Takes the URL string directly, so no Url object is created by the caller.
host = Host.from_url("https://mail.google.com/about")
print(f"Domain: {host.domain}") # Domain: google
# Method 3: Just extract the host string (fastest)
host_str = Url.extract_host("https://mail.google.com/about")
print(f"Host string: {host_str}") # Host string: mail.google.com
Ignoring "www" Subdomain
from liburlparser import Url, Host
# The ignore_www keyword is accepted by both Host and Url. Per the documented
# outputs below, passing True reports an empty subdomain for a "www." host,
# while the domain is unchanged.
# Default behavior
host = Host("www.example.com")
print(f"Subdomain: '{host.subdomain}'") # Subdomain: 'www'
print(f"Domain: {host.domain}") # Domain: example
# Ignore www
host = Host("www.example.com", ignore_www=True)
print(f"Subdomain: '{host.subdomain}'") # Subdomain: ''
print(f"Domain: {host.domain}") # Domain: example
# Same for URLs
url = Url("https://www.example.com/about", ignore_www=True)
print(f"Subdomain: '{url.subdomain}'") # Subdomain: ''
print(f"Domain: {url.domain}") # Domain: example
Converting to Dictionary or JSON
from liburlparser import Url, Host
import json
# Url and Host both offer to_dict() and to_json(). The result of to_dict() is
# passed to json.dumps() here only to pretty-print it.
# URL to dictionary
url = Url("https://mail.google.com/about?q=test#section")
url_dict = url.to_dict()
print(json.dumps(url_dict, indent=2))
# Output:
# {
#   "str": "https://mail.google.com/about?q=test#section",
#   "protocol": "https",
#   "userinfo": "",
#   "host": {
#     "str": "mail.google.com",
#     "subdomain": "mail",
#     "domain": "google",
#     "domain_name": "google",
#     "suffix": "com"
#   },
#   "port": 0,
#   "query": "q=test",
#   "fragment": "section"
# }
# NOTE(review): this URL has no explicit port and the documented output shows
# "port": 0 -- presumably 0 means "not specified"; confirm against the library.
# URL to JSON
url_json = url.to_json()
print(url_json)
# Output: {"str": "https://mail.google.com/about?q=test#section", "protocol": "https", "userinfo": "", "host": {"str": "mail.google.com", "subdomain": "mail", "domain": "google", "domain_name": "google", "suffix": "com"}, "port": 0, "query": "q=test", "fragment": "section"}
# Host to dictionary
# The keys match the nested "host" object in the Url output above.
host = Host("mail.google.com")
host_dict = host.to_dict()
print(json.dumps(host_dict, indent=2))
# Output:
# {
#   "str": "mail.google.com",
#   "subdomain": "mail",
#   "domain": "google",
#   "domain_name": "google",
#   "suffix": "com"
# }
# Host to JSON
host_json = host.to_json()
print(host_json)
# Output: {"str": "mail.google.com", "subdomain": "mail", "domain": "google", "domain_name": "google", "suffix": "com"}
Quick Domain Extraction
from liburlparser import Host
# Host.extract and Host.extract_from_url are called on the class with a plain
# string; per the documented output each returns a dict with 'suffix',
# 'domain' and 'subdomain' keys.
# From a host string
result = Host.extract("mail.google.com")
print(result) # {'suffix': 'com', 'domain': 'google', 'subdomain': 'mail'}
# From a URL string
result = Host.extract_from_url("https://mail.google.com/about")
print(result) # {'suffix': 'com', 'domain': 'google', 'subdomain': 'mail'}
Batch Processing URLs
from liburlparser import Host
import csv
def extract_domains_from_csv(input_file, url_column, output_file, encoding=None):
    """Copy a CSV file, appending domain, suffix and subdomain columns.

    Every row of ``input_file`` is written to ``output_file`` with three
    extra columns filled from ``Host.extract_from_url`` applied to the value
    found in ``url_column``. A row whose URL cannot be parsed gets empty
    strings in those columns instead of aborting the whole run.

    Args:
        input_file: Path of the CSV file to read; its first row is the header.
        url_column: Header name of the column that holds the URL.
        output_file: Path of the CSV file to write (truncated if it exists).
        encoding: Text encoding used for both files. ``None`` (the default)
            keeps the platform default encoding.

    Raises:
        KeyError: If a data row is read and ``url_column`` is not one of the
            input's header names.
    """
    extra_columns = ['domain', 'suffix', 'subdomain']
    # The csv module asks for newline='' on files it reads as well as on
    # files it writes, so newlines embedded in quoted fields survive intact.
    with open(input_file, 'r', newline='', encoding=encoding) as infile, \
            open(output_file, 'w', newline='', encoding=encoding) as outfile:
        reader = csv.DictReader(infile)
        # fieldnames is None when the input file is empty (no header row).
        fieldnames = list(reader.fieldnames or [])
        # Do not emit a column twice if the input already has one of ours;
        # the existing column is overwritten in place instead.
        fieldnames += [name for name in extra_columns if name not in fieldnames]
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            url = row[url_column]
            try:
                # Use the fast extract_from_url method
                domain_info = Host.extract_from_url(url)
                parts = {name: domain_info[name] for name in extra_columns}
            except Exception:
                # Deliberate best effort: the exception types raised by the
                # parser are not known here, so any failure blanks the row's
                # extra columns and processing continues.
                parts = dict.fromkeys(extra_columns, '')
            row.update(parts)
            writer.writerow(row)
# Example usage
# extract_domains_from_csv('urls.csv', 'url_column', 'output.csv')
Error Handling
from liburlparser import Url, Host
def safe_parse_url(url_str):
    """Build a Url from ``url_str``, returning None instead of raising.

    Any exception raised by the ``Url`` constructor is caught, reported on
    standard output together with the offending input, and turned into a
    ``None`` return value.
    """
    try:
        parsed = Url(url_str)
    except Exception as e:
        print(f"Error parsing URL '{url_str}': {e}")
        return None
    else:
        return parsed
def safe_parse_host(host_str):
    """Build a Host from ``host_str``, returning None instead of raising.

    Any exception raised by the ``Host`` constructor is caught, reported on
    standard output together with the offending input, and turned into a
    ``None`` return value.
    """
    try:
        parsed = Host(host_str)
    except Exception as e:
        print(f"Error parsing host '{host_str}': {e}")
        return None
    else:
        return parsed
# Run safe_parse_url over a mix of valid and invalid URL strings
urls = [
    "https://example.com",
    "invalid://example.com",
    "https://example.com:invalid",
    "not a url",
]
for url_str in urls:
    url = safe_parse_url(url_str)
    if not url:
        # None (or any falsy result) means there is nothing to report here;
        # safe_parse_url prints the error itself when parsing raises.
        continue
    print(f"Successfully parsed: {url.domain}")
Working with International Domain Names (IDNs)
from liburlparser import Url, Host
# Non-ASCII hostnames are passed to Url and Host as ordinary Python strings;
# per the documented outputs the domain and suffix come back in the same
# Unicode form.
# Parse an IDN
url = Url("https://例子.测试")
print(f"Domain: {url.domain}") # Domain: 例子
print(f"Suffix: {url.suffix}") # Suffix: 测试
# The same hostname without a scheme, parsed with Host
host = Host("例子.测试")
print(f"Domain: {host.domain}") # Domain: 例子
print(f"Suffix: {host.suffix}") # Suffix: 测试