Host Class API Reference¶
The Host class represents a hostname and provides methods to parse and extract information from it.
Import¶
from liburlparser import Host
Constructor¶
Host(hoststr, ignore_www=False)
Parameters¶
hoststr
(str): The hostname to parse
ignore_www
(bool, optional): Whether to ignore "www" in the subdomain. Default is False.
Example¶
# Create a Host object
host = Host("mail.google.com")
# Create a Host object, ignoring "www" if present
host = Host("www.example.com", ignore_www=True)
Class Methods¶
from_url¶
@classmethod
def from_url(cls, urlstr, ignore_www=False)
Create a Host object from a URL string.
Parameters¶
urlstr
(str): The URL string to parse
ignore_www
(bool, optional): Whether to ignore "www" in the subdomain. Default is False.
Returns¶
- A new Host object
Example¶
host = Host.from_url("https://mail.google.com/about")
print(host.domain) # google
extract_from_url¶
@classmethod
def extract_from_url(cls, urlstr)
Extract domain components from a URL string.
Parameters¶
urlstr
(str): The URL string to parse
Returns¶
- A dictionary with keys 'subdomain', 'domain', and 'suffix'
Example¶
info = Host.extract_from_url("https://mail.google.com/about")
print(info) # {'subdomain': 'mail', 'domain': 'google', 'suffix': 'com'}
extract¶
@classmethod
def extract(cls, hoststr)
Extract domain components from a host string.
Parameters¶
hoststr
(str): The hostname to parse
Returns¶
- A dictionary with keys 'subdomain', 'domain', and 'suffix'
Example¶
info = Host.extract("mail.google.com")
print(info) # {'subdomain': 'mail', 'domain': 'google', 'suffix': 'com'}
load_psl_from_path¶
@classmethod
def load_psl_from_path(cls, filepath)
Load the Public Suffix List from a file.
Parameters¶
filepath
(str): Path to the PSL file
Example¶
Host.load_psl_from_path("/path/to/public_suffix_list.dat")
load_psl_from_string¶
@classmethod
def load_psl_from_string(cls, string)
Load the Public Suffix List from a string.
Parameters¶
string
(str): The PSL content as a string
Example¶
with open("/path/to/public_suffix_list.dat", "r") as f:
psl_content = f.read()
Host.load_psl_from_string(psl_content)
is_psl_loaded¶
@classmethod
def is_psl_loaded(cls)
Check if the Public Suffix List is loaded.
Returns¶
- True if loaded, False otherwise
Example¶
is_loaded = Host.is_psl_loaded()
print(f"PSL loaded: {is_loaded}")
removeWWW¶
@classmethod
def removeWWW(cls, hoststr)
Remove "www." from the beginning of a hostname.
Parameters¶
hoststr
(str): The hostname
Returns¶
- The hostname without "www."
Example¶
host_without_www = Host.removeWWW("www.example.com")
print(host_without_www) # example.com
Properties¶
subdomain¶
The subdomain part of the hostname.
host = Host("mail.google.com")
print(host.subdomain) # mail
domain¶
The domain part of the hostname.
host = Host("mail.google.com")
print(host.domain) # google
domain_name¶
The domain name (same as domain).
host = Host("mail.google.com")
print(host.domain_name) # google
fulldomain¶
The full domain (domain + suffix).
host = Host("mail.google.com")
print(host.fulldomain) # google.com
suffix¶
The suffix part of the hostname.
host = Host("mail.google.com")
print(host.suffix) # com
Methods¶
to_dict¶
Convert the Host object to a dictionary.
host.to_dict()
Returns¶
- A dictionary with keys 'str', 'subdomain', 'domain', 'domain_name', and 'suffix'
Example¶
host = Host("mail.google.com")
host_dict = host.to_dict()
print(host_dict)
# {'str': 'mail.google.com', 'subdomain': 'mail', 'domain': 'google', 'domain_name': 'google', 'suffix': 'com'}
to_json¶
Convert the Host object to a JSON string.
host = Host("mail.google.com")
host_json = host.to_json()
print(host_json)
# {"str": "mail.google.com", "subdomain": "mail", "domain": "google", "domain_name": "google", "suffix": "com"}
str¶
Get the string representation of the hostname.
str(host)
Returns¶
- The hostname as a string
Example¶
host = Host("mail.google.com")
print(str(host)) # mail.google.com
repr¶
Get the representation of the Host object.
repr(host)
Returns¶
- A string like <Host :'example.com'>
Example¶
host = Host("mail.google.com")
print(repr(host)) # <Host :'mail.google.com'>
Complete Example¶
Here's a complete example that demonstrates the Host class functionality:
from liburlparser import Host
def analyze_host(host_str):
# Parse the host
host = Host(host_str)
# Print host components
print(f"Full host: {host}")
print(f"Subdomain: {host.subdomain}")
print(f"Domain: {host.domain}")
print(f"Suffix: {host.suffix}")
print(f"Full domain: {host.fulldomain}")
# Convert to dictionary and JSON
print(f"Dictionary: {host.to_dict()}")
print(f"JSON: {host.to_json()}")
# Test with different hostnames
hosts = [
"example.com",
"www.example.com",
"mail.google.com",
"blog.example.co.uk",
"a.b.c.example.org"
]
for host_str in hosts:
print(f"\nAnalyzing: {host_str}")
analyze_host(host_str)
Handling Complex Domains¶
The Host class can handle various domain structures:
# Standard domains
host = Host("example.com")
print(f"Domain: {host.domain}, Suffix: {host.suffix}") # Domain: example, Suffix: com
# Domains with subdomains
host = Host("www.mail.example.com")
print(f"Subdomain: {host.subdomain}, Domain: {host.domain}, Suffix: {host.suffix}")
# Subdomain: www.mail, Domain: example, Suffix: com
# Country-specific domains
host = Host("example.co.uk")
print(f"Domain: {host.domain}, Suffix: {host.suffix}") # Domain: example, Suffix: co.uk
# Domains with unusual TLDs
host = Host("example.museum")
print(f"Domain: {host.domain}, Suffix: {host.suffix}") # Domain: example, Suffix: museum
# IDN domains
host = Host("例子.测试")
print(f"Domain: {host.domain}, Suffix: {host.suffix}") # Domain: 例子, Suffix: 测试
Error Handling¶
It's good practice to handle potential errors when parsing hosts:
def safe_parse_host(host_str):
try:
host = Host(host_str)
return {
'success': True,
'host': host,
'domain': host.domain,
'suffix': host.suffix,
'subdomain': host.subdomain
}
except Exception as e:
return {
'success': False,
'error': str(e),
'host_str': host_str
}
# Test with valid and invalid hosts
hosts = [
"example.com",
"mail.google.com",
"", # Empty string
"invalid" # No suffix
]
for host_str in hosts:
result = safe_parse_host(host_str)
if result['success']:
print(f"Successfully parsed: {host_str} → Domain: {result['domain']}")
else:
print(f"Failed to parse: {host_str} → Error: {result['error']}")