-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathurls.py
More file actions
42 lines (32 loc) · 1.06 KB
/
urls.py
File metadata and controls
42 lines (32 loc) · 1.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""
Functions for working with URLs
"""
import logging
import json
from urllib.parse import urlparse
logger = logging.getLogger(__name__)


def extract_url_domain(url):
    """
    Extract the leading domain label from a URL, skipping a "www" prefix.

    For example: "https://www.google.com/" would return "google".

    The host is read from the parsed URL's ``hostname`` attribute rather
    than the raw ``netloc``, so any ``user:password@`` prefix and ``:port``
    suffix are ignored and the result is lower-cased.

    Args:
        url (str): The URL to extract the domain name from.

    Returns:
        str | None: The first host label (or the second one when the first
        is "www" and another label follows). An empty string is returned
        when the URL has no network location (e.g. it lacks a "scheme://"
        prefix). None is returned when ``url`` is not a string or cannot be
        parsed.
    """
    # JSON-quote the value once so control characters in the input cannot
    # break up or forge log lines.
    quoted_url = json.dumps(str(url))
    logger.debug("Getting domain from %s", quoted_url)

    try:
        if not isinstance(url, str):
            raise TypeError("Input must be a string.")
        # ``hostname`` is None when the URL has no network location.
        hostname = urlparse(url).hostname or ""
    except (ValueError, TypeError):
        # logger.exception is only meaningful inside a handler, where it
        # attaches the active traceback to the log record.
        logger.exception("Error extracting domain from %s", quoted_url)
        return None

    labels = hostname.split(".")
    # Only skip "www" when something follows it; a bare "www" host would
    # otherwise index past the end of the list.
    if labels[0] == "www" and len(labels) > 1:
        domain = labels[1]
    else:
        domain = labels[0]

    logger.debug("Got domain %s", json.dumps(domain))
    return domain