diff --git a/README.md b/README.md
index 771798c0b..7535e6bd5 100644
--- a/README.md
+++ b/README.md
@@ -1093,6 +1093,7 @@ Our enterprise sponsors and technology partners help scale Crawl4AI to power pro
| Company | About | Sponsorship Tier |
|------|------|----------------------------|
+| | Thordata ensures seamless compatibility with any AI/ML workflow and data infrastructure, providing large-scale web data access with 99.9% uptime, backed by one-on-one customer support. | 🥈 Silver |
| | NstProxy is a trusted proxy provider with over 110M+ real residential IPs, city-level targeting, 99.99% uptime, and low pricing at $0.1/GB, it delivers unmatched stability, scale, and cost-efficiency. | 🥈 Silver |
| | Scrapeless provides production-grade infrastructure for Crawling, Automation, and AI Agents, offering Scraping Browser, 4 Proxy Types and Universal Scraping API. | 🥈 Silver |
| | AI-powered Captcha solving service. Supports all major Captcha types, including reCAPTCHA, Cloudflare, and more | 🥉 Bronze |
diff --git a/crawl4ai/html2text/__init__.py b/crawl4ai/html2text/__init__.py
index ca15b4534..838b0dd22 100644
--- a/crawl4ai/html2text/__init__.py
+++ b/crawl4ai/html2text/__init__.py
@@ -312,6 +312,11 @@ def handle_tag(
) -> None:
self.current_tag = tag
+ if tag == "base" and start:
+ href = attrs.get("href")
+ if href:
+ self.baseurl = urlparse.urljoin(self.baseurl, href)
+
if self.tag_callback is not None:
if self.tag_callback(self, tag, attrs, start) is True:
return
diff --git a/tests/test_base_tag_local.py b/tests/test_base_tag_local.py
new file mode 100644
index 000000000..159dbca15
--- /dev/null
+++ b/tests/test_base_tag_local.py
@@ -0,0 +1,39 @@
+import unittest
+from crawl4ai.html2text import HTML2Text
+
+class TestBaseTag(unittest.TestCase):
+    """Regression test for ``<base href>`` support in ``HTML2Text``."""
+
+    def test_base_tag_handling(self):
+        """Relative hrefs resolve against ``<base href>``, not ``baseurl``."""
+        html_content = """
+        <html>
+        <head>
+            <base href="https://example.com/subdir/">
+        </head>
+        <body>
+            <a href="page.html">Link</a>
+        </body>
+        </html>
+        """
+
+        # Deliberately start from a different base URL so the assertions can
+        # tell which of the two was used to resolve the relative href.
+        parser = HTML2Text(baseurl="https://override.com/")
+        markdown = parser.handle(html_content)
+
+        # The document's own <base> tag must win over the constructor value.
+        expected_url = "https://example.com/subdir/page.html"
+        # If <base> were ignored, the href would be joined onto the
+        # constructor's baseurl and come out as this URL instead.
+        ignored_base_url = "https://override.com/page.html"
+
+        self.assertIn(
+            expected_url,
+            markdown,
+            f"Base tag ignored. Expected {expected_url} in output: {markdown!r}",
+        )
+        self.assertNotIn(ignored_base_url, markdown)
+
+if __name__ == "__main__":
+    unittest.main()