diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebeee66..451fe77 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
+## [0.4.0] - 2025-12-28
+
+### Added
+- **Python feedparser compatibility improvements**:
+ - Field alias mappings for deprecated field names (`description` → `subtitle`, `guid` → `id`, etc.)
+ - Dict-style access on feed objects (`d['feed']['title']`, `d['entries'][0]['link']`)
+ - Container aliases (`channel` → `feed`, `items` → `entries`)
+ - Auto-URL detection in `parse()` function (URLs are automatically fetched when http feature enabled)
+ - Optional HTTP parameters (`etag`, `modified`, `user_agent`) for `parse()` and `parse_with_limits()`
+
+### Changed
+- `parse_with_limits()` now uses keyword-only `limits` parameter for consistency
+
## [0.3.0] - 2025-12-18
### Added
@@ -147,7 +160,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Comprehensive test coverage
- Documentation with examples
-[Unreleased]: https://github.com/bug-ops/feedparser-rs/compare/v0.3.0...HEAD
+[Unreleased]: https://github.com/bug-ops/feedparser-rs/compare/v0.4.0...HEAD
+[0.4.0]: https://github.com/bug-ops/feedparser-rs/compare/v0.3.0...v0.4.0
[0.3.0]: https://github.com/bug-ops/feedparser-rs/compare/v0.2.1...v0.3.0
[0.2.1]: https://github.com/bug-ops/feedparser-rs/compare/v0.2.0...v0.2.1
[0.2.0]: https://github.com/bug-ops/feedparser-rs/compare/v0.1.8...v0.2.0
diff --git a/Cargo.lock b/Cargo.lock
index cfa1984..4774667 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -536,7 +536,7 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "feedparser-rs"
-version = "0.3.0"
+version = "0.4.0"
dependencies = [
"ammonia",
"chrono",
@@ -559,7 +559,7 @@ dependencies = [
[[package]]
name = "feedparser-rs-node"
-version = "0.3.0"
+version = "0.4.0"
dependencies = [
"feedparser-rs",
"napi",
@@ -569,10 +569,11 @@ dependencies = [
[[package]]
name = "feedparser-rs-py"
-version = "0.3.0"
+version = "0.4.0"
dependencies = [
"chrono",
"feedparser-rs",
+ "once_cell",
"pyo3",
]
diff --git a/Cargo.toml b/Cargo.toml
index 9006134..ad81061 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
resolver = "2"
[workspace.package]
-version = "0.3.0"
+version = "0.4.0"
edition = "2024"
rust-version = "1.88.0"
authors = ["bug-ops"]
@@ -29,6 +29,7 @@ memchr = "2.7"
mockito = "1.6"
napi = "3.7"
napi-derive = "3.4"
+once_cell = "1.20"
pyo3 = "0.27"
quick-xml = "0.38"
regex = "1.11"
diff --git a/README.md b/README.md
index fd95e6f..f93638a 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ High-performance RSS/Atom/JSON Feed parser written in Rust, with Python and Node
- **Conditional GET** — ETag/Last-Modified support for bandwidth-efficient polling
- **Podcast support** — iTunes and Podcast 2.0 namespace extensions
- **Multi-language bindings** — Native Python (PyO3) and Node.js (napi-rs) bindings
-- **Familiar API** — Inspired by Python's feedparser, easy to migrate existing code
+- **feedparser drop-in** — Dict-style access, field aliases, same API patterns as Python feedparser
## Supported Formats
@@ -146,18 +146,28 @@ See [Node.js API documentation](crates/feedparser-rs-node/README.md) for complet
### Python
```python
-import feedparser_rs
+import feedparser_rs as feedparser # Drop-in replacement
-# Parse from bytes or string
-d = feedparser_rs.parse(b'...')
+# Parse from bytes, string, or URL (auto-detected)
+d = feedparser.parse(b'...')
+d = feedparser.parse('https://example.com/feed.xml') # URL auto-detected
+
+# Attribute-style access
print(d.version) # 'rss20'
print(d.feed.title)
print(d.bozo) # True if parsing had issues
-print(d.entries[0].published_parsed) # time.struct_time
+
+# Dict-style access (feedparser-compatible)
+print(d['feed']['title'])
+print(d['entries'][0]['link'])
+
+# Deprecated field aliases work
+print(d.feed.description) # → d.feed.subtitle
+print(d.channel.title) # → d.feed.title
```
> [!NOTE]
-> Python bindings provide `time.struct_time` for date fields, matching feedparser's API for easy migration.
+> Python bindings provide full feedparser compatibility: dict-style access, field aliases, and `time.struct_time` for date fields.
## Cargo Features
diff --git a/crates/feedparser-rs-node/package.json b/crates/feedparser-rs-node/package.json
index daf8458..65877ed 100644
--- a/crates/feedparser-rs-node/package.json
+++ b/crates/feedparser-rs-node/package.json
@@ -1,6 +1,6 @@
{
"name": "feedparser-rs",
- "version": "0.3.0",
+ "version": "0.4.0",
"description": "High-performance RSS/Atom/JSON Feed parser for Node.js",
"main": "index.js",
"types": "index.d.ts",
diff --git a/crates/feedparser-rs-py/Cargo.toml b/crates/feedparser-rs-py/Cargo.toml
index 53ab369..11bfaad 100644
--- a/crates/feedparser-rs-py/Cargo.toml
+++ b/crates/feedparser-rs-py/Cargo.toml
@@ -18,6 +18,7 @@ crate-type = ["cdylib"]
feedparser-rs = { path = "../feedparser-rs-core" }
pyo3 = { workspace = true, features = ["extension-module", "chrono"] }
chrono = { workspace = true, features = ["clock"] }
+once_cell = { workspace = true }
[features]
default = ["http"]
diff --git a/crates/feedparser-rs-py/README.md b/crates/feedparser-rs-py/README.md
index 92c58fe..d0383f0 100644
--- a/crates/feedparser-rs-py/README.md
+++ b/crates/feedparser-rs-py/README.md
@@ -14,7 +14,7 @@ High-performance RSS/Atom/JSON Feed parser for Python with feedparser-compatible
- **Tolerant parsing**: Bozo flag for graceful handling of malformed feeds
- **Multi-format**: RSS 0.9x/1.0/2.0, Atom 0.3/1.0, JSON Feed 1.0/1.1
- **Podcast support**: iTunes and Podcast 2.0 namespace extensions
-- **Familiar API**: Inspired by feedparser, easy migration path
+- **feedparser-compatible**: Dict-style access, field aliases, same API patterns
- **DoS protection**: Built-in resource limits
## Installation
@@ -33,15 +33,20 @@ pip install feedparser-rs
```python
import feedparser_rs
-# Parse from string or bytes
+# Parse from string, bytes, or URL (auto-detected)
d = feedparser_rs.parse('...')
d = feedparser_rs.parse(b'...')
+d = feedparser_rs.parse('https://example.com/feed.xml') # URL auto-detected
-# Access data
+# Attribute-style access (feedparser-compatible)
print(d.feed.title)
print(d.version) # "rss20", "atom10", etc.
print(d.bozo) # True if parsing errors occurred
+# Dict-style access (feedparser-compatible)
+print(d['feed']['title'])
+print(d['entries'][0]['link'])
+
for entry in d.entries:
print(entry.title)
print(entry.published_parsed) # time.struct_time
@@ -55,35 +60,63 @@ for entry in d.entries:
```python
import feedparser_rs
-# Fetch and parse in one call
+# Option 1: Auto-detection (recommended)
+d = feedparser_rs.parse('https://example.com/feed.xml')
+
+# Option 2: Explicit URL function
d = feedparser_rs.parse_url('https://example.com/feed.xml')
-print(d.feed.title)
-print(f"Fetched {len(d.entries)} entries")
+# With conditional GET for efficient polling
+d = feedparser_rs.parse(
+ 'https://example.com/feed.xml',
+ etag=cached_etag,
+ modified=cached_modified
+)
+if d.status == 304:
+ print("Feed not modified")
# With custom limits
limits = feedparser_rs.ParserLimits(max_entries=100)
-d = feedparser_rs.parse_url_with_limits('https://example.com/feed.xml', limits)
+d = feedparser_rs.parse_with_limits('https://example.com/feed.xml', limits=limits)
```
> [!TIP]
-> `parse_url` supports automatic compression (gzip, deflate, brotli) and follows redirects.
+> URL fetching supports automatic compression (gzip, deflate, brotli) and follows redirects.
## Migration from feedparser
+feedparser-rs is designed as a drop-in replacement for Python feedparser:
+
```python
-# Option 1: alias import
+# Drop-in replacement
import feedparser_rs as feedparser
-d = feedparser.parse(feed_content)
-# Option 2: direct import
-import feedparser_rs
-d = feedparser_rs.parse(feed_content)
+# Same API patterns work
+d = feedparser.parse('https://example.com/feed.xml')
+print(d.feed.title)
+print(d['feed']['title']) # Dict-style access works too
+print(d.entries[0].link)
-# Option 3: URL fetching (new!)
-d = feedparser_rs.parse_url('https://example.com/feed.xml')
+# Deprecated field names supported
+print(d.feed.description) # → d.feed.subtitle
+print(d.channel.title) # → d.feed.title
+print(d.items[0].guid) # → d.entries[0].id
```
+### Supported Field Aliases
+
+| Old Name | Maps To |
+|----------|---------|
+| `feed.description` | `feed.subtitle` or `feed.summary` |
+| `feed.tagline` | `feed.subtitle` |
+| `feed.copyright` | `feed.rights` |
+| `feed.modified` | `feed.updated` |
+| `channel` | `feed` |
+| `items` | `entries` |
+| `entry.guid` | `entry.id` |
+| `entry.description` | `entry.summary` |
+| `entry.issued` | `entry.published` |
+
## Advanced Usage
### Custom Resource Limits
@@ -98,7 +131,7 @@ limits = feedparser_rs.ParserLimits(
max_links_per_entry=50,
)
-d = feedparser_rs.parse_with_limits(feed_data, limits)
+d = feedparser_rs.parse_with_limits(feed_data, limits=limits)
```
### Format Detection
@@ -132,20 +165,23 @@ for entry in d.entries:
### Functions
-- `parse(source)` — Parse feed from bytes or str
-- `parse_url(url)` — Fetch and parse feed from URL
-- `parse_with_limits(source, limits)` — Parse with custom resource limits
-- `parse_url_with_limits(url, limits)` — Fetch and parse with custom limits
+- `parse(source, etag=None, modified=None, user_agent=None)` — Parse feed from bytes, str, or URL (auto-detected)
+- `parse_url(url, etag=None, modified=None, user_agent=None)` — Fetch and parse feed from URL
+- `parse_with_limits(source, etag=None, modified=None, user_agent=None, limits=None)` — Parse with custom resource limits
+- `parse_url_with_limits(url, etag=None, modified=None, user_agent=None, limits=None)` — Fetch and parse with custom limits
- `detect_format(source)` — Detect feed format without full parsing
### Classes
-- `FeedParserDict` — Parsed feed result
- - `.feed` — Feed metadata
- - `.entries` — List of entries
+- `FeedParserDict` — Parsed feed result (supports both attribute and dict-style access)
+ - `.feed` / `['feed']` — Feed metadata
+ - `.entries` / `['entries']` — List of entries
- `.bozo` — True if parsing errors occurred
- `.version` — Feed version string
- `.encoding` — Character encoding
+ - `.status` — HTTP status code (for URL fetches)
+ - `.etag` — ETag header (for conditional GET)
+ - `.modified` — Last-Modified header (for conditional GET)
- `ParserLimits` — Resource limits configuration
diff --git a/crates/feedparser-rs-py/pyproject.toml b/crates/feedparser-rs-py/pyproject.toml
index c3c88ea..855f575 100644
--- a/crates/feedparser-rs-py/pyproject.toml
+++ b/crates/feedparser-rs-py/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
[project]
name = "feedparser-rs"
-version = "0.3.0"
+version = "0.4.0"
description = "High-performance RSS/Atom/JSON Feed parser with feedparser-compatible API"
readme = "README.md"
license = { text = "MIT OR Apache-2.0" }
diff --git a/crates/feedparser-rs-py/src/lib.rs b/crates/feedparser-rs-py/src/lib.rs
index 84847a7..ef82276 100644
--- a/crates/feedparser-rs-py/src/lib.rs
+++ b/crates/feedparser-rs-py/src/lib.rs
@@ -40,39 +40,136 @@ fn _feedparser_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
Ok(())
}
-/// Parse an RSS/Atom/JSON Feed from bytes or string
+/// Parse an RSS/Atom/JSON Feed from bytes, string, or URL
+///
+/// Automatically detects whether `source` is a URL (http://, https://) or content.
+/// For URLs, fetches and parses the feed. For content, parses directly.
+///
+/// # Arguments
+///
+/// * `source` - URL string, feed content string, or bytes
+/// * `etag` - Optional ETag from previous fetch (for URLs with conditional GET)
+/// * `modified` - Optional Last-Modified timestamp (for URLs with conditional GET)
+/// * `user_agent` - Optional custom User-Agent header (for URLs)
+///
+/// # Examples
+///
+/// ```python
+/// import feedparser_rs
+///
+/// # Parse from URL (auto-detected)
+/// feed = feedparser_rs.parse("https://example.com/feed.xml")
+///
+/// # Parse from content
+/// feed = feedparser_rs.parse("...")
+///
+/// # Parse from URL with caching
+/// feed = feedparser_rs.parse(
+/// "https://example.com/feed.xml",
+/// etag=cached_etag,
+/// modified=cached_modified
+/// )
+/// ```
#[pyfunction]
-#[pyo3(signature = (source, /))]
-fn parse(py: Python<'_>, source: &Bound<'_, PyAny>) -> PyResult<PyParsedFeed> {
- parse_with_limits(py, source, None)
+#[pyo3(signature = (source, /, etag=None, modified=None, user_agent=None))]
+fn parse(
+ py: Python<'_>,
+ source: &Bound<'_, PyAny>,
+ etag: Option<&str>,
+ modified: Option<&str>,
+ user_agent: Option<&str>,
+) -> PyResult<PyParsedFeed> {
+ parse_internal(py, source, etag, modified, user_agent, None)
}
/// Parse with custom resource limits for DoS protection
+///
+/// Like `parse()` but allows specifying custom limits for untrusted feeds.
+///
+/// # Arguments
+///
+/// * `source` - URL string, feed content string, or bytes
+/// * `etag` - Optional ETag from previous fetch (for URLs)
+/// * `modified` - Optional Last-Modified timestamp (for URLs)
+/// * `user_agent` - Optional custom User-Agent header (for URLs)
+/// * `limits` - Optional parser limits for DoS protection
+///
+/// # Examples
+///
+/// ```python
+/// import feedparser_rs
+///
+/// limits = feedparser_rs.ParserLimits.strict()
+///
+/// # Parse from URL with limits
+/// feed = feedparser_rs.parse_with_limits(
+/// "https://example.com/feed.xml",
+/// limits=limits
+/// )
+///
+/// # Parse from content with limits
+/// feed = feedparser_rs.parse_with_limits("...", limits=limits)
+/// ```
#[pyfunction]
-#[pyo3(signature = (source, limits=None))]
+#[pyo3(signature = (source, /, etag=None, modified=None, user_agent=None, limits=None))]
fn parse_with_limits(
py: Python<'_>,
source: &Bound<'_, PyAny>,
+ etag: Option<&str>,
+ modified: Option<&str>,
+ user_agent: Option<&str>,
limits: Option<&PyParserLimits>,
) -> PyResult<PyParsedFeed> {
- let bytes: Vec<u8> = if let Ok(s) = source.extract::<String>() {
+ parse_internal(py, source, etag, modified, user_agent, limits)
+}
+
+/// Internal parse function that handles both URL and content sources
+fn parse_internal(
+ py: Python<'_>,
+ source: &Bound<'_, PyAny>,
+ etag: Option<&str>,
+ modified: Option<&str>,
+ user_agent: Option<&str>,
+ limits: Option<&PyParserLimits>,
+) -> PyResult<PyParsedFeed> {
+ // Try to extract as string first
+ if let Ok(s) = source.extract::<String>() {
+ // Check if it's a URL
if s.starts_with("http://") || s.starts_with("https://") {
- return Err(pyo3::exceptions::PyNotImplementedError::new_err(
- "URL fetching not implemented. Use requests.get(url).content",
- ));
+ // Handle URL - requires http feature
+ #[cfg(feature = "http")]
+ {
+ let parser_limits = limits.map(|l| l.to_core_limits()).unwrap_or_default();
+ let parsed =
+ core::parse_url_with_limits(&s, etag, modified, user_agent, parser_limits)
+ .map_err(convert_feed_error)?;
+ return PyParsedFeed::from_core(py, parsed);
+ }
+ #[cfg(not(feature = "http"))]
+ {
+ return Err(pyo3::exceptions::PyNotImplementedError::new_err(
+ "URL fetching requires the 'http' feature. Build with: maturin develop --features http",
+ ));
+ }
}
- s.into_bytes()
- } else if let Ok(b) = source.extract::<Vec<u8>>() {
- b
- } else {
- return Err(pyo3::exceptions::PyTypeError::new_err(
- "source must be str or bytes",
- ));
- };
- let parser_limits = limits.map(|l| l.to_core_limits()).unwrap_or_default();
- let parsed = core::parse_with_limits(&bytes, parser_limits).map_err(convert_feed_error)?;
- PyParsedFeed::from_core(py, parsed)
+ // Parse as content
+ let parser_limits = limits.map(|l| l.to_core_limits()).unwrap_or_default();
+ let parsed =
+ core::parse_with_limits(s.as_bytes(), parser_limits).map_err(convert_feed_error)?;
+ return PyParsedFeed::from_core(py, parsed);
+ }
+
+ // Try to extract as bytes
+ if let Ok(b) = source.extract::<Vec<u8>>() {
+ let parser_limits = limits.map(|l| l.to_core_limits()).unwrap_or_default();
+ let parsed = core::parse_with_limits(&b, parser_limits).map_err(convert_feed_error)?;
+ return PyParsedFeed::from_core(py, parsed);
+ }
+
+ Err(pyo3::exceptions::PyTypeError::new_err(
+ "source must be str, bytes, or URL",
+ ))
}
/// Detect feed format without full parsing
diff --git a/crates/feedparser-rs-py/src/types/compat.rs b/crates/feedparser-rs-py/src/types/compat.rs
new file mode 100644
index 0000000..930c9eb
--- /dev/null
+++ b/crates/feedparser-rs-py/src/types/compat.rs
@@ -0,0 +1,123 @@
+use once_cell::sync::Lazy;
+/// Python feedparser backward compatibility field mappings.
+///
+/// This module provides field alias mappings for deprecated Python feedparser field names.
+/// Old field names map to new field names for backward compatibility.
+///
+/// Example: `feed.description` → `feed.subtitle`
+/// `entry.guid` → `entry.id`
+use std::collections::HashMap;
+
+/// Feed-level field mappings: old name → list of new names (tried in order).
+///
+/// Some aliases can map to multiple fields (e.g., description → subtitle OR summary).
+/// The resolver tries each new field in order until it finds a non-None value.
+pub static FEED_FIELD_MAP: Lazy<HashMap<&'static str, Vec<&'static str>>> = Lazy::new(|| {
+ let mut map = HashMap::new();
+
+ // Description aliases
+ map.insert("description", vec!["subtitle", "summary"]);
+ map.insert(
+ "description_detail",
+ vec!["subtitle_detail", "summary_detail"],
+ );
+
+ // Tagline aliases (old Atom 0.3 field)
+ map.insert("tagline", vec!["subtitle"]);
+ map.insert("tagline_detail", vec!["subtitle_detail"]);
+
+ // Info alias (RSS 1.0)
+ map.insert("info", vec!["subtitle"]);
+ map.insert("info_detail", vec!["subtitle_detail"]);
+
+ // Copyright alias
+ map.insert("copyright", vec!["rights"]);
+ map.insert("copyright_detail", vec!["rights_detail"]);
+
+ // Modified alias
+ map.insert("modified", vec!["updated"]);
+ map.insert("modified_parsed", vec!["updated_parsed"]);
+
+ // Date alias (generic fallback)
+ map.insert("date", vec!["updated", "published"]);
+ map.insert("date_parsed", vec!["updated_parsed", "published_parsed"]);
+
+ // URL alias
+ map.insert("url", vec!["link"]);
+
+ map
+});
+
+/// Entry-level field mappings: old name → list of new names (tried in order).
+pub static ENTRY_FIELD_MAP: Lazy<HashMap<&'static str, Vec<&'static str>>> = Lazy::new(|| {
+ let mut map = HashMap::new();
+
+ // GUID alias (RSS)
+ map.insert("guid", vec!["id"]);
+
+ // Description alias
+ map.insert("description", vec!["summary"]);
+ map.insert("description_detail", vec!["summary_detail"]);
+
+ // Issued alias (old feedparser field)
+ map.insert("issued", vec!["published"]);
+ map.insert("issued_parsed", vec!["published_parsed"]);
+
+ // Modified alias
+ map.insert("modified", vec!["updated"]);
+ map.insert("modified_parsed", vec!["updated_parsed"]);
+
+ // Date alias (generic fallback)
+ map.insert("date", vec!["updated", "published"]);
+ map.insert("date_parsed", vec!["updated_parsed", "published_parsed"]);
+
+ map
+});
+
+/// Container-level field mappings for PyParsedFeed.
+pub static CONTAINER_FIELD_MAP: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
+ let mut map = HashMap::new();
+
+ // RSS uses , Atom uses
+ map.insert("channel", "feed");
+
+ // RSS uses <channel> - <item>, Atom uses <entry>
+ map.insert("items", "entries");
+
+ map
+});
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_feed_field_map_description() {
+ let targets = FEED_FIELD_MAP.get("description").unwrap();
+ assert_eq!(targets, &vec!["subtitle", "summary"]);
+ }
+
+ #[test]
+ fn test_feed_field_map_modified() {
+ let targets = FEED_FIELD_MAP.get("modified").unwrap();
+ assert_eq!(targets, &vec!["updated"]);
+ }
+
+ #[test]
+ fn test_entry_field_map_guid() {
+ let targets = ENTRY_FIELD_MAP.get("guid").unwrap();
+ assert_eq!(targets, &vec!["id"]);
+ }
+
+ #[test]
+ fn test_entry_field_map_issued() {
+ let targets = ENTRY_FIELD_MAP.get("issued").unwrap();
+ assert_eq!(targets, &vec!["published"]);
+ }
+
+ #[test]
+ fn test_container_field_map_channel() {
+ let target = CONTAINER_FIELD_MAP.get("channel").unwrap();
+ assert_eq!(*target, "feed");
+ }
+}
diff --git a/crates/feedparser-rs-py/src/types/entry.rs b/crates/feedparser-rs-py/src/types/entry.rs
index c518de4..6ad853c 100644
--- a/crates/feedparser-rs-py/src/types/entry.rs
+++ b/crates/feedparser-rs-py/src/types/entry.rs
@@ -1,7 +1,9 @@
use feedparser_rs::Entry as CoreEntry;
+use pyo3::exceptions::{PyAttributeError, PyKeyError};
use pyo3::prelude::*;
use super::common::{PyContent, PyEnclosure, PyLink, PyPerson, PySource, PyTag, PyTextConstruct};
+use super::compat::ENTRY_FIELD_MAP;
use super::datetime::optional_datetime_to_struct_time;
use super::geo::PyGeoLocation;
use super::media::{PyMediaContent, PyMediaThumbnail};
@@ -301,4 +303,416 @@ impl PyEntry {
self.inner.id.as_deref().unwrap_or("no-id")
)
}
+
+ /// Provides backward compatibility for deprecated Python feedparser field names.
+ ///
+ /// Maps old field names to their modern equivalents:
+ /// - `guid` → `id`
+ /// - `description` → `summary`
+ /// - `issued` → `published`
+ /// - `modified` → `updated`
+ /// - `date` → `updated` (or `published` as fallback)
+ ///
+ /// This method is called by Python when normal attribute lookup fails.
+ fn __getattr__(&self, py: Python<'_>, name: &str) -> PyResult<Py<PyAny>> {
+ // Check if this is a deprecated field name
+ if let Some(new_names) = ENTRY_FIELD_MAP.get(name) {
+ // Try each new field name in order
+ for new_name in new_names {
+ let value: Option<Py<PyAny>> = match *new_name {
+ "id" => self
+ .inner
+ .id
+ .as_deref()
+ .and_then(|v| v.into_pyobject(py).map(|o| o.unbind().into()).ok()),
+ "summary" => self
+ .inner
+ .summary
+ .as_deref()
+ .and_then(|v| v.into_pyobject(py).map(|o| o.unbind().into()).ok()),
+ "summary_detail" => self.inner.summary_detail.as_ref().and_then(|tc| {
+ Py::new(py, PyTextConstruct::from_core(tc.clone()))
+ .ok()
+ .map(|p: Py<PyTextConstruct>| p.into_any())
+ }),
+ "published" => self.inner.published.and_then(|dt| {
+ dt.to_rfc3339()
+ .into_pyobject(py)
+ .map(|o| o.unbind().into())
+ .ok()
+ }),
+ "published_parsed" => {
+ optional_datetime_to_struct_time(py, &self.inner.published)
+ .ok()
+ .flatten()
+ }
+ "updated" => self.inner.updated.and_then(|dt| {
+ dt.to_rfc3339()
+ .into_pyobject(py)
+ .map(|o| o.unbind().into())
+ .ok()
+ }),
+ "updated_parsed" => optional_datetime_to_struct_time(py, &self.inner.updated)
+ .ok()
+ .flatten(),
+ _ => None,
+ };
+
+ // If we found a value, return it
+ if let Some(v) = value {
+ return Ok(v);
+ }
+ }
+ }
+
+ // Field not found - raise AttributeError
+ Err(PyAttributeError::new_err(format!(
+ "'Entry' object has no attribute '{}'",
+ name
+ )))
+ }
+
+ /// Provides dict-style access to fields for Python feedparser compatibility.
+ ///
+ /// Supports both modern field names and deprecated aliases.
+ /// This method is called by Python when using dict-style access: `entry['title']`.
+ ///
+ /// Raises KeyError for unknown keys (unlike __getattr__ which raises AttributeError).
+ fn __getitem__(&self, py: Python<'_>, key: &str) -> PyResult<Py<PyAny>> {
+ // Check for known fields first
+ match key {
+ "id" => Ok(self
+ .inner
+ .id
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "title" => Ok(self
+ .inner
+ .title
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "title_detail" => {
+ if let Some(ref tc) = self.inner.title_detail {
+ Ok(Py::new(py, PyTextConstruct::from_core(tc.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "link" => Ok(self
+ .inner
+ .link
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "links" => {
+ let links: Vec<_> = self
+ .inner
+ .links
+ .iter()
+ .map(|l| PyLink::from_core(l.clone()))
+ .collect();
+ Ok(links.into_pyobject(py)?.into_any().unbind())
+ }
+ "summary" => Ok(self
+ .inner
+ .summary
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "summary_detail" => {
+ if let Some(ref tc) = self.inner.summary_detail {
+ Ok(Py::new(py, PyTextConstruct::from_core(tc.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "content" => {
+ let content: Vec<_> = self
+ .inner
+ .content
+ .iter()
+ .map(|c| PyContent::from_core(c.clone()))
+ .collect();
+ Ok(content.into_pyobject(py)?.into_any().unbind())
+ }
+ "published" => Ok(self
+ .inner
+ .published
+ .map(|dt| dt.to_rfc3339())
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "published_parsed" => Ok(optional_datetime_to_struct_time(py, &self.inner.published)?
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "updated" => Ok(self
+ .inner
+ .updated
+ .map(|dt| dt.to_rfc3339())
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "updated_parsed" => Ok(optional_datetime_to_struct_time(py, &self.inner.updated)?
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "created" => Ok(self
+ .inner
+ .created
+ .map(|dt| dt.to_rfc3339())
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "created_parsed" => Ok(optional_datetime_to_struct_time(py, &self.inner.created)?
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "expired" => Ok(self
+ .inner
+ .expired
+ .map(|dt| dt.to_rfc3339())
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "expired_parsed" => Ok(optional_datetime_to_struct_time(py, &self.inner.expired)?
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "author" => Ok(self
+ .inner
+ .author
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "author_detail" => {
+ if let Some(ref p) = self.inner.author_detail {
+ Ok(Py::new(py, PyPerson::from_core(p.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "authors" => {
+ let authors: Vec<_> = self
+ .inner
+ .authors
+ .iter()
+ .map(|p| PyPerson::from_core(p.clone()))
+ .collect();
+ Ok(authors.into_pyobject(py)?.into_any().unbind())
+ }
+ "contributors" => {
+ let contributors: Vec<_> = self
+ .inner
+ .contributors
+ .iter()
+ .map(|p| PyPerson::from_core(p.clone()))
+ .collect();
+ Ok(contributors.into_pyobject(py)?.into_any().unbind())
+ }
+ "publisher" => Ok(self
+ .inner
+ .publisher
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "publisher_detail" => {
+ if let Some(ref p) = self.inner.publisher_detail {
+ Ok(Py::new(py, PyPerson::from_core(p.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "tags" => {
+ let tags: Vec<_> = self
+ .inner
+ .tags
+ .iter()
+ .map(|t| PyTag::from_core(t.clone()))
+ .collect();
+ Ok(tags.into_pyobject(py)?.into_any().unbind())
+ }
+ "enclosures" => {
+ let enclosures: Vec<_> = self
+ .inner
+ .enclosures
+ .iter()
+ .map(|e| PyEnclosure::from_core(e.clone()))
+ .collect();
+ Ok(enclosures.into_pyobject(py)?.into_any().unbind())
+ }
+ "comments" => Ok(self
+ .inner
+ .comments
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "source" => {
+ if let Some(ref s) = self.inner.source {
+ Ok(Py::new(py, PySource::from_core(s.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "itunes" => {
+ if let Some(ref i) = self.inner.itunes {
+ Ok(Py::new(py, PyItunesEntryMeta::from_core(i.as_ref().clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "podcast_transcripts" => {
+ let transcripts: Vec<_> = self
+ .inner
+ .podcast_transcripts
+ .iter()
+ .map(|t| PyPodcastTranscript::from_core(t.clone()))
+ .collect();
+ Ok(transcripts.into_pyobject(py)?.into_any().unbind())
+ }
+ "podcast_persons" => {
+ let persons: Vec<_> = self
+ .inner
+ .podcast_persons
+ .iter()
+ .map(|p| PyPodcastPerson::from_core(p.clone()))
+ .collect();
+ Ok(persons.into_pyobject(py)?.into_any().unbind())
+ }
+ "license" => Ok(self
+ .inner
+ .license
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "geo" => {
+ if let Some(ref g) = self.inner.geo {
+ Ok(Py::new(py, PyGeoLocation::from_core(g.as_ref().clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "dc_creator" => Ok(self
+ .inner
+ .dc_creator
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "dc_date" => Ok(self
+ .inner
+ .dc_date
+ .map(|dt| dt.to_rfc3339())
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "dc_date_parsed" => Ok(optional_datetime_to_struct_time(py, &self.inner.dc_date)?
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "dc_rights" => Ok(self
+ .inner
+ .dc_rights
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "dc_subject" => Ok(self
+ .inner
+ .dc_subject
+ .clone()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "media_thumbnails" => {
+ let thumbnails: Vec<_> = self
+ .inner
+ .media_thumbnails
+ .iter()
+ .map(|t| PyMediaThumbnail::from_core(t.clone()))
+ .collect();
+ Ok(thumbnails.into_pyobject(py)?.into_any().unbind())
+ }
+ "media_content" => {
+ let content: Vec<_> = self
+ .inner
+ .media_content
+ .iter()
+ .map(|c| PyMediaContent::from_core(c.clone()))
+ .collect();
+ Ok(content.into_pyobject(py)?.into_any().unbind())
+ }
+ "podcast" => {
+ if let Some(ref p) = self.inner.podcast {
+ Ok(Py::new(py, PyPodcastEntryMeta::from_core(p.as_ref().clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ // Check for deprecated field name aliases
+ _ => {
+ if let Some(new_names) = ENTRY_FIELD_MAP.get(key) {
+ // Try each new field name in order
+ for new_name in new_names {
+ let value: Option<Py<PyAny>> =
+ match *new_name {
+ "id" => self.inner.id.as_deref().and_then(|v| {
+ v.into_pyobject(py).map(|o| o.unbind().into()).ok()
+ }),
+ "summary" => self.inner.summary.as_deref().and_then(|v| {
+ v.into_pyobject(py).map(|o| o.unbind().into()).ok()
+ }),
+ "summary_detail" => {
+ self.inner.summary_detail.as_ref().and_then(|tc| {
+ Py::new(py, PyTextConstruct::from_core(tc.clone()))
+ .ok()
+ .map(|p: Py<PyTextConstruct>| p.into_any())
+ })
+ }
+ "published" => self.inner.published.and_then(|dt| {
+ dt.to_rfc3339()
+ .into_pyobject(py)
+ .map(|o| o.unbind().into())
+ .ok()
+ }),
+ "published_parsed" => {
+ optional_datetime_to_struct_time(py, &self.inner.published)
+ .ok()
+ .flatten()
+ }
+ "updated" => self.inner.updated.and_then(|dt| {
+ dt.to_rfc3339()
+ .into_pyobject(py)
+ .map(|o| o.unbind().into())
+ .ok()
+ }),
+ "updated_parsed" => {
+ optional_datetime_to_struct_time(py, &self.inner.updated)
+ .ok()
+ .flatten()
+ }
+ _ => None,
+ };
+
+ // If we found a value, return it
+ if let Some(v) = value {
+ return Ok(v);
+ }
+ }
+ }
+ // Field not found - raise KeyError
+ Err(PyKeyError::new_err(format!("'{}'", key)))
+ }
+ }
+ }
}
diff --git a/crates/feedparser-rs-py/src/types/feed_meta.rs b/crates/feedparser-rs-py/src/types/feed_meta.rs
index fff1850..946b084 100644
--- a/crates/feedparser-rs-py/src/types/feed_meta.rs
+++ b/crates/feedparser-rs-py/src/types/feed_meta.rs
@@ -1,7 +1,9 @@
use feedparser_rs::FeedMeta as CoreFeedMeta;
+use pyo3::exceptions::{PyAttributeError, PyKeyError};
use pyo3::prelude::*;
use super::common::{PyGenerator, PyImage, PyLink, PyPerson, PyTag, PyTextConstruct};
+use super::compat::FEED_FIELD_MAP;
use super::datetime::optional_datetime_to_struct_time;
use super::geo::PyGeoLocation;
use super::podcast::{PyItunesFeedMeta, PyPodcastMeta};
@@ -252,4 +254,420 @@ impl PyFeedMeta {
self.inner.link.as_deref().unwrap_or("no-link")
)
}
+
+ /// Provides backward compatibility for deprecated Python feedparser field names.
+ ///
+ /// Maps old field names to their modern equivalents:
+ /// - `description` → `subtitle` (or `summary` as fallback)
+ /// - `tagline` → `subtitle`
+ /// - `modified` → `updated`
+ /// - `copyright` → `rights`
+ /// - `date` → `updated` (or `published` as fallback)
+ /// - `url` → `link`
+ ///
+ /// This method is called by Python when normal attribute lookup fails.
+ fn __getattr__(&self, py: Python<'_>, name: &str) -> PyResult> {
+ // Check if this is a deprecated field name
+ if let Some(new_names) = FEED_FIELD_MAP.get(name) {
+ // Try each new field name in order
+ for new_name in new_names {
+ let value: Option> = match *new_name {
+ "subtitle" => self
+ .inner
+ .subtitle
+ .as_deref()
+ .and_then(|v| v.into_pyobject(py).map(|o| o.unbind().into()).ok()),
+ "subtitle_detail" => self.inner.subtitle_detail.as_ref().and_then(|tc| {
+ Py::new(py, PyTextConstruct::from_core(tc.clone()))
+ .ok()
+ .map(|p: Py| p.into_any())
+ }),
+ "summary" => self
+ .inner
+ .subtitle
+ .as_deref()
+ .and_then(|v| v.into_pyobject(py).map(|o| o.unbind().into()).ok()),
+ "summary_detail" => self.inner.subtitle_detail.as_ref().and_then(|tc| {
+ Py::new(py, PyTextConstruct::from_core(tc.clone()))
+ .ok()
+ .map(|p: Py| p.into_any())
+ }),
+ "rights" => self
+ .inner
+ .rights
+ .as_deref()
+ .and_then(|v| v.into_pyobject(py).map(|o| o.unbind().into()).ok()),
+ "rights_detail" => self.inner.rights_detail.as_ref().and_then(|tc| {
+ Py::new(py, PyTextConstruct::from_core(tc.clone()))
+ .ok()
+ .map(|p: Py| p.into_any())
+ }),
+ "updated" => self.inner.updated.and_then(|dt| {
+ dt.to_rfc3339()
+ .into_pyobject(py)
+ .map(|o| o.unbind().into())
+ .ok()
+ }),
+ "updated_parsed" => optional_datetime_to_struct_time(py, &self.inner.updated)
+ .ok()
+ .flatten(),
+ "published" => self.inner.published.and_then(|dt| {
+ dt.to_rfc3339()
+ .into_pyobject(py)
+ .map(|o| o.unbind().into())
+ .ok()
+ }),
+ "published_parsed" => {
+ optional_datetime_to_struct_time(py, &self.inner.published)
+ .ok()
+ .flatten()
+ }
+ "link" => self
+ .inner
+ .link
+ .as_deref()
+ .and_then(|v| v.into_pyobject(py).map(|o| o.unbind().into()).ok()),
+ _ => None,
+ };
+
+ // If we found a value, return it
+ if let Some(v) = value {
+ return Ok(v);
+ }
+ }
+ }
+
+ // Field not found - raise AttributeError
+ Err(PyAttributeError::new_err(format!(
+ "'FeedMeta' object has no attribute '{}'",
+ name
+ )))
+ }
+
+ /// Provides dict-style access to fields for Python feedparser compatibility.
+ ///
+ /// Supports both modern field names and deprecated aliases.
+ /// This method is called by Python when using dict-style access: `feed['title']`.
+ ///
+ /// Raises KeyError for unknown keys (unlike __getattr__ which raises AttributeError).
+ fn __getitem__(&self, py: Python<'_>, key: &str) -> PyResult> {
+ // Check for known fields first
+ match key {
+ "title" => Ok(self
+ .inner
+ .title
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "title_detail" => {
+ if let Some(ref tc) = self.inner.title_detail {
+ Ok(Py::new(py, PyTextConstruct::from_core(tc.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "link" => Ok(self
+ .inner
+ .link
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "links" => {
+ let links: Vec<_> = self
+ .inner
+ .links
+ .iter()
+ .map(|l| PyLink::from_core(l.clone()))
+ .collect();
+ Ok(links.into_pyobject(py)?.into_any().unbind())
+ }
+ "subtitle" => Ok(self
+ .inner
+ .subtitle
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "subtitle_detail" => {
+ if let Some(ref tc) = self.inner.subtitle_detail {
+ Ok(Py::new(py, PyTextConstruct::from_core(tc.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "updated" => Ok(self
+ .inner
+ .updated
+ .map(|dt| dt.to_rfc3339())
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "updated_parsed" => Ok(optional_datetime_to_struct_time(py, &self.inner.updated)?
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "published" => Ok(self
+ .inner
+ .published
+ .map(|dt| dt.to_rfc3339())
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "published_parsed" => Ok(optional_datetime_to_struct_time(py, &self.inner.published)?
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "author" => Ok(self
+ .inner
+ .author
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "author_detail" => {
+ if let Some(ref p) = self.inner.author_detail {
+ Ok(Py::new(py, PyPerson::from_core(p.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "authors" => {
+ let authors: Vec<_> = self
+ .inner
+ .authors
+ .iter()
+ .map(|p| PyPerson::from_core(p.clone()))
+ .collect();
+ Ok(authors.into_pyobject(py)?.into_any().unbind())
+ }
+ "contributors" => {
+ let contributors: Vec<_> = self
+ .inner
+ .contributors
+ .iter()
+ .map(|p| PyPerson::from_core(p.clone()))
+ .collect();
+ Ok(contributors.into_pyobject(py)?.into_any().unbind())
+ }
+ "publisher" => Ok(self
+ .inner
+ .publisher
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "publisher_detail" => {
+ if let Some(ref p) = self.inner.publisher_detail {
+ Ok(Py::new(py, PyPerson::from_core(p.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "language" => Ok(self
+ .inner
+ .language
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "rights" => Ok(self
+ .inner
+ .rights
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "rights_detail" => {
+ if let Some(ref tc) = self.inner.rights_detail {
+ Ok(Py::new(py, PyTextConstruct::from_core(tc.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "generator" => Ok(self
+ .inner
+ .generator
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "generator_detail" => {
+ if let Some(ref g) = self.inner.generator_detail {
+ Ok(Py::new(py, PyGenerator::from_core(g.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "image" => {
+ if let Some(ref i) = self.inner.image {
+ Ok(Py::new(py, PyImage::from_core(i.clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "icon" => Ok(self
+ .inner
+ .icon
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "logo" => Ok(self
+ .inner
+ .logo
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "tags" => {
+ let tags: Vec<_> = self
+ .inner
+ .tags
+ .iter()
+ .map(|t| PyTag::from_core(t.clone()))
+ .collect();
+ Ok(tags.into_pyobject(py)?.into_any().unbind())
+ }
+ "id" => Ok(self
+ .inner
+ .id
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "ttl" => Ok(self.inner.ttl.into_pyobject(py)?.into_any().unbind()),
+ "itunes" => {
+ if let Some(ref i) = self.inner.itunes {
+ Ok(Py::new(py, PyItunesFeedMeta::from_core(i.as_ref().clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "podcast" => {
+ if let Some(ref p) = self.inner.podcast {
+ Ok(Py::new(py, PyPodcastMeta::from_core(p.as_ref().clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "license" => Ok(self
+ .inner
+ .license
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "syndication" => {
+ if let Some(ref s) = self.inner.syndication {
+ Ok(Py::new(py, PySyndicationMeta::from_core(s.as_ref().clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ "dc_creator" => Ok(self
+ .inner
+ .dc_creator
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "dc_publisher" => Ok(self
+ .inner
+ .dc_publisher
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "dc_rights" => Ok(self
+ .inner
+ .dc_rights
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "geo" => {
+ if let Some(ref g) = self.inner.geo {
+ Ok(Py::new(py, PyGeoLocation::from_core(g.as_ref().clone()))?.into_any())
+ } else {
+ Ok(py.None())
+ }
+ }
+ // Check for deprecated field name aliases
+ _ => {
+ if let Some(new_names) = FEED_FIELD_MAP.get(key) {
+ // Try each new field name in order
+ for new_name in new_names {
+ let value: Option> =
+ match *new_name {
+ "subtitle" => self.inner.subtitle.as_deref().and_then(|v| {
+ v.into_pyobject(py).map(|o| o.unbind().into()).ok()
+ }),
+ "subtitle_detail" => {
+ self.inner.subtitle_detail.as_ref().and_then(|tc| {
+ Py::new(py, PyTextConstruct::from_core(tc.clone()))
+ .ok()
+ .map(|p: Py| p.into_any())
+ })
+ }
+ "summary" => self.inner.subtitle.as_deref().and_then(|v| {
+ v.into_pyobject(py).map(|o| o.unbind().into()).ok()
+ }),
+ "summary_detail" => {
+ self.inner.subtitle_detail.as_ref().and_then(|tc| {
+ Py::new(py, PyTextConstruct::from_core(tc.clone()))
+ .ok()
+ .map(|p: Py| p.into_any())
+ })
+ }
+ "rights" => self.inner.rights.as_deref().and_then(|v| {
+ v.into_pyobject(py).map(|o| o.unbind().into()).ok()
+ }),
+ "rights_detail" => {
+ self.inner.rights_detail.as_ref().and_then(|tc| {
+ Py::new(py, PyTextConstruct::from_core(tc.clone()))
+ .ok()
+ .map(|p: Py| p.into_any())
+ })
+ }
+ "updated" => self.inner.updated.and_then(|dt| {
+ dt.to_rfc3339()
+ .into_pyobject(py)
+ .map(|o| o.unbind().into())
+ .ok()
+ }),
+ "updated_parsed" => {
+ optional_datetime_to_struct_time(py, &self.inner.updated)
+ .ok()
+ .flatten()
+ }
+ "published" => self.inner.published.and_then(|dt| {
+ dt.to_rfc3339()
+ .into_pyobject(py)
+ .map(|o| o.unbind().into())
+ .ok()
+ }),
+ "published_parsed" => {
+ optional_datetime_to_struct_time(py, &self.inner.published)
+ .ok()
+ .flatten()
+ }
+ "link" => self.inner.link.as_deref().and_then(|v| {
+ v.into_pyobject(py).map(|o| o.unbind().into()).ok()
+ }),
+ _ => None,
+ };
+
+ // If we found a value, return it
+ if let Some(v) = value {
+ return Ok(v);
+ }
+ }
+ }
+ // Field not found - raise KeyError
+ Err(PyKeyError::new_err(format!("'{}'", key)))
+ }
+ }
+ }
}
diff --git a/crates/feedparser-rs-py/src/types/mod.rs b/crates/feedparser-rs-py/src/types/mod.rs
index 1742b4a..4f6e0b3 100644
--- a/crates/feedparser-rs-py/src/types/mod.rs
+++ b/crates/feedparser-rs-py/src/types/mod.rs
@@ -1,4 +1,5 @@
pub mod common;
+pub mod compat;
pub mod datetime;
pub mod entry;
pub mod feed_meta;
diff --git a/crates/feedparser-rs-py/src/types/parsed_feed.rs b/crates/feedparser-rs-py/src/types/parsed_feed.rs
index ff2868e..090e041 100644
--- a/crates/feedparser-rs-py/src/types/parsed_feed.rs
+++ b/crates/feedparser-rs-py/src/types/parsed_feed.rs
@@ -1,7 +1,9 @@
use feedparser_rs::ParsedFeed as CoreParsedFeed;
+use pyo3::exceptions::{PyAttributeError, PyKeyError};
use pyo3::prelude::*;
use pyo3::types::PyDict;
+use super::compat::CONTAINER_FIELD_MAP;
use super::entry::PyEntry;
use super::feed_meta::PyFeedMeta;
@@ -141,4 +143,114 @@ impl PyParsedFeed {
fn __str__(&self) -> String {
self.__repr__()
}
+
+ /// Provides backward compatibility for deprecated Python feedparser container names.
+ ///
+ /// Maps old container names to their modern equivalents:
+ /// - `channel` → `feed` (RSS uses `<channel>`, Atom uses `<feed>`)
+ /// - `items` → `entries` (RSS uses `<item>`, Atom uses `<entry>`)
+ ///
+ /// This method is called by Python when normal attribute lookup fails.
+ fn __getattr__(&self, py: Python<'_>, name: &str) -> PyResult> {
+ // Check if this is a deprecated container name
+ if let Some(new_name) = CONTAINER_FIELD_MAP.get(name) {
+ match *new_name {
+ "feed" => {
+ // Convert Py<PyFeedMeta> to Py<PyAny>
+ Ok(self.feed.clone_ref(py).into())
+ }
+ "entries" => {
+ // Convert Vec<Py<PyEntry>> to Py<PyAny> (as a Python list)
+ let entries: Vec<_> = self.entries.iter().map(|e| e.clone_ref(py)).collect();
+ match entries.into_pyobject(py) {
+ Ok(list) => Ok(list.unbind()),
+ Err(e) => Err(e),
+ }
+ }
+ _ => Err(PyAttributeError::new_err(format!(
+ "'FeedParserDict' object has no attribute '{}'",
+ name
+ ))),
+ }
+ } else {
+ // Field not found - raise AttributeError
+ Err(PyAttributeError::new_err(format!(
+ "'FeedParserDict' object has no attribute '{}'",
+ name
+ )))
+ }
+ }
+
+ /// Provides dict-style access to fields for Python feedparser compatibility.
+ ///
+ /// Supports both modern field names and deprecated aliases:
+ /// - `d['feed']` → feed metadata
+ /// - `d['entries']` → list of entries
+ /// - `d['channel']` → feed (deprecated alias)
+ /// - `d['items']` → entries (deprecated alias)
+ /// - `d['version']`, `d['bozo']`, etc. → top-level fields
+ ///
+ /// This method is called by Python when using dict-style access: `d[key]`.
+ fn __getitem__(&self, py: Python<'_>, key: &str) -> PyResult> {
+ // Check for known fields first
+ match key {
+ "feed" => Ok(self.feed.clone_ref(py).into()),
+ "entries" => {
+ let entries: Vec<_> = self.entries.iter().map(|e| e.clone_ref(py)).collect();
+ Ok(entries.into_pyobject(py)?.into_any().unbind())
+ }
+ "bozo" => {
+ let pybozo = self.bozo.into_pyobject(py)?.to_owned();
+ Ok(pybozo.into_any().unbind())
+ }
+ "bozo_exception" => Ok(self
+ .bozo_exception
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "encoding" => Ok(self
+ .encoding
+ .as_str()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ "version" => Ok(self.version.as_str().into_pyobject(py)?.into_any().unbind()),
+ "namespaces" => Ok(self.namespaces.clone_ref(py).into()),
+ "status" => Ok(self.status.into_pyobject(py)?.into_any().unbind()),
+ "href" => Ok(self.href.as_deref().into_pyobject(py)?.into_any().unbind()),
+ "etag" => Ok(self.etag.as_deref().into_pyobject(py)?.into_any().unbind()),
+ "modified" => Ok(self
+ .modified
+ .as_deref()
+ .into_pyobject(py)?
+ .into_any()
+ .unbind()),
+ #[cfg(feature = "http")]
+ "headers" => {
+ if let Some(ref headers) = self.headers {
+ Ok(headers.clone_ref(py).into())
+ } else {
+ Ok(py.None().into_pyobject(py)?.into_any().unbind())
+ }
+ }
+ // Check for deprecated container name aliases
+ _ => {
+ if let Some(new_name) = CONTAINER_FIELD_MAP.get(key) {
+ match *new_name {
+ "feed" => Ok(self.feed.clone_ref(py).into()),
+ "entries" => {
+ let entries: Vec<_> =
+ self.entries.iter().map(|e| e.clone_ref(py)).collect();
+ Ok(entries.into_pyobject(py)?.into_any().unbind())
+ }
+ _ => Err(PyKeyError::new_err(format!("'{}'", key))),
+ }
+ } else {
+ // Field not found - raise KeyError
+ Err(PyKeyError::new_err(format!("'{}'", key)))
+ }
+ }
+ }
+ }
}
diff --git a/crates/feedparser-rs-py/tests/test_basic.py b/crates/feedparser-rs-py/tests/test_basic.py
index 908a31a..bb7ba11 100644
--- a/crates/feedparser-rs-py/tests/test_basic.py
+++ b/crates/feedparser-rs-py/tests/test_basic.py
@@ -137,7 +137,7 @@ def test_parse_with_limits():
max_entries=10,
)
- d = feedparser_rs.parse_with_limits(xml, limits)
+ d = feedparser_rs.parse_with_limits(xml, limits=limits)
assert d.version == "rss20"
@@ -150,7 +150,7 @@ def test_parse_with_limits_exceeded():
)
with pytest.raises(ValueError, match="exceeds maximum"):
- feedparser_rs.parse_with_limits(xml, limits)
+ feedparser_rs.parse_with_limits(xml, limits=limits)
def test_detect_format_rss20():
diff --git a/crates/feedparser-rs-py/tests/test_compat.py b/crates/feedparser-rs-py/tests/test_compat.py
new file mode 100644
index 0000000..7f4bf99
--- /dev/null
+++ b/crates/feedparser-rs-py/tests/test_compat.py
@@ -0,0 +1,739 @@
+"""
+Test Python feedparser backward compatibility field mappings.
+
+Tests that deprecated field names correctly map to their modern equivalents:
+- Feed-level: description, tagline, modified, copyright, date, url
+- Entry-level: guid, description, issued, modified, date
+- Container-level: channel, items
+"""
+
+import pytest
+import feedparser_rs
+
+
+def test_feed_description_alias():
+ """feed.description should map to feed.subtitle"""
+ xml = """
+
+ Test subtitle text
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Both should work and return the same value
+ assert feed.feed.subtitle == "Test subtitle text"
+ assert feed.feed.description == "Test subtitle text"
+ assert feed.feed.description == feed.feed.subtitle
+
+
+def test_feed_tagline_alias():
+ """feed.tagline should map to feed.subtitle (old Atom 0.3 field)"""
+ xml = """
+ My feed tagline
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ assert feed.feed.subtitle == "My feed tagline"
+ assert feed.feed.tagline == "My feed tagline"
+ assert feed.feed.tagline == feed.feed.subtitle
+
+
+def test_feed_modified_alias():
+ """feed.modified should map to feed.updated"""
+ xml = """
+ 2024-01-01T12:00:00Z
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ assert feed.feed.updated is not None
+ assert feed.feed.modified == feed.feed.updated
+ # Both _parsed versions should work
+ assert feed.feed.modified_parsed is not None
+ assert feed.feed.modified_parsed.tm_year == 2024
+
+
+def test_feed_copyright_alias():
+ """feed.copyright should map to feed.rights"""
+ xml = """
+ Copyright 2024 Example Corp
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ assert feed.feed.rights == "Copyright 2024 Example Corp"
+ assert feed.feed.copyright == "Copyright 2024 Example Corp"
+ assert feed.feed.copyright == feed.feed.rights
+
+
+def test_feed_date_alias_falls_back_to_updated():
+ """feed.date should map to feed.updated as primary fallback"""
+ xml = """
+ 2024-01-15T10:30:00Z
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ assert feed.feed.date == feed.feed.updated
+ assert feed.feed.date_parsed.tm_year == 2024
+ assert feed.feed.date_parsed.tm_mon == 1
+ assert feed.feed.date_parsed.tm_mday == 15
+
+
+def test_feed_date_alias_falls_back_to_published():
+ """feed.date should fall back to feed.published if updated is absent"""
+ xml = """
+
+ Mon, 01 Jan 2024 12:00:00 GMT
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # updated is None, so date should map to published
+ assert feed.feed.updated is None
+ assert feed.feed.published is not None
+ assert feed.feed.date == feed.feed.published
+ assert feed.feed.date_parsed.tm_year == 2024
+
+
+def test_feed_url_alias():
+ """feed.url should map to feed.link"""
+ xml = """
+
+ https://example.com
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ assert feed.feed.link == "https://example.com"
+ assert feed.feed.url == "https://example.com"
+ assert feed.feed.url == feed.feed.link
+
+
+def test_entry_guid_alias():
+ """entry.guid should map to entry.id"""
+ xml = """
+
+
-
+ abc123xyz
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ entry = feed.entries[0]
+
+ assert entry.id == "abc123xyz"
+ assert entry.guid == "abc123xyz"
+ assert entry.guid == entry.id
+
+
+def test_entry_description_alias():
+ """entry.description should map to entry.summary"""
+ xml = """
+
+ -
+ Entry summary text
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ entry = feed.entries[0]
+
+ assert entry.summary == "Entry summary text"
+ assert entry.description == "Entry summary text"
+ assert entry.description == entry.summary
+
+
+def test_entry_issued_alias():
+ """entry.issued should map to entry.published"""
+ xml = """
+
+ -
+ Mon, 01 Jan 2024 12:00:00 GMT
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ entry = feed.entries[0]
+
+ assert entry.published is not None
+ assert entry.issued == entry.published
+ # Both _parsed versions should work
+ assert entry.issued_parsed is not None
+ assert entry.issued_parsed.tm_year == 2024
+
+
+def test_entry_modified_alias():
+ """entry.modified should map to entry.updated"""
+ xml = """
+
+ 2024-01-15T10:30:00Z
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ entry = feed.entries[0]
+
+ assert entry.updated is not None
+ assert entry.modified == entry.updated
+ assert entry.modified_parsed.tm_year == 2024
+
+
+def test_entry_date_alias_falls_back_to_updated():
+ """entry.date should map to entry.updated as primary fallback"""
+ xml = """
+
+ 2024-01-15T10:30:00Z
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ entry = feed.entries[0]
+
+ assert entry.date == entry.updated
+ assert entry.date_parsed.tm_year == 2024
+
+
+def test_entry_date_alias_falls_back_to_published():
+ """entry.date should fall back to entry.published if updated is absent"""
+ xml = """
+
+ -
+ Mon, 01 Jan 2024 12:00:00 GMT
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ entry = feed.entries[0]
+
+ # updated is None, so date should map to published
+ assert entry.updated is None
+ assert entry.published is not None
+ assert entry.date == entry.published
+ assert entry.date_parsed.tm_year == 2024
+
+
+def test_container_channel_alias():
+ """d.channel should map to d.feed (RSS uses <channel>)"""
+ xml = """
+
+ RSS Feed Title
+
+ """
+
+ d = feedparser_rs.parse(xml)
+
+ # Both should work and return the same object
+ assert d.feed.title == "RSS Feed Title"
+ assert d.channel.title == "RSS Feed Title"
+ # Verify they're the same object by checking id
+ assert d.channel.title == d.feed.title
+
+
+def test_container_items_alias():
+ """d.items should map to d.entries (RSS uses <item> elements)"""
+ xml = """
+
+
- Item 1
+ - Item 2
+
+ """
+
+ d = feedparser_rs.parse(xml)
+
+ # Both should work and return the same list
+ assert len(d.entries) == 2
+ assert len(d.items) == 2
+ assert d.items[0].title == "Item 1"
+ assert d.items[1].title == "Item 2"
+
+
+def test_unknown_field_raises_attribute_error():
+ """Accessing unknown field should raise AttributeError"""
+ xml = """
+
+ Test
+ -
+ Test Item
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Unknown fields should raise AttributeError
+ with pytest.raises(AttributeError, match="has no attribute"):
+ _ = feed.feed.nonexistent_field
+
+ with pytest.raises(AttributeError, match="has no attribute"):
+ _ = feed.entries[0].fake_attribute
+
+ with pytest.raises(AttributeError, match="has no attribute"):
+ _ = feed.this_does_not_exist
+
+
+def test_multiple_alias_access():
+ """Test accessing multiple aliases in same object"""
+ xml = """
+ My Feed
+ Feed description
+ 2024-01-01T12:00:00Z
+ Copyright 2024
+
+ entry-1
+ Entry Title
+ Entry summary
+ 2024-01-01T10:00:00Z
+ 2024-01-01T11:00:00Z
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Feed-level aliases
+ assert feed.feed.description == "Feed description"
+ assert feed.feed.tagline == "Feed description"
+ assert feed.feed.modified is not None
+ assert feed.feed.copyright == "Copyright 2024"
+
+ # Entry-level aliases
+ entry = feed.entries[0]
+ assert entry.guid == "entry-1"
+ assert entry.description == "Entry summary"
+ assert entry.issued is not None
+ assert entry.modified is not None
+
+
+def test_detail_field_aliases():
+ """Test that _detail field aliases work correctly"""
+ xml = """
+ <b>Bold subtitle</b>
+ Copyright 2024
+
+ Entry summary
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Feed-level _detail aliases
+ assert feed.feed.subtitle_detail is not None
+ assert feed.feed.description_detail is not None
+ assert feed.feed.description_detail.type == feed.feed.subtitle_detail.type
+
+ assert feed.feed.rights_detail is not None
+ assert feed.feed.copyright_detail is not None
+ assert feed.feed.copyright_detail.type == feed.feed.rights_detail.type
+
+ # Entry-level _detail aliases
+ entry = feed.entries[0]
+ assert entry.summary_detail is not None
+ assert entry.description_detail is not None
+ assert entry.description_detail.value == entry.summary_detail.value
+
+
+def test_existing_attribute_access_still_works():
+ """Ensure normal attribute access is not affected by __getattr__"""
+ xml = """
+
+ Test Feed
+ https://example.com
+ Feed description
+ -
+ Entry Title
+ https://example.com/entry
+ entry-1
+ Entry summary
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Direct attribute access should work normally
+ assert feed.feed.title == "Test Feed"
+ assert feed.feed.link == "https://example.com"
+ assert feed.feed.subtitle == "Feed description"
+
+ assert feed.entries[0].title == "Entry Title"
+ assert feed.entries[0].link == "https://example.com/entry"
+ assert feed.entries[0].id == "entry-1"
+ assert feed.entries[0].summary == "Entry summary"
+
+ # FeedParserDict level
+ assert feed.version is not None
+ assert feed.bozo is not None
+
+
+# Phase 2: Dict-style access tests (__getitem__)
+
+
+def test_dict_access_feed_fields():
+ """Test dict-style access for feed fields"""
+ xml = """
+
+ Test Feed
+ https://example.com
+ Feed description
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Dict-style access should work
+ assert feed['feed']['title'] == "Test Feed"
+ assert feed['feed']['link'] == "https://example.com"
+ assert feed['feed']['subtitle'] == "Feed description"
+
+ # Mixed access should work
+ assert feed['feed'].title == "Test Feed"
+ assert feed.feed['title'] == "Test Feed"
+
+
+def test_dict_access_entry_fields():
+ """Test dict-style access for entry fields"""
+ xml = """
+
+ -
+ Entry Title
+ https://example.com/entry
+ entry-1
+ Entry summary
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ entry = feed['entries'][0]
+
+ # Dict-style access should work
+ assert entry['title'] == "Entry Title"
+ assert entry['link'] == "https://example.com/entry"
+ assert entry['id'] == "entry-1"
+ assert entry['summary'] == "Entry summary"
+
+ # Mixed access should work
+ assert feed['entries'][0].title == "Entry Title"
+ assert feed.entries[0]['title'] == "Entry Title"
+
+
+def test_dict_access_with_deprecated_aliases():
+ """Test dict-style access with deprecated field names"""
+ xml = """
+ My Feed
+ Feed description
+ 2024-01-01T12:00:00Z
+ Copyright 2024
+
+ entry-1
+ Entry Title
+ Entry summary
+ 2024-01-01T10:00:00Z
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Feed-level deprecated aliases should work with dict access
+ assert feed['feed']['description'] == "Feed description"
+ assert feed['feed']['tagline'] == "Feed description"
+ assert feed['feed']['copyright'] == "Copyright 2024"
+ assert feed['feed']['modified'] is not None
+
+ # Entry-level deprecated aliases should work with dict access
+ entry = feed['entries'][0]
+ assert entry['guid'] == "entry-1"
+ assert entry['description'] == "Entry summary"
+ assert entry['issued'] is not None
+
+
+def test_dict_access_container_aliases():
+ """Test dict-style access with container name aliases"""
+ xml = """
+
+ RSS Feed
+ - Item 1
+ - Item 2
+
+ """
+
+ d = feedparser_rs.parse(xml)
+
+ # channel → feed alias should work with dict access
+ assert d['channel']['title'] == "RSS Feed"
+ assert d['feed']['title'] == "RSS Feed"
+
+ # items → entries alias should work with dict access
+ assert len(d['items']) == 2
+ assert len(d['entries']) == 2
+ assert d['items'][0]['title'] == "Item 1"
+ assert d['entries'][0]['title'] == "Item 1"
+
+
+def test_dict_access_top_level_fields():
+ """Test dict-style access for top-level FeedParserDict fields"""
+ xml = """
+
+ Test
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Top-level fields should be accessible via dict-style
+ assert feed['version'] == 'rss20'
+ assert feed['bozo'] is False
+ assert feed['encoding'] is not None
+
+
+def test_dict_access_unknown_key_raises_keyerror():
+ """Dict access with unknown key should raise KeyError (not AttributeError)"""
+ xml = """
+
+ Test
+ -
+ Test Item
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Unknown keys should raise KeyError for dict access
+ with pytest.raises(KeyError):
+ _ = feed['nonexistent_field']
+
+ with pytest.raises(KeyError):
+ _ = feed['feed']['fake_field']
+
+ with pytest.raises(KeyError):
+ _ = feed['entries'][0]['unknown_key']
+
+ # But AttributeError should still be raised for attribute access
+ with pytest.raises(AttributeError, match="has no attribute"):
+ _ = feed.feed.fake_field
+
+
+def test_dict_and_attribute_access_equivalence():
+ """Test that dict and attribute access return same values"""
+ xml = """
+ My Feed
+ Feed description
+
+ 2024-01-01T12:00:00Z
+
+ entry-1
+ Entry Title
+ Entry summary
+
+ 2024-01-01T10:00:00Z
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Feed-level fields should be identical via both access methods
+ assert feed.feed.title == feed['feed']['title']
+ assert feed.feed.subtitle == feed['feed']['subtitle']
+ assert feed.feed.link == feed['feed']['link']
+ assert feed.feed.updated == feed['feed']['updated']
+
+ # Entry-level fields should be identical via both access methods
+ entry = feed.entries[0]
+ assert entry.id == entry['id']
+ assert entry.title == entry['title']
+ assert entry.summary == entry['summary']
+ assert entry.link == entry['link']
+ assert entry.updated == entry['updated']
+
+ # Top-level fields should be identical
+ assert feed.version == feed['version']
+ assert feed.bozo == feed['bozo']
+
+
+def test_dict_access_with_none_values():
+ """Test dict access returns None for missing optional fields"""
+ xml = """
+
+ Minimal Feed
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # Missing optional fields should return None via dict access
+ assert feed['feed']['subtitle'] is None
+ assert feed['feed']['updated'] is None
+ assert feed['feed']['author'] is None
+ assert feed['feed']['image'] is None
+
+
+def test_dict_access_detail_fields():
+ """Test dict access for _detail fields"""
+ xml = """
+ <b>Bold subtitle</b>
+ Copyright 2024
+
+ Entry summary
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # _detail fields should work with dict access
+ assert feed['feed']['subtitle_detail'] is not None
+ assert feed['feed']['subtitle_detail'].type == 'html'
+
+ assert feed['feed']['rights_detail'] is not None
+ assert feed['feed']['copyright_detail'] is not None
+ assert feed['feed']['copyright_detail'].type == 'text'
+
+ entry = feed['entries'][0]
+ assert entry['summary_detail'] is not None
+ assert entry['description_detail'] is not None
+
+
+def test_dict_access_list_fields():
+ """Test dict access for list fields (links, tags, authors, etc.)"""
+ xml = """
+
+
+
+
+
+
+
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+
+ # List fields should work with dict access
+ assert len(feed['feed']['links']) == 2
+ assert feed['feed']['links'][0].href == "https://example.com/feed"
+
+ assert len(feed['feed']['tags']) == 2
+ assert feed['feed']['tags'][0].term == "technology"
+
+ entry = feed['entries'][0]
+ assert len(entry['links']) >= 1
+ assert len(entry['tags']) == 1
+ assert entry['tags'][0].term == "rust"
+
+
+# =============================================================================
+# Phase 4: Auto-URL Detection Tests
+# =============================================================================
+
+
+def test_parse_with_optional_http_params():
+ """Test that parse() accepts optional HTTP parameters for URL fetching"""
+ # When parsing content (not URL), these params should be ignored
+ xml = """
+
+ Test Feed
+
+ """
+
+ # Should work with optional params (they're just ignored for content)
+ feed = feedparser_rs.parse(xml, etag="some-etag", modified="some-date")
+ assert feed.feed.title == "Test Feed"
+ assert feed.version == 'rss20'
+
+
+def test_parse_with_user_agent_param():
+ """Test that parse() accepts user_agent parameter"""
+ xml = """
+
+ Test Feed
+
+ """
+
+ # Should work with user_agent param (ignored for content)
+ feed = feedparser_rs.parse(xml, user_agent="TestBot/1.0")
+ assert feed.feed.title == "Test Feed"
+
+
+def test_parse_url_detection_http():
+ """Test that parse() detects http:// URLs"""
+ # This test verifies URL detection logic without actually fetching
+ # Since we don't have an HTTP feature enabled or a real server,
+ # we just verify the parse function signature accepts URL-like strings
+ try:
+ # This will either succeed (if http feature enabled and server exists)
+ # or raise NotImplementedError (if http feature disabled)
+ feedparser_rs.parse("http://example.com/nonexistent")
+ except NotImplementedError as e:
+ # http feature not enabled - this is expected
+ assert "http" in str(e).lower()
+ except Exception:
+ # Some other error (network, etc.) - also acceptable
+ pass
+
+
+def test_parse_url_detection_https():
+ """Test that parse() detects https:// URLs"""
+ try:
+ feedparser_rs.parse("https://example.com/nonexistent")
+ except NotImplementedError as e:
+ # http feature not enabled - this is expected
+ assert "http" in str(e).lower()
+ except Exception:
+ # Some other error (network, etc.) - also acceptable
+ pass
+
+
+def test_parse_content_starting_with_http_in_text():
+ """Test that content containing 'http' as text is not treated as URL"""
+ # This should be parsed as content, not as a URL
+ xml = """
+
+ HTTP Guide
+ Learn about http protocol
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ assert feed.feed.title == "HTTP Guide"
+ assert "http" in feed.feed.subtitle.lower()
+
+
+def test_parse_bytes_content():
+ """Test that bytes content is still parsed correctly"""
+ xml = b"""
+
+ Bytes Feed
+
+ """
+
+ feed = feedparser_rs.parse(xml)
+ assert feed.feed.title == "Bytes Feed"
+
+
+def test_parse_with_limits_accepts_http_params():
+ """Test that parse_with_limits() also accepts HTTP parameters"""
+ xml = """
+
+ Test Feed
+
+ """
+
+ limits = feedparser_rs.ParserLimits()
+
+ # Should work with all optional params
+ feed = feedparser_rs.parse_with_limits(
+ xml,
+ etag="etag",
+ modified="modified",
+ user_agent="TestBot/1.0",
+ limits=limits
+ )
+ assert feed.feed.title == "Test Feed"