diff --git a/.gitignore b/.gitignore index bdb74d7..eebd88c 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ venv/ node_modules/ *.node .local/ +dhat-heap.json diff --git a/Cargo.lock b/Cargo.lock index 3950660..cfa1984 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "addr2line" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +dependencies = [ + "gimli", +] + [[package]] name = "adler2" version = "2.0.1" @@ -116,6 +125,21 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "backtrace" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link", +] + [[package]] name = "base64" version = "0.22.1" @@ -167,6 +191,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.51" @@ -261,6 +294,21 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "compression-codecs" version = "0.4.35" @@ -408,6 +456,22 @@ version = "0.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" +[[package]] +name = "dhat" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cd11d84628e233de0ce467de10b8633f4ddaecafadefc86e13b84b8739b827" +dependencies = [ + "backtrace", + "lazy_static", + "mintex", + "parking_lot", + "rustc-hash 1.1.0", + "serde", + "serde_json", + "thousands", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -476,7 +540,9 @@ version = "0.3.0" dependencies = [ "ammonia", "chrono", + "compact_str", "criterion", + "dhat", "encoding_rs", "flate2", "html-escape", @@ -667,6 +733,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "gimli" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" + [[package]] name = "h2" version = "0.4.12" @@ -1024,6 +1096,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.178" @@ -1126,6 +1204,12 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "mintex" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c505b3e17ed6b70a7ed2e67fbb2c560ee327353556120d6e72f5232b6880d536" + [[package]] name = "mio" version = "1.1.1" @@ -1175,7 +1259,7 @@ dependencies = [ "napi-build", "napi-sys", "nohash-hasher", - "rustc-hash", + "rustc-hash 2.1.1", ] [[package]] @@ -1241,6 +1325,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -1505,7 +1598,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror", @@ -1525,7 +1618,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash", + "rustc-hash 2.1.1", "rustls", "rustls-pki-types", "slab", @@ -1720,6 +1813,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -1901,6 +2006,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "string_cache" version = "0.8.9" @@ -2000,6 +2111,12 @@ dependencies = [ "syn", ] +[[package]] +name = "thousands" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" + [[package]] name = "tinystr" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 2e5b358..9006134 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ categories = ["parsing", "web-programming"] ammonia = "4.1" anyhow = "1.0" chrono = { version = "0.4", default-features = false } +compact_str = { version = "0.9", features = ["serde"] } criterion = "0.8" encoding_rs = "0.8" flate2 = "1.1" diff --git a/crates/feedparser-rs-core/Cargo.toml b/crates/feedparser-rs-core/Cargo.toml index 93d482d..da5a961 100644 --- a/crates/feedparser-rs-core/Cargo.toml +++ b/crates/feedparser-rs-core/Cargo.toml @@ -13,6 +13,7 @@ repository.workspace = true [dependencies] ammonia.workspace = true chrono = { workspace = true, features = ["std", "clock"] } +compact_str.workspace = true encoding_rs.workspace = true html-escape.workspace = true memchr.workspace = true @@ -36,6 +37,7 @@ http = ["dep:reqwest"] [dev-dependencies] criterion = { workspace = true, features = ["html_reports"] } +dhat = "0.3.3" mockito.workspace = true flate2.workspace = true @@ -43,5 +45,9 @@ flate2.workspace = true name = "parsing" harness = false +[[bench]] +name = "types" +harness = false + [lints] workspace = true diff --git a/crates/feedparser-rs-core/benches/types.rs b/crates/feedparser-rs-core/benches/types.rs new file mode 100644 index 0000000..24893c9 --- /dev/null +++ 
b/crates/feedparser-rs-core/benches/types.rs
@@ -0,0 +1,187 @@
+//! Benchmarks for newtype wrappers: Arc<str> vs String performance
+//!
+//! Tests the claim that Arc<str> cloning is faster than String cloning
+//! for `MimeType`, `Url`, and `Email` types.
+//!
+//! Key questions:
+//! 1. Is `Arc::clone()` actually faster for typical MIME types?
+//! 2. What's the creation overhead of Arc<str> vs String?
+//! 3. What's the break-even point (clones needed to justify Arc)?
+
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use feedparser_rs::{Email, MimeType, Url};
+use std::hint::black_box;
+use std::sync::Arc;
+
+/// Benchmark `MimeType` (`Arc<str>`) cloning vs String cloning
+fn bench_mimetype_clone(c: &mut Criterion) {
+    let mut group = c.benchmark_group("MimeType_clone");
+
+    // Test common MIME types found in RSS/Atom feeds
+    let mime_types = [
+        "text/html",            // Short (9 bytes)
+        "text/plain",           // Short (10 bytes)
+        "application/xml",      // Medium (15 bytes)
+        "application/rss+xml",  // Medium (19 bytes)
+        "application/atom+xml", // Medium (20 bytes)
+        "application/json",     // Medium (16 bytes)
+        "audio/mpeg",           // Short (10 bytes)
+        "video/mp4",            // Short (9 bytes)
+        "image/jpeg",           // Short (10 bytes)
+        "application/pdf",      // Medium (15 bytes)
+    ];
+
+    for mime_str in &mime_types {
+        let mime = MimeType::new(*mime_str);
+
+        group.bench_with_input(BenchmarkId::new("Arc_str", mime_str), &mime, |b, m| {
+            b.iter(|| black_box(m.clone()));
+        });
+
+        // Compare to direct String clone
+        let string = mime_str.to_string();
+        group.bench_with_input(BenchmarkId::new("String", mime_str), &string, |b, s| {
+            b.iter(|| black_box(s.clone()));
+        });
+
+        // Compare to Arc<String> (alternative design)
+        let arc_string = Arc::new(mime_str.to_string());
+        group.bench_with_input(
+            BenchmarkId::new("Arc_String", mime_str),
+            &arc_string,
+            |b, a| b.iter(|| black_box(Arc::clone(a))),
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark `MimeType` creation overhead: `Arc<str>::from(str)` vs `String::from(str)`
+fn bench_mimetype_creation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("MimeType_creation");
+
+    let mime_types = ["text/html", "application/rss+xml", "application/atom+xml"];
+
+    for mime_str in &mime_types {
+        group.bench_with_input(
+            BenchmarkId::new("Arc_from_str", mime_str),
+            mime_str,
+            |b, s| b.iter(|| MimeType::new(black_box(*s))),
+        );
+
+        group.bench_with_input(
+            BenchmarkId::new("String_from_str", mime_str),
+            mime_str,
+            |b, s| b.iter(|| black_box(*s).to_string()),
+        );
+
+        group.bench_with_input(
+            BenchmarkId::new("Arc_str_from_str", mime_str),
+            mime_str,
+            |b, s| b.iter(|| Arc::<str>::from(black_box(*s))),
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark `Url` (String) cloning
+fn bench_url_clone(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Url_clone");
+
+    let urls = [
+        "https://example.com",                            // Short URL (19 bytes)
+        "https://example.com/feed.xml",                   // Medium URL (28 bytes)
+        "https://example.com/feed.xml?param=value",       // Long URL (40 bytes)
+        "https://example.com/blog/2024/12/article-title", // Long URL (46 bytes)
+    ];
+
+    for url_str in &urls {
+        let url = Url::new(*url_str);
+
+        group.bench_with_input(BenchmarkId::new("String", url_str), &url, |b, u| {
+            b.iter(|| black_box(u.clone()));
+        });
+
+        // Compare to Arc<str> (if we switched)
+        let arc_url = Arc::<str>::from(*url_str);
+        group.bench_with_input(BenchmarkId::new("Arc_str", url_str), &arc_url, |b, a| {
+            b.iter(|| black_box(Arc::clone(a)));
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmark `Email` (String) cloning
+fn bench_email_clone(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Email_clone");
+
+    let emails = [
+        "user@example.com",                      // Short email (16 bytes)
+        "john.doe@example.com",                  // Medium email (20 bytes)
+        "very.long.email@subdomain.example.com", // Long email (37 bytes)
+    ];
+
+    for email_str in &emails {
+        let email = Email::new(*email_str);
+
+        group.bench_with_input(BenchmarkId::new("String", email_str), &email, |b, e| {
+            b.iter(|| black_box(e.clone()));
+        });
+
+        // Compare to Arc<str> (if we switched)
+        let arc_email = Arc::<str>::from(*email_str);
+        group.bench_with_input(
+            BenchmarkId::new("Arc_str", email_str),
+            &arc_email,
+            |b, a| b.iter(|| black_box(Arc::clone(a))),
+        );
+    }
+
+    group.finish();
+}
+
+/// Benchmark break-even analysis: creation + N clones
+///
+/// Tests: At how many clones does Arc become faster than String?
+fn bench_breakeven_analysis(c: &mut Criterion) {
+    let mut group = c.benchmark_group("breakeven_analysis");
+
+    let mime_str = "application/rss+xml";
+    let clone_counts = [1, 2, 5, 10, 20, 50, 100];
+
+    for &n_clones in &clone_counts {
+        // Arc approach: 1 creation + N clones
+        group.bench_with_input(BenchmarkId::new("Arc_str", n_clones), &n_clones, |b, &n| {
+            b.iter(|| {
+                let mime = MimeType::new(black_box(mime_str));
+                for _ in 0..n {
+                    let _ = black_box(mime.clone());
+                }
+            });
+        });
+
+        // String approach: 1 creation + N clones
+        group.bench_with_input(BenchmarkId::new("String", n_clones), &n_clones, |b, &n| {
+            b.iter(|| {
+                let s = black_box(mime_str).to_string();
+                for _ in 0..n {
+                    let _ = black_box(s.clone());
+                }
+            });
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_mimetype_clone,
+    bench_mimetype_creation,
+    bench_url_clone,
+    bench_email_clone,
+    bench_breakeven_analysis,
+);
+criterion_main!(benches);
diff --git a/crates/feedparser-rs-core/examples/error_handling.rs b/crates/feedparser-rs-core/examples/error_handling.rs
new file mode 100644
index 0000000..36bbd89
--- /dev/null
+++ b/crates/feedparser-rs-core/examples/error_handling.rs
@@ -0,0 +1,263 @@
+//! Example: Error handling and the bozo pattern
+//!
+//! Demonstrates:
+//! - The bozo flag for malformed feeds
+//! - Graceful error recovery
+//! - Extracting data from broken feeds
+//! - Different types of parsing errors
+//! - Resource limits protection
+//!
+//! The "bozo" pattern (from Python feedparser) means:
+//! - Never panic on malformed input
+//! - Set bozo=true flag when issues occur
+//! - Continue parsing and extract whatever data is available
+//!
+//! Run with:
+//! ```bash
+//! cargo run --example error_handling
+//! ```
+
+use feedparser_rs::{ParserLimits, parse, parse_with_limits};
+use std::fs;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Feed Parser Example: Error Handling ===\n");
+
+    // Example 1: Parse malformed feed (bozo pattern)
+    malformed_feed_example()?;
+
+    println!("\n{}\n", "=".repeat(60));
+
+    // Example 2: Resource limits
+    resource_limits_example();
+
+    println!("\n{}\n", "=".repeat(60));
+
+    // Example 3: Invalid XML recovery
+    invalid_xml_example();
+
+    println!("\n{}\n", "=".repeat(60));
+
+    // Example 4: Network error handling
+    network_error_example();
+
+    Ok(())
+}
+
+fn malformed_feed_example() -> Result<(), Box<dyn std::error::Error>> {
+    println!("Example 1: Malformed Feed (Bozo Pattern)");
+    println!("{}", "-".repeat(40));
+
+    let feed_path = "examples/feeds/malformed_feed.xml";
+    let feed_data = fs::read(feed_path)?;
+
+    println!("Parsing feed with known issues...\n");
+
+    // The parser will NOT panic, even with malformed XML
+    let feed = parse(&feed_data)?;
+
+    // Check the bozo flag
+    println!("Bozo flag: {}", feed.bozo);
+
+    if feed.bozo {
+        println!("Feed has parsing issues!");
+
+        if let Some(exception) = &feed.bozo_exception {
+            println!("Exception details: {exception}");
+        }
+
+        println!("\nDespite errors, we can still extract data:");
+    }
+
+    // Even with errors, we can extract available data
+    if let Some(title) = &feed.feed.title {
+        println!(" Feed title: {title}");
+    }
+
+    if let Some(link) = &feed.feed.link {
+        println!(" Feed link: {link}");
+    }
+
+    println!("\nEntries found: {}", feed.entries.len());
+    for (i, entry) in feed.entries.iter().enumerate() {
+        println!("\n Entry {}:", i + 1);
+        if let Some(title) = &entry.title {
+            println!(" Title: {title}");
+        }
+        if let Some(link) = &entry.link {
+            println!(" Link: {link}");
+        }
+        if let Some(summary) = &entry.summary {
+            println!(" Summary: {summary}");
+        }
+
+        // Some entries may have unparseable dates
+        if let Some(published) = &entry.published {
+            println!(" Published: {published}");
+        } else {
+            println!(" Published: (unable to parse date)");
+        }
+    }
+
+    println!("\nKey takeaway: The parser extracts as much data as possible,");
+    println!("even when the feed has errors. Always check the bozo flag!");
+
+    Ok(())
+}
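+
+// A minimal sketch (not called from `main`) of how an application might
+// branch on the bozo flag; `parse`, `bozo`, and `bozo_exception` are the
+// real API used above, but the salvage policy here is only an illustration.
+#[allow(dead_code)]
+fn salvage_entries(data: &[u8]) -> usize {
+    match parse(data) {
+        // Clean parse: use everything
+        Ok(feed) if !feed.bozo => feed.entries.len(),
+        // Malformed but recoverable: log the issue, keep what was extracted
+        Ok(feed) => {
+            if let Some(e) = &feed.bozo_exception {
+                eprintln!("feed had issues: {e}");
+            }
+            feed.entries.len()
+        }
+        // Unrecoverable (e.g., not a feed at all)
+        Err(_) => 0,
+    }
+}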
+
+fn resource_limits_example() {
+    println!("Example 2: Resource Limits Protection");
+    println!("{}", "-".repeat(40));
+
+    // Create a feed that exceeds limits
+    let huge_feed = format!(
+        r#"
+<rss version="2.0">
+<channel>
+<title>{}</title>
+<link>https://example.com</link>
+</channel>
+</rss>"#,
+        "A".repeat(200_000)
+    );
+
+    println!("Testing with strict limits:");
+    let strict_limits = ParserLimits::strict();
+    println!(" Max text length: {}", strict_limits.max_text_length);
+    println!(" Max entries: {}", strict_limits.max_entries);
+
+    match parse_with_limits(huge_feed.as_bytes(), strict_limits) {
+        Ok(feed) => {
+            println!("\nParsed with limits:");
+            if let Some(title) = &feed.feed.title {
+                println!(" Title length: {} chars (may be truncated)", title.len());
+            }
+        }
+        Err(e) => {
+            println!("\nLimits exceeded: {e}");
+            println!("This protects against DoS attacks and resource exhaustion.");
+        }
+    }
+
+    // Now try with default (more permissive) limits
+    println!("\n\nTesting with default limits:");
+    let default_limits = ParserLimits::default();
+    println!(" Max text length: {}", default_limits.max_text_length);
+
+    match parse_with_limits(huge_feed.as_bytes(), default_limits) {
+        Ok(feed) => {
+            println!("\nParsed successfully:");
+            if let Some(title) = &feed.feed.title {
+                println!(" Title length: {} chars", title.len());
+            }
+        }
+        Err(e) => {
+            println!("Error: {e}");
+        }
+    }
+
+    println!("\nUse strict limits for untrusted input!");
+    println!("Use default limits for known/trusted feeds.");
+}
+
+fn invalid_xml_example() {
+    println!("Example 3: Invalid XML Recovery");
+    println!("{}", "-".repeat(40));
+
+    // Various types of invalid XML
+    let test_cases = vec![
+        (
+            "Unclosed tag",
+            b"<rss version='2.0'><channel><title>Test</channel></rss>".as_slice(),
+        ),
+        (
+            "Invalid entity",
+            b"<rss version='2.0'><channel><title>Test &#xffff;</title></channel></rss>".as_slice(),
+        ),
+        (
+            "Missing required elements",
+            b"<rss version='2.0'></rss>".as_slice(),
+        ),
+    ];
+
+    for (name, xml) in test_cases {
+        println!("\nTest case: {name}");
+        print!(" ");
+
+        match parse(xml) {
+            Ok(feed) => {
+                if feed.bozo {
+                    println!("Parsed with bozo flag set");
+                    if let Some(ex) = &feed.bozo_exception {
+                        println!(" Exception: {ex}");
+                    }
+                } else {
+                    println!("Parsed successfully");
+                }
+
+                // Show what we recovered
+                if feed.feed.title.is_some() {
+                    println!(" Recovered title: {:?}", feed.feed.title);
+                }
+            }
+            Err(e) => {
+                // Some errors are unrecoverable
+                println!("Unrecoverable error: {e}");
+            }
+        }
+    }
+
+    println!("\n\nThe parser attempts to recover from common XML errors");
+    println!("and extract as much information as possible.");
+}
+
+fn network_error_example() {
+    println!("Example 4: Network Error Handling");
+    println!("{}", "-".repeat(40));
+
+    #[cfg(feature = "http")]
+    {
+        use feedparser_rs::parse_url;
+
+        println!("Testing various network scenarios:\n");
+
+        // Test case 1: Invalid URL
+        println!("1. Invalid URL:");
+        match parse_url("not-a-valid-url", None, None, None) {
+            Ok(_) => println!(" Unexpected success"),
+            Err(e) => println!(" Error (expected): {e}"),
+        }
+
+        // Test case 2: Non-existent domain
+        println!("\n2. Non-existent domain:");
+        match parse_url(
+            "https://this-domain-definitely-does-not-exist-12345.com/feed.xml",
+            None,
+            None,
+            None,
+        ) {
+            Ok(_) => println!(" Unexpected success"),
+            Err(e) => println!(" Error (expected): {e}"),
+        }
+
+        // Test case 3: 404 Not Found
+        println!("\n3. 
HTTP 404:"); + match parse_url("https://httpbin.org/status/404", None, None, None) { + Ok(_) => println!(" Unexpected success"), + Err(e) => println!(" Error (expected): {e}"), + } + + println!("\n\nProper error handling:"); + println!("- Use Result type for all fallible operations"); + println!("- Match on specific error types for better UX"); + println!("- Provide helpful error messages to users"); + println!("- Implement retry logic for transient failures"); + println!("- Use timeouts to prevent hanging"); + } + + #[cfg(not(feature = "http"))] + { + println!("HTTP feature not enabled."); + println!("Enable with: cargo run --example error_handling --features http"); + } +} diff --git a/crates/feedparser-rs-core/examples/feeds/malformed_feed.xml b/crates/feedparser-rs-core/examples/feeds/malformed_feed.xml new file mode 100644 index 0000000..6f002cc --- /dev/null +++ b/crates/feedparser-rs-core/examples/feeds/malformed_feed.xml @@ -0,0 +1,28 @@ + + + + Test Feed with Issues + https://example.com/bad-feed + This feed has various malformations to test error handling + + + Post with unclosed tag + https://example.com/post1 + Missing closing link tag + Invalid date format here + + + + Post with invalid entity: ￿ + https://example.com/post2 + Contains invalid XML character + + + + Normal Post + https://example.com/post3 + This one is fine + Sat, 28 Dec 2024 12:00:00 GMT + + + diff --git a/crates/feedparser-rs-core/examples/feeds/sample_atom.xml b/crates/feedparser-rs-core/examples/feeds/sample_atom.xml new file mode 100644 index 0000000..7448419 --- /dev/null +++ b/crates/feedparser-rs-core/examples/feeds/sample_atom.xml @@ -0,0 +1,51 @@ + + + Example Science Feed + + + 2024-12-28T12:00:00Z + https://example.com/science + + Dr. Alice Cooper + alice@example.com + https://example.com/authors/alice + + Latest discoveries in science and technology + + + Quantum Computing Breakthrough + + https://example.com/science/quantum-2024 + 2024-12-28T10:00:00Z + 2024-12-28T10:00:00Z + + Dr. Alice Cooper + alice@example.com + + New advances in quantum error correction + + <p>Researchers have made significant progress in quantum error correction, + bringing us closer to practical quantum computers.</p> + + + + + + + Climate Change Impact Study + + https://example.com/science/climate-study + 2024-12-27T15:30:00Z + 2024-12-27T15:30:00Z + + Dr. Bob Zhang + bob@example.com + + Long-term effects on coastal ecosystems + + New research shows accelerating changes in coastal ecosystems due to rising temperatures. 
+ + + + + diff --git a/crates/feedparser-rs-core/examples/feeds/sample_podcast.xml b/crates/feedparser-rs-core/examples/feeds/sample_podcast.xml new file mode 100644 index 0000000..5a42528 --- /dev/null +++ b/crates/feedparser-rs-core/examples/feeds/sample_podcast.xml @@ -0,0 +1,67 @@ + + + + Tech Talk Podcast + https://example.com/podcast + Weekly discussions about technology and programming + en + + + Alex Johnson + Weekly discussions about technology and programming + + Alex Johnson + alex@example.com + + no + + + + + + Support the show + Alex Johnson + + + Episode 42: The Future of Rust + https://example.com/podcast/ep42 + https://example.com/podcast/ep42 + Discussing the upcoming features in Rust 2024 edition + Sat, 28 Dec 2024 08:00:00 GMT + + + Alex Johnson + Rust 2024 edition preview + Discussing the upcoming features in Rust 2024 edition + 3600 + no + 42 + full + + + + Best quote from the episode + Jane Developer + + + + Episode 41: Web Assembly Deep Dive + https://example.com/podcast/ep41 + https://example.com/podcast/ep41 + Everything you need to know about WebAssembly + Sat, 21 Dec 2024 08:00:00 GMT + + + Alex Johnson + Understanding WebAssembly + Everything you need to know about WebAssembly + 2700 + no + 41 + full + + + diff --git a/crates/feedparser-rs-core/examples/feeds/sample_rss.xml b/crates/feedparser-rs-core/examples/feeds/sample_rss.xml new file mode 100644 index 0000000..ea9b333 --- /dev/null +++ b/crates/feedparser-rs-core/examples/feeds/sample_rss.xml @@ -0,0 +1,44 @@ + + + + Example Tech Blog + https://example.com/blog + Latest articles about technology and programming + en-us + Sat, 28 Dec 2024 12:00:00 GMT + Sat, 28 Dec 2024 12:00:00 GMT + + + + Getting Started with Rust + https://example.com/blog/rust-intro + https://example.com/blog/rust-intro + Learn the basics of Rust programming language + Sat, 28 Dec 2024 10:00:00 GMT + john@example.com (John Doe) + Programming + Rust + + + + Web Performance Optimization + https://example.com/blog/web-perf + https://example.com/blog/web-perf + Tips for making your website faster + Fri, 27 Dec 2024 14:30:00 GMT + jane@example.com (Jane Smith) + Web Development + + + + + Database Design Patterns + https://example.com/blog/db-patterns + db-patterns-2024 + Common patterns for designing scalable databases + Thu, 26 Dec 2024 09:15:00 GMT + bob@example.com (Bob Wilson) + Databases + + + diff --git a/crates/feedparser-rs-core/examples/parse_file.rs b/crates/feedparser-rs-core/examples/parse_file.rs new file mode 100644 index 0000000..4a850df --- /dev/null +++ b/crates/feedparser-rs-core/examples/parse_file.rs @@ -0,0 +1,212 @@ +//! Example: Parse feed from local file +//! +//! Demonstrates: +//! - Reading feed from filesystem +//! - Basic feed metadata access +//! - Using type-safe Url, Email wrappers +//! - Iterating over entries +//! +//! Run with: +//! ```bash +//! cargo run --example parse_file +//! 
```
+
+use feedparser_rs::parse;
+use std::fs;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Feed Parser Example: Local File ===\n");
+
+    // Example 1: Parse RSS 2.0 feed
+    parse_rss_example()?;
+
+    println!("\n{}\n", "=".repeat(60));
+
+    // Example 2: Parse Atom feed
+    parse_atom_example()?;
+
+    Ok(())
+}
+
+fn parse_rss_example() -> Result<(), Box<dyn std::error::Error>> {
+    println!("Example 1: RSS 2.0 Feed");
+    println!("{}", "-".repeat(40));
+
+    // Read feed from file
+    let feed_path = "examples/feeds/sample_rss.xml";
+    let feed_data = fs::read(feed_path)?;
+
+    // Parse the feed
+    let feed = parse(&feed_data)?;
+
+    // Check for parsing issues (bozo pattern)
+    if feed.bozo {
+        println!("Warning: Feed had parsing issues");
+        if let Some(exception) = &feed.bozo_exception {
+            println!("Exception: {exception}");
+        }
+    }
+
+    // Display feed metadata
+    println!("\nFeed Metadata:");
+    println!(" Version: {}", feed.version);
+    println!(" Encoding: {}", feed.encoding);
+
+    if let Some(title) = &feed.feed.title {
+        println!(" Title: {title}");
+    }
+
+    // Demonstrate type-safe Url access
+    if let Some(link) = &feed.feed.link {
+        println!(" Link: {}", link.as_str());
+        // Url derefs to str, so string methods work directly
+        if link.starts_with("https://") {
+            println!(" (secure URL)");
+        }
+    }
+
+    if let Some(subtitle) = &feed.feed.subtitle {
+        println!(" Subtitle: {subtitle}");
+    }
+
+    if let Some(language) = &feed.feed.language {
+        println!(" Language: {language}");
+    }
+
+    // Display entries
+    println!("\nEntries ({} total):", feed.entries.len());
+    for (i, entry) in feed.entries.iter().enumerate().take(3) {
+        println!("\n Entry {}:", i + 1);
+
+        if let Some(title) = &entry.title {
+            println!(" Title: {title}");
+        }
+
+        if let Some(link) = &entry.link {
+            println!(" Link: {link}");
+        }
+
+        // Demonstrate Email type access
+        if let Some(author) = &entry.author {
+            println!(" Author: {author}");
+        }
+
+        if let Some(published) = &entry.published {
+            println!(" Published: {published}");
+        }
+
+        // Show categories/tags
+        if !entry.tags.is_empty() {
+            let categories: Vec<&str> = entry.tags.iter().map(|t| t.term.as_str()).collect();
+            println!(" Categories: {}", categories.join(", "));
+        }
+
+        // Show enclosures (media attachments)
+        if !entry.enclosures.is_empty() {
+            println!(" Enclosures:");
+            for enc in &entry.enclosures {
+                println!(" - {}", enc.url);
+                if let Some(enclosure_type) = &enc.enclosure_type {
+                    println!(" Type: {enclosure_type}");
+                }
+                if let Some(length) = enc.length {
+                    println!(" Size: {length} bytes");
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+fn parse_atom_example() -> Result<(), Box<dyn std::error::Error>> {
+    println!("Example 2: Atom Feed");
+    println!("{}", "-".repeat(40));
+
+    let feed_path = "examples/feeds/sample_atom.xml";
+    let feed_data = fs::read(feed_path)?;
+
+    let feed = parse(&feed_data)?;
+
+    println!("\nFeed Metadata:");
+    println!(" Version: {}", feed.version);
+
+    if let Some(title) = &feed.feed.title {
+        println!(" Title: {title}");
+    }
+
+    if let Some(subtitle) = &feed.feed.subtitle {
+        println!(" Subtitle: {subtitle}");
+    }
+
+    // Atom feeds often have multiple authors
+    if !feed.feed.authors.is_empty() {
+        println!("\n Authors:");
+        for author in &feed.feed.authors {
+            if let Some(name) = &author.name {
+                print!(" - {name}");
+            }
+            if let Some(email) = &author.email {
+                print!(" <{email}>");
+            }
+            if let Some(uri) = &author.uri {
+                print!(" ({uri})");
+            }
+            println!();
+        }
+    }
+
+    // Atom supports multiple links with different rel values
+    if !feed.feed.links.is_empty() {
+        println!("\n Links:");
+        for link in &feed.feed.links {
+            print!(" - {}", link.href);
+            if let Some(rel) = &link.rel {
+                print!(" [rel={rel}]");
+            }
+            if let Some(link_type) = &link.link_type {
+                print!(" ({link_type})");
+            }
+            println!();
+        }
+    }
+
+    println!("\nEntries ({} total):", feed.entries.len());
+    for (i, entry) in feed.entries.iter().enumerate() {
+        println!("\n Entry {}:", i + 1);
+
+        if let Some(title) = &entry.title {
+            println!(" Title: {title}");
+        }
+
+        if let Some(id) = &entry.id {
+            println!(" ID: {id}");
+        }
+
+        if let Some(summary) = &entry.summary {
+            println!(" Summary: {summary}");
+        }
+
+        // Atom content can have different types
+        if !entry.content.is_empty() {
+            let content = &entry.content[0];
+            if let Some(content_type) = &content.content_type {
+                println!(" Content type: {content_type}");
+            }
+            let value = &content.value;
+            let preview = if value.len() > 100 {
+                format!("{}...", &value[..100])
+            } else {
+                value.clone()
+            };
+            println!(" Content: {preview}");
+        }
+
+        if !entry.tags.is_empty() {
+            let categories: Vec<&str> = entry.tags.iter().map(|t| t.term.as_str()).collect();
+            println!(" Categories: {}", categories.join(", "));
+        }
+    }
+
+    Ok(())
+}
diff --git a/crates/feedparser-rs-core/examples/parse_url.rs b/crates/feedparser-rs-core/examples/parse_url.rs
new file mode 100644
index 0000000..a327c30
--- /dev/null
+++ b/crates/feedparser-rs-core/examples/parse_url.rs
@@ -0,0 +1,184 @@
+//! Example: Parse feed from URL with HTTP fetching
+//!
+//! Demonstrates:
+//! - Fetching feeds from HTTP/HTTPS URLs
+//! - Conditional GET with ETag/Last-Modified caching
+//! - HTTP metadata handling
+//! - Error handling for network issues
+//!
+//! Requires the 'http' feature (enabled by default)
+//!
+//! Run with:
+//! ```bash
+//! cargo run --example parse_url
+//! ```
+
+use feedparser_rs::parse_url;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Feed Parser Example: HTTP Fetching ===\n");
+
+    // Example 1: Simple URL fetch
+    simple_fetch_example()?;
+
+    println!("\n{}\n", "=".repeat(60));
+
+    // Example 2: Conditional GET with caching
+    conditional_get_example()?;
+
+    Ok(())
+}
+
+fn simple_fetch_example() -> Result<(), Box<dyn std::error::Error>> {
+    println!("Example 1: Simple URL Fetch");
+    println!("{}", "-".repeat(40));
+
+    // Use a real public RSS feed
+    // BBC News RSS feed is reliable and publicly available
+    let url = "https://feeds.bbci.co.uk/news/rss.xml";
+
+    println!("Fetching feed from: {url}");
+    println!("Please wait...\n");
+
+    match parse_url(url, None, None, Some("feedparser-rs-example/1.0")) {
+        Ok(feed) => {
+            println!("Success!\n");
+
+            // HTTP metadata
+            if let Some(status) = feed.status {
+                println!("HTTP Status: {status}");
+            }
+
+            if let Some(href) = &feed.href {
+                println!("Final URL: {href}");
+            }
+
+            if let Some(etag) = &feed.etag {
+                println!("ETag: {etag}");
+            }
+
+            if let Some(modified) = &feed.modified {
+                println!("Last-Modified: {modified}");
+            }
+
+            println!("\nFeed Metadata:");
+            println!(" Version: {}", feed.version);
+            println!(" Encoding: {}", feed.encoding);
+
+            if let Some(title) = &feed.feed.title {
+                println!(" Title: {title}");
+            }
+
+            if let Some(link) = &feed.feed.link {
+                println!(" Link: {link}");
+            }
+
+            if let Some(subtitle) = &feed.feed.subtitle {
+                let preview = if subtitle.len() > 100 {
+                    format!("{}...", &subtitle[..100])
+                } else {
+                    subtitle.clone()
+                };
+                println!(" Subtitle: {preview}");
+            }
+
+            println!("\nLatest Entries (first 5):");
+            for (i, entry) in feed.entries.iter().enumerate().take(5) {
+                println!(
{}", + i + 1, + entry.title.as_deref().unwrap_or("[No title]") + ); + + if let Some(link) = &entry.link { + println!(" {link}"); + } + + if let Some(published) = &entry.published { + println!(" Published: {published}"); + } + } + + println!("\nTotal entries: {}", feed.entries.len()); + } + Err(e) => { + eprintln!("Error fetching feed: {e}"); + eprintln!("\nNote: This example requires internet connectivity."); + eprintln!("If you're offline, try the parse_file example instead."); + return Err(e.into()); + } + } + + Ok(()) +} + +#[allow(clippy::unnecessary_wraps)] +fn conditional_get_example() -> Result<(), Box> { + println!("Example 2: Conditional GET with Caching"); + println!("{}", "-".repeat(40)); + + // Use NPR News RSS feed (another reliable public feed) + let url = "https://feeds.npr.org/1001/rss.xml"; + + println!("First fetch (no cache):"); + println!("Fetching from: {url}"); + + let first_fetch = match parse_url(url, None, None, Some("feedparser-rs-example/1.0")) { + Ok(feed) => feed, + Err(e) => { + eprintln!("Error: {e}"); + eprintln!("Skipping conditional GET example (requires internet)"); + return Ok(()); + } + }; + + println!("Success!"); + if let Some(title) = &first_fetch.feed.title { + println!(" Title: {title}"); + } + + // Save caching headers + let etag = first_fetch.etag.clone(); + let modified = first_fetch.modified; + + println!("\nCaching headers received:"); + if let Some(ref e) = etag { + println!(" ETag: {e}"); + } + if let Some(ref m) = modified { + println!(" Last-Modified: {m}"); + } + + // Second fetch with caching headers + println!("\nSecond fetch (with cache validation):"); + println!("Sending If-None-Match and If-Modified-Since headers..."); + + match parse_url( + url, + etag.as_deref(), + modified.as_deref(), + Some("feedparser-rs-example/1.0"), + ) { + Ok(second_fetch) => { + if second_fetch.status == Some(304) { + println!("\nHTTP 304 Not Modified"); + println!("Feed hasn't changed since last fetch."); + println!("Use cached version to save bandwidth!"); + } else if second_fetch.status == Some(200) { + println!("\nHTTP 200 OK"); + println!("Feed was modified, received new content."); + println!("Entries: {}", second_fetch.entries.len()); + } + } + Err(e) => { + eprintln!("Error on second fetch: {e}"); + } + } + + println!("\nBandwidth Savings:"); + println!("- First fetch: Full download"); + println!("- Second fetch: Headers only (if 304)"); + println!("- Typical savings: 90%+ for unchanged feeds"); + + Ok(()) +} diff --git a/crates/feedparser-rs-core/examples/podcast_feed.rs b/crates/feedparser-rs-core/examples/podcast_feed.rs new file mode 100644 index 0000000..fd0d44d --- /dev/null +++ b/crates/feedparser-rs-core/examples/podcast_feed.rs @@ -0,0 +1,273 @@ +//! Example: Parse podcast feed with iTunes and Podcast 2.0 metadata +//! +//! Demonstrates: +//! - iTunes podcast namespace (itunes:*) +//! - Podcast 2.0 namespace (podcast:*) +//! - Episode metadata and chapters +//! - Podcast categories and artwork +//! - Duration parsing and explicit flags +//! +//! Run with: +//! ```bash +//! cargo run --example podcast_feed +//! 
diff --git a/crates/feedparser-rs-core/examples/podcast_feed.rs b/crates/feedparser-rs-core/examples/podcast_feed.rs
new file mode 100644
index 0000000..fd0d44d
--- /dev/null
+++ b/crates/feedparser-rs-core/examples/podcast_feed.rs
@@ -0,0 +1,273 @@
+//! Example: Parse podcast feed with iTunes and Podcast 2.0 metadata
+//!
+//! Demonstrates:
+//! - iTunes podcast namespace (itunes:*)
+//! - Podcast 2.0 namespace (podcast:*)
+//! - Episode metadata and chapters
+//! - Podcast categories and artwork
+//! - Duration parsing and explicit flags
+//!
+//! Run with:
+//! ```bash
+//! cargo run --example podcast_feed
+//! ```
+
+use feedparser_rs::parse;
+use std::fs;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Feed Parser Example: Podcast Feed ===\n");
+
+    let feed_path = "examples/feeds/sample_podcast.xml";
+    let feed_data = fs::read(feed_path)?;
+
+    let feed = parse(&feed_data)?;
+
+    println!("Feed Version: {}", feed.version);
+    println!("{}", "=".repeat(60));
+
+    // Display podcast feed-level metadata
+    display_podcast_metadata(&feed);
+
+    println!("\n{}\n", "=".repeat(60));
+
+    // Display episode details
+    display_episodes(&feed);
+
+    Ok(())
+}
+
+fn display_podcast_metadata(feed: &feedparser_rs::ParsedFeed) {
+    println!("Podcast Metadata:");
+    println!("{}", "-".repeat(40));
+
+    if let Some(title) = &feed.feed.title {
+        println!("\nTitle: {title}");
+    }
+
+    if let Some(subtitle) = &feed.feed.subtitle {
+        println!("Subtitle: {subtitle}");
+    }
+
+    // iTunes-specific metadata
+    if let Some(itunes) = &feed.feed.itunes {
+        println!("\niTunes Metadata:");
+
+        if let Some(author) = &itunes.author {
+            println!(" Author: {author}");
+        }
+
+        // Owner information
+        if let Some(owner) = &itunes.owner {
+            println!(" Owner:");
+            if let Some(name) = &owner.name {
+                println!(" Name: {name}");
+            }
+            if let Some(email) = &owner.email {
+                println!(" Email: {email}");
+            }
+        }
+
+        // Explicit content flag
+        if let Some(explicit) = itunes.explicit {
+            println!(" Explicit: {}", if explicit { "YES" } else { "NO" });
+        }
+
+        // Artwork
+        if let Some(image) = &itunes.image {
+            println!(" Artwork: {image}");
+        }
+
+        // Categories (iTunes podcasts can have nested categories)
+        if !itunes.categories.is_empty() {
+            println!(" Categories:");
+            for cat in &itunes.categories {
+                print!(" - {}", cat.text);
+                if let Some(subcategory) = &cat.subcategory {
+                    print!(" > {subcategory}");
+                }
+                println!();
+            }
+        }
+
+        if let Some(podcast_type) = &itunes.podcast_type {
+            println!(" Type: {podcast_type}");
+        }
+
+        if itunes.complete == Some(true) {
+            println!(" Status: Complete (no more episodes will be released)");
+        }
+    }
+
+    // Podcast 2.0 metadata
+    if let Some(podcast) = &feed.feed.podcast {
+        println!("\nPodcast 2.0 Metadata:");
+
+        // Funding/support information
+        if !podcast.funding.is_empty() {
+            println!(" Funding:");
+            for funding in &podcast.funding {
+                print!(" - {}", funding.url);
+                if let Some(message) = &funding.message {
+                    print!(": {message}");
+                }
+                println!();
+            }
+        }
+
+        // People involved (hosts, guests, etc.)
+ if !podcast.persons.is_empty() { + println!(" People:"); + for person in &podcast.persons { + print!(" - {}", person.name); + if let Some(role) = &person.role { + print!(" [{role}]"); + } + if let Some(img) = &person.img { + print!(" (photo: {img})"); + } + println!(); + } + } + + // Value for value (cryptocurrency support) + if let Some(value) = &podcast.value { + println!(" Value4Value:"); + println!(" Type: {}", value.type_); + println!(" Method: {}", value.method); + if !value.recipients.is_empty() { + println!(" Recipients:"); + for recipient in &value.recipients { + if let Some(name) = &recipient.name { + print!(" - {name}"); + print!(" ({}%)", recipient.split); + println!(); + } + } + } + } + } +} + +fn display_episodes(feed: &feedparser_rs::ParsedFeed) { + println!("Episodes ({} total):", feed.entries.len()); + println!("{}", "-".repeat(40)); + + for (i, entry) in feed.entries.iter().enumerate() { + println!("\nEpisode {}:", i + 1); + + if let Some(title) = &entry.title { + println!(" Title: {title}"); + } + + if let Some(link) = &entry.link { + println!(" Link: {link}"); + } + + if let Some(published) = &entry.published { + println!(" Published: {published}"); + } + + // Media enclosure (audio file) + if !entry.enclosures.is_empty() { + println!(" Audio:"); + for enc in &entry.enclosures { + println!(" URL: {}", enc.url); + if let Some(enclosure_type) = &enc.enclosure_type { + println!(" Type: {enclosure_type}"); + } + if let Some(length) = enc.length { + #[allow(clippy::cast_precision_loss)] + let mb = length as f64 / 1_048_576.0; + println!(" Size: {mb:.2} MB ({length} bytes)"); + } + } + } + + // iTunes episode metadata + if let Some(itunes) = &entry.itunes { + println!(" iTunes:"); + + if let Some(duration) = itunes.duration { + println!(" Duration: {duration} seconds"); + + // Convert to human-readable format + let hours = duration / 3600; + let minutes = (duration % 3600) / 60; + let seconds = duration % 60; + if hours > 0 { + println!(" ({hours:02}:{minutes:02}:{seconds:02})"); + } else { + println!(" ({minutes}:{seconds:02})"); + } + } + + if let Some(episode_num) = itunes.episode { + println!(" Episode Number: {episode_num}"); + } + + if let Some(season) = itunes.season { + println!(" Season: {season}"); + } + + if let Some(episode_type) = &itunes.episode_type { + println!(" Episode Type: {episode_type}"); + } + + if let Some(explicit) = itunes.explicit { + println!(" Explicit: {}", if explicit { "YES" } else { "NO" }); + } + } + + // Podcast 2.0 episode features + if let Some(podcast) = &entry.podcast { + println!(" Podcast 2.0:"); + + // Transcripts + if !podcast.transcript.is_empty() { + println!(" Transcripts:"); + for transcript in &podcast.transcript { + print!(" - {}", transcript.url); + if let Some(transcript_type) = &transcript.transcript_type { + print!(" ({transcript_type})"); + } + if let Some(language) = &transcript.language { + print!(" [{language}]"); + } + println!(); + } + } + + // Chapters + if let Some(chapters) = &podcast.chapters { + println!(" Chapters: {} ({})", chapters.url, chapters.type_); + } + + // Soundbites (highlight clips) + if !podcast.soundbite.is_empty() { + println!(" Soundbites:"); + for soundbite in &podcast.soundbite { + let start_time = soundbite.start_time; + let duration = soundbite.duration; + print!(" - {:.1}s - {:.1}s", start_time, start_time + duration); + if let Some(title) = &soundbite.title { + print!(": {title}"); + } + println!(); + } + } + + // Guest information + if !podcast.person.is_empty() { + println!(" People:"); 
+                for person in &podcast.person {
+                    print!(" - {}", person.name);
+                    if let Some(role) = &person.role {
+                        print!(" ({role})");
+                    }
+                    println!();
+                }
+            }
+        }
+    }
+}
diff --git a/crates/feedparser-rs-core/examples/profile_memory.rs b/crates/feedparser-rs-core/examples/profile_memory.rs
new file mode 100644
index 0000000..3c2dea6
--- /dev/null
+++ b/crates/feedparser-rs-core/examples/profile_memory.rs
@@ -0,0 +1,58 @@
+//! DHAT memory profiler for feedparser-rs
+//!
+//! Usage:
+//! `cargo run --release --example profile_memory`
+//!
+//! View results:
+//! Open dhat-heap.json at <https://nnethercote.github.io/dh_view/dh_view.html>
+//!
+//! Metrics tracked:
+//! - Total allocations per parse
+//! - Total bytes allocated
+//! - Peak memory usage
+//! - Allocation hot spots (functions causing most allocations)
+
+use feedparser_rs::parse;
+
+#[global_allocator]
+static ALLOC: dhat::Alloc = dhat::Alloc;
+
+fn main() {
+    let _profiler = dhat::Profiler::new_heap();
+
+    println!("=== feedparser-rs Memory Profiling ===\n");
+
+    // Profile small feed (2.7 KB)
+    println!("Profiling SMALL feed (2.7 KB) - 1000 iterations...");
+    let small = include_bytes!("../../../benchmarks/fixtures/small.xml");
+    for _ in 0..1000 {
+        let _ = parse(small);
+    }
+    println!(" Completed: 1000 parses\n");
+
+    // Profile medium feed (24 KB)
+    println!("Profiling MEDIUM feed (24 KB) - 100 iterations...");
+    let medium = include_bytes!("../../../benchmarks/fixtures/medium.xml");
+    for _ in 0..100 {
+        let _ = parse(medium);
+    }
+    println!(" Completed: 100 parses\n");
+
+    // Profile large feed (237 KB)
+    println!("Profiling LARGE feed (237 KB) - 10 iterations...");
+    let large = include_bytes!("../../../benchmarks/fixtures/large.xml");
+    for _ in 0..10 {
+        let _ = parse(large);
+    }
+    println!(" Completed: 10 parses\n");
+
+    println!("=== Profiling Complete ===");
+    println!("\nResults saved to: dhat-heap.json");
+    println!("View at: https://nnethercote.github.io/dh_view/dh_view.html");
+    println!("\nKey metrics to analyze:");
+    println!(" - Total allocations per parse (target: <200 for small)");
+    println!(" - Total bytes allocated");
+    println!(" - Peak memory usage");
+    println!(" - Short-lived allocations (optimization candidates)");
+    println!(" - Top allocation hot spots");
+}
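+
+// A minimal sketch (not wired into `main`) of dhat's testing mode, which
+// exposes heap statistics programmatically instead of writing dhat-heap.json.
+// The builder and `HeapStats` field names follow the dhat 0.3 docs; treat
+// the exact stat consumed here as an example, not a project requirement.
+#[allow(dead_code)]
+fn allocations_for_one_parse(data: &[u8]) -> u64 {
+    // Relies on the `#[global_allocator] dhat::Alloc` set up above
+    let _profiler = dhat::Profiler::builder().testing().build();
+    let _ = parse(data);
+    let stats = dhat::HeapStats::get();
+    stats.total_blocks
+}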
diff --git a/crates/feedparser-rs-core/src/lib.rs b/crates/feedparser-rs-core/src/lib.rs
index c2a7de9..9f102c3 100644
--- a/crates/feedparser-rs-core/src/lib.rs
+++ b/crates/feedparser-rs-core/src/lib.rs
@@ -1,11 +1,12 @@
 #![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used, clippy::panic))]
-//! feedparser-rs-core: High-performance RSS/Atom/JSON Feed parser
+//! # feedparser-rs: High-performance RSS/Atom/JSON Feed parser
 //!
-//! This crate provides a pure Rust implementation of feed parsing with
-//! compatibility for Python's feedparser library.
+//! A pure Rust implementation of feed parsing with API compatibility for Python's
+//! [feedparser](https://github.com/kurtmckee/feedparser) library. Designed for
+//! 10-100x faster feed parsing with identical behavior.
 //!
-//! # Examples
+//! ## Quick Start
 //!
 //! ```
 //! use feedparser_rs::parse;
 //!
 //! let xml = r#"<?xml version="1.0"?>
 //! <rss version="2.0">
 //! <channel>
 //! <title>Example Feed</title>
+//! <link>https://example.com</link>
+//! <item>
+//! <title>First Post</title>
+//! <link>https://example.com/post/1</link>
+//! </item>
 //! </channel>
 //! </rss>
 //! "#;
 //!
 //! let feed = parse(xml.as_bytes()).unwrap();
-//! assert!(feed.bozo == false);
+//! assert!(!feed.bozo);
+//! assert_eq!(feed.feed.title.as_deref(), Some("Example Feed"));
+//! assert_eq!(feed.entries.len(), 1);
 //! ```
 //!
-//! # Features
+//! ## Supported Formats
 //!
-//! - Parse RSS 0.9x, 1.0, 2.0
-//! - Parse Atom 0.3, 1.0
-//! - Parse JSON Feed 1.0, 1.1
-//! - Tolerant parsing with bozo flag
-//! - Multi-format date parsing
-//! - HTML sanitization
-//! - Encoding detection
+//! | Format | Versions | Detection |
+//! |--------|----------|-----------|
+//! | RSS | 0.90, 0.91, 0.92, 2.0 | `<rss>` element |
+//! | RSS 1.0 | RDF-based | `<rdf:RDF>` with RSS namespace |
+//! | Atom | 0.3, 1.0 | `<feed>` with Atom namespace |
+//! | JSON Feed | 1.0, 1.1 | `version` field starting with `https://jsonfeed.org` |
 //!
-//! # Architecture
+//! ## Namespace Extensions
 //!
-//! The library provides core data structures like [`ParsedFeed`], [`Entry`], and [`FeedMeta`]
-//! for representing parsed feed data. The main entry point is the [`parse`] function which
-//! automatically detects feed format and returns parsed results.
+//! The parser supports common feed extensions:
+//!
+//! - **iTunes/Podcast** (`itunes:`) - Podcast metadata, categories, explicit flags
+//! - **Podcast 2.0** (`podcast:`) - Transcripts, chapters, funding, persons
+//! - **Dublin Core** (`dc:`) - Creator, date, rights, subject
+//! - **Media RSS** (`media:`) - Thumbnails, content, descriptions
+//! - **Content** (`content:encoded`) - Full HTML content
+//! - **Syndication** (`sy:`) - Update frequency hints
+//! - **`GeoRSS`** (`georss:`) - Geographic coordinates
+//! - **Creative Commons** (`cc:`, `creativeCommons:`) - License information
+//!
+//! ## Type-Safe URL and MIME Handling
+//!
+//! The library uses semantic newtypes for improved type safety:
+//!
+//! ```
+//! use feedparser_rs::{Url, MimeType, Email};
+//!
+//! // Url - wraps URL strings without validation (bozo-compatible)
+//! let url = Url::new("https://example.com/feed.xml");
+//! assert_eq!(url.as_str(), "https://example.com/feed.xml");
+//! assert!(url.starts_with("https://")); // Deref to str
+//!
+//! // MimeType - uses Arc<str> for efficient cloning
+//! let mime = MimeType::new("application/rss+xml");
+//! let clone = mime.clone(); // Cheap: just increments refcount
+//!
+//! // Email - wraps email addresses
+//! let email = Email::new("author@example.com");
+//! ```
+//!
+//! These types implement [`Deref`](std::ops::Deref)<Target=str>, so string methods work directly:
+//!
+//! ```
+//! use feedparser_rs::Url;
+//!
+//! let url = Url::new("https://example.com/path?query=1");
+//! assert!(url.contains("example.com"));
+//! assert_eq!(url.len(), 32);
+//! ```
+//!
+//! ## The Bozo Pattern
+//!
+//! Following Python feedparser's philosophy, this library **never panics** on
+//! malformed input. Instead, it sets the `bozo` flag and continues parsing:
+//!
+//! ```
+//! use feedparser_rs::parse;
+//!
+//! // XML with undefined entity - triggers bozo
+//! let xml_with_entity = b"<rss version='2.0'><channel><title>Test &#xffff;</title></channel></rss>";
+//!
+//! let feed = parse(xml_with_entity).unwrap();
+//! // Parser handles invalid characters gracefully
+//! assert!(feed.feed.title.is_some());
+//! ```
+//!
+//! The bozo flag indicates the feed had issues but was still parseable.
+//!
+//! ## Resource Limits
+//!
+//! Protect against malicious feeds with [`ParserLimits`]:
+//!
+//! ```
+//! use feedparser_rs::{parse_with_limits, ParserLimits};
+//!
+//! // Customize limits for untrusted input
+//! let limits = ParserLimits {
+//!     max_entries: 100,
+//!     max_text_length: 50_000,
+//!     ..Default::default()
+//! };
+//!
+//! let xml = b"<rss version='2.0'><channel><title>Safe</title></channel></rss>";
+//! let feed = parse_with_limits(xml, limits).unwrap();
+//! ```
+//!
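+//! ## Accessing Namespace Data
+//!
+//! A short sketch of reading iTunes metadata; it relies only on the fields
+//! exercised in `examples/podcast_feed.rs` (the `podcast.xml` path is a
+//! placeholder):
+//!
+//! ```no_run
+//! use feedparser_rs::parse;
+//!
+//! let data = std::fs::read("podcast.xml").unwrap();
+//! let feed = parse(&data).unwrap();
+//!
+//! if let Some(itunes) = &feed.feed.itunes {
+//!     if let Some(author) = &itunes.author {
+//!         println!("Author: {author}");
+//!     }
+//!     if itunes.explicit == Some(true) {
+//!         println!("Explicit content");
+//!     }
+//! }
+//! ```
+//!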
+//! ## HTTP Fetching
+//!
+//! With the `http` feature (enabled by default), fetch feeds from URLs:
+//! ```no_run
+//! use feedparser_rs::parse_url;
+//!
+//! // Simple fetch
+//! let feed = parse_url("https://example.com/feed.xml", None, None, None)?;
+//!
+//! // With conditional GET for caching
+//! let feed2 = parse_url(
+//!     "https://example.com/feed.xml",
+//!     feed.etag.as_deref(),      // ETag from previous fetch
+//!     feed.modified.as_deref(),  // Last-Modified from previous fetch
+//!     Some("MyApp/1.0"),         // Custom User-Agent
+//! )?;
+//!
+//! if feed2.status == Some(304) {
+//!     println!("Feed not modified since last fetch");
+//! }
+//! # Ok::<(), feedparser_rs::FeedError>(())
+//! ```
+//!
+//! ## Core Types
+//!
+//! - [`ParsedFeed`] - Complete parsed feed with metadata and entries
+//! - [`FeedMeta`] - Feed-level metadata (title, link, author, etc.)
+//! - [`Entry`] - Individual feed entry/item
+//! - [`Link`], [`Person`], [`Tag`] - Common feed elements
+//! - [`Url`], [`MimeType`], [`Email`] - Type-safe string wrappers
+//!
+//! ## Module Structure
+//!
+//! - [`types`] - All data structures for parsed feeds
+//! - [`namespace`] - Handlers for namespace extensions (iTunes, Podcast 2.0, etc.)
+//! - [`util`] - Helper functions for dates, HTML sanitization, encoding
+//! - [`compat`] - Python feedparser API compatibility layer
+//! - [`http`] - HTTP client for fetching feeds (requires `http` feature)
 
 /// Compatibility utilities for Python feedparser API
 pub mod compat;
@@ -68,11 +188,12 @@ pub use limits::{LimitError, ParserLimits};
 pub use options::ParseOptions;
 pub use parser::{detect_format, parse, parse_with_limits};
 pub use types::{
-    Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
+    Content, Email, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
     ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, MediaContent,
-    MediaThumbnail, ParsedFeed, Person, PodcastChapters, PodcastEntryMeta, PodcastFunding,
-    PodcastMeta, PodcastPerson, PodcastSoundbite, PodcastTranscript, PodcastValue,
-    PodcastValueRecipient, Source, Tag, TextConstruct, TextType, parse_duration, parse_explicit,
+    MediaThumbnail, MimeType, ParsedFeed, Person, PodcastChapters, PodcastEntryMeta,
+    PodcastFunding, PodcastMeta, PodcastPerson, PodcastSoundbite, PodcastTranscript, PodcastValue,
+    PodcastValueRecipient, Source, Tag, TextConstruct, TextType, Url, parse_duration,
+    parse_explicit,
 };
 
 pub use namespace::syndication::{SyndicationMeta, UpdatePeriod};
diff --git a/crates/feedparser-rs-core/src/namespace/cc.rs b/crates/feedparser-rs-core/src/namespace/cc.rs
index 83c8166..ef69449 100644
--- a/crates/feedparser-rs-core/src/namespace/cc.rs
+++ b/crates/feedparser-rs-core/src/namespace/cc.rs
@@ -53,8 +53,8 @@ pub fn handle_feed_element(
     if let Some(license_url) = extract_license_url(attrs, text) {
         feed.links.try_push_limited(
             Link {
-                href: license_url,
-                rel: Some("license".to_string()),
+                href: license_url.into(),
+                rel: Some("license".into()),
                 ..Default::default()
             },
             limits.max_links_per_feed,
@@ -94,8 +94,8 @@ pub fn handle_entry_element(
     if let Some(license_url) = extract_license_url(attrs, text) {
         entry.links.try_push_limited(
             Link {
-                href: license_url,
-                rel: Some("license".to_string()),
+                href: license_url.into(),
+                rel: Some("license".into()),
                 ..Default::default()
             },
             limits.max_links_per_entry,
diff --git a/crates/feedparser-rs-core/src/namespace/content.rs b/crates/feedparser-rs-core/src/namespace/content.rs
index 91df63f..40ff026 100644
--- a/crates/feedparser-rs-core/src/namespace/content.rs
+++ 
b/crates/feedparser-rs-core/src/namespace/content.rs @@ -26,7 +26,7 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) { // content:encoded → add to entry.content as HTML entry.content.push(Content { value: text.to_string(), - content_type: Some("text/html".to_string()), + content_type: Some("text/html".into()), language: None, base: None, }); diff --git a/crates/feedparser-rs-core/src/namespace/dublin_core.rs b/crates/feedparser-rs-core/src/namespace/dublin_core.rs index e07d313..00e2c5d 100644 --- a/crates/feedparser-rs-core/src/namespace/dublin_core.rs +++ b/crates/feedparser-rs-core/src/namespace/dublin_core.rs @@ -34,10 +34,10 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) { "creator" => { // dc:creator → author (if not already set) if feed.author.is_none() { - feed.author = Some(text.to_string()); + feed.author = Some(text.into()); } // Store in dc_creator field - feed.dc_creator = Some(text.to_string()); + feed.dc_creator = Some(text.into()); // Also add to authors list feed.authors.push(Person::from_name(text)); } @@ -62,9 +62,9 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) { "publisher" => { // dc:publisher → publisher if feed.publisher.is_none() { - feed.publisher = Some(text.to_string()); + feed.publisher = Some(text.into()); } - feed.dc_publisher = Some(text.to_string()); + feed.dc_publisher = Some(text.into()); } "rights" => { // dc:rights → rights (if not already set) @@ -82,7 +82,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) { "language" => { // dc:language → language if feed.language.is_none() { - feed.language = Some(text.to_string()); + feed.language = Some(text.into()); } } "identifier" => { @@ -112,9 +112,9 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) { match element { "creator" => { if entry.author.is_none() { - entry.author = Some(text.to_string()); + entry.author = Some(text.into()); } - entry.dc_creator = Some(text.to_string()); + entry.dc_creator = Some(text.into()); entry.authors.push(Person::from_name(text)); } "date" => { @@ -142,7 +142,7 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) { } "identifier" => { if entry.id.is_none() { - entry.id = Some(text.to_string()); + entry.id = Some(text.into()); } } "contributor" => { diff --git a/crates/feedparser-rs-core/src/namespace/georss.rs b/crates/feedparser-rs-core/src/namespace/georss.rs index 54d9a4a..70ee217 100644 --- a/crates/feedparser-rs-core/src/namespace/georss.rs +++ b/crates/feedparser-rs-core/src/namespace/georss.rs @@ -178,25 +178,25 @@ pub fn handle_entry_element( match tag { b"point" => { if let Some(loc) = parse_point(text) { - entry.geo = Some(loc); + entry.geo = Some(Box::new(loc)); } true } b"line" => { if let Some(loc) = parse_line(text) { - entry.geo = Some(loc); + entry.geo = Some(Box::new(loc)); } true } b"polygon" => { if let Some(loc) = parse_polygon(text) { - entry.geo = Some(loc); + entry.geo = Some(Box::new(loc)); } true } b"box" => { if let Some(loc) = parse_box(text) { - entry.geo = Some(loc); + entry.geo = Some(Box::new(loc)); } true } @@ -225,25 +225,25 @@ pub fn handle_feed_element( match tag { b"point" => { if let Some(loc) = parse_point(text) { - feed.geo = Some(loc); + feed.geo = Some(Box::new(loc)); } true } b"line" => { if let Some(loc) = parse_line(text) { - feed.geo = Some(loc); + feed.geo = Some(Box::new(loc)); } true } b"polygon" => { if let Some(loc) = parse_polygon(text) { - feed.geo = 
Some(loc);
+                feed.geo = Some(Box::new(loc));
             }
             true
         }
         b"box" => {
             if let Some(loc) = parse_box(text) {
-                feed.geo = Some(loc);
+                feed.geo = Some(Box::new(loc));
             }
             true
         }
diff --git a/crates/feedparser-rs-core/src/namespace/media_rss.rs b/crates/feedparser-rs-core/src/namespace/media_rss.rs
index f92bd5d..a559a9c 100644
--- a/crates/feedparser-rs-core/src/namespace/media_rss.rs
+++ b/crates/feedparser-rs-core/src/namespace/media_rss.rs
@@ -14,6 +14,17 @@
 /// - `media:keywords` → tags (comma-separated)
 /// - `media:category` → tags
 /// - `media:credit` → contributors
+///
+/// # Type Design Note
+///
+/// The [`MediaContent`] and [`MediaThumbnail`] types in this module use raw `String`
+/// fields instead of the `Url`/`MimeType` newtypes from `types::common`. This is
+/// intentional:
+///
+/// 1. These are internal parsing types with extended attributes (medium, bitrate,
+///    framerate, expression, `is_default`) not present in the public API types.
+/// 2. The `media_content_to_enclosure` function handles conversion to public types.
+/// 3. The public API types in `types::common::MediaContent` use proper newtypes.
 use crate::types::{Enclosure, Entry, Tag};
 
 /// Media RSS namespace URI
@@ -191,8 +202,8 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
 /// ```
 pub fn media_content_to_enclosure(content: &MediaContent) -> Enclosure {
     Enclosure {
-        url: content.url.clone(),
-        enclosure_type: content.type_.clone(),
+        url: content.url.clone().into(),
+        enclosure_type: content.type_.as_ref().map(|t| t.clone().into()),
         length: content.file_size,
     }
 }
diff --git a/crates/feedparser-rs-core/src/namespace/syndication.rs b/crates/feedparser-rs-core/src/namespace/syndication.rs
index f98105e..f92dd67 100644
--- a/crates/feedparser-rs-core/src/namespace/syndication.rs
+++ b/crates/feedparser-rs-core/src/namespace/syndication.rs
@@ -82,7 +82,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
         "updatePeriod" => {
             if let Some(period) = UpdatePeriod::parse(text) {
                 if feed.syndication.is_none() {
-                    feed.syndication = Some(SyndicationMeta::default());
+                    feed.syndication = Some(Box::new(SyndicationMeta::default()));
                 }
                 if let Some(syn) = &mut feed.syndication {
                     syn.update_period = Some(period);
@@ -92,7 +92,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
         "updateFrequency" => {
            if let Ok(freq) = text.parse::<u32>() {
                 if feed.syndication.is_none() {
-                    feed.syndication = Some(SyndicationMeta::default());
+                    feed.syndication = Some(Box::new(SyndicationMeta::default()));
                 }
                 if let Some(syn) = &mut feed.syndication {
                     syn.update_frequency = Some(freq);
                 }
            }
        }
@@ -101,7 +101,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
         "updateBase" => {
             if feed.syndication.is_none() {
-                feed.syndication = Some(SyndicationMeta::default());
+                feed.syndication = Some(Box::new(SyndicationMeta::default()));
             }
             if let Some(syn) = &mut feed.syndication {
                 syn.update_base = Some(text.to_string());
diff --git a/crates/feedparser-rs-core/src/parser/atom.rs b/crates/feedparser-rs-core/src/parser/atom.rs
index 1f2e4cd..1001bfe 100644
--- a/crates/feedparser-rs-core/src/parser/atom.rs
+++ b/crates/feedparser-rs-core/src/parser/atom.rs
@@ -130,15 +130,15 @@ fn parse_feed_element(
     element.attributes().flatten(),
     limits.max_attribute_length,
 ) {
-    link.href = base_ctx.resolve_safe(&link.href);
+    link.href = base_ctx.resolve_safe(&link.href).into();
 
     if feed.feed.link.is_none() && link.rel.as_deref() == Some("alternate") {
-        feed.feed.link = 
Some(link.href.clone()); + feed.feed.link = Some(link.href.to_string()); } if feed.feed.license.is_none() && link.rel.as_deref() == Some("license") { - feed.feed.license = Some(link.href.clone()); + feed.feed.license = Some(link.href.to_string()); } feed.feed .links @@ -304,13 +304,13 @@ fn parse_entry( element.attributes().flatten(), limits.max_attribute_length, ) { - link.href = base_ctx.resolve_safe(&link.href); + link.href = base_ctx.resolve_safe(&link.href).into(); if entry.link.is_none() && link.rel.as_deref() == Some("alternate") { - entry.link = Some(link.href.clone()); + entry.link = Some(link.href.to_string()); } if entry.license.is_none() && link.rel.as_deref() == Some("license") { - entry.license = Some(link.href.clone()); + entry.license = Some(link.href.to_string()); } entry .links @@ -321,7 +321,7 @@ fn parse_entry( } } b"id" if !is_empty => { - entry.id = Some(read_text(reader, buf, limits)?); + entry.id = Some(read_text(reader, buf, limits)?.into()); } b"updated" if !is_empty => { let text = read_text(reader, buf, limits)?; @@ -495,8 +495,8 @@ fn parse_person( check_depth(*depth, limits.max_nesting_depth)?; match e.local_name().as_ref() { - b"name" => name = Some(read_text(reader, buf, limits)?), - b"email" => email = Some(read_text(reader, buf, limits)?), + b"name" => name = Some(read_text(reader, buf, limits)?.into()), + b"email" => email = Some(read_text(reader, buf, limits)?.into()), b"uri" => uri = Some(read_text(reader, buf, limits)?), _ => skip_element(reader, buf, limits, *depth)?, } @@ -534,7 +534,7 @@ fn parse_generator( } match attr.key.as_ref() { b"uri" => uri = Some(bytes_to_string(&attr.value)), - b"version" => version = Some(bytes_to_string(&attr.value)), + b"version" => version = Some(bytes_to_string(&attr.value).into()), _ => {} } } @@ -560,7 +560,7 @@ fn parse_content( continue; } if attr.key.as_ref() == b"type" { - content_type = Some(bytes_to_string(&attr.value)); + content_type = Some(bytes_to_string(&attr.value).into()); } } @@ -599,7 +599,7 @@ fn parse_atom_source( limits.max_attribute_length, ) && link.is_none() { - link = Some(l.href); + link = Some(l.href.to_string()); } skip_to_end(reader, buf, b"link")?; } diff --git a/crates/feedparser-rs-core/src/parser/json.rs b/crates/feedparser-rs-core/src/parser/json.rs index 2a8c763..f1732bc 100644 --- a/crates/feedparser-rs-core/src/parser/json.rs +++ b/crates/feedparser-rs-core/src/parser/json.rs @@ -29,7 +29,7 @@ pub fn parse_json_feed_with_limits(data: &[u8], limits: ParserLimits) -> Result< ))); } - let mut feed = ParsedFeed::new(); + let mut feed = ParsedFeed::with_capacity(limits.max_entries); let json: Value = match serde_json::from_slice(data) { Ok(v) => v, @@ -109,7 +109,7 @@ fn parse_feed_metadata(json: &Value, feed: &mut FeedMeta, limits: &ParserLimits) && favicon.len() <= limits.max_text_length { feed.image = Some(Image { - url: favicon.to_string(), + url: favicon.to_string().into(), title: None, link: None, width: None, @@ -129,7 +129,7 @@ fn parse_feed_metadata(json: &Value, feed: &mut FeedMeta, limits: &ParserLimits) if let Some(language) = json.get("language").and_then(|v| v.as_str()) && language.len() <= limits.max_text_length { - feed.language = Some(language.to_string()); + feed.language = Some(language.into()); } if let Some(expired) = json.get("expired").and_then(Value::as_bool) @@ -143,7 +143,7 @@ fn parse_item(json: &Value, limits: &ParserLimits) -> Entry { let mut entry = Entry::default(); if let Some(id) = json.get("id").and_then(|v| v.as_str()) { - entry.id = 
Some(id.to_string()); + entry.id = Some(id.into()); } if let Some(url) = json.get("url").and_then(|v| v.as_str()) { @@ -185,7 +185,7 @@ fn parse_item(json: &Value, limits: &ParserLimits) -> Entry { if let Some(image) = json.get("image").and_then(|v| v.as_str()) { let _ = entry.links.try_push_limited( - Link::enclosure(image, Some("image/*".to_string())), + Link::enclosure(image, Some("image/*".into())), limits.max_entries, ); } @@ -218,10 +218,10 @@ fn parse_item(json: &Value, limits: &ParserLimits) -> Entry { if let Some(language) = json.get("language").and_then(|v| v.as_str()) { if let Some(detail) = &mut entry.title_detail { - detail.language = Some(language.to_string()); + detail.language = Some(language.into()); } if let Some(detail) = &mut entry.summary_detail { - detail.language = Some(language.to_string()); + detail.language = Some(language.into()); } } @@ -243,7 +243,7 @@ fn parse_item(json: &Value, limits: &ParserLimits) -> Entry { /// Extracts authors from JSON Feed format (supports both "authors" array and legacy "author" object) fn parse_authors( json: &Value, - author: &mut Option<String>, + author: &mut Option<SmallString>, author_detail: &mut Option<Person>, authors: &mut Vec<Person>, limits: &ParserLimits, diff --git a/crates/feedparser-rs-core/src/parser/rss.rs b/crates/feedparser-rs-core/src/parser/rss.rs index 816e3a6..fcf15b8 100644 --- a/crates/feedparser-rs-core/src/parser/rss.rs +++ b/crates/feedparser-rs-core/src/parser/rss.rs @@ -29,25 +29,28 @@ const MALFORMED_ATTRIBUTES_ERROR: &str = "Malformed XML attributes"; /// Note: Keys are cloned to `Vec<u8>` because `quick_xml::Attribute` owns the key /// data only for the lifetime of the event, but we need to store attributes across /// multiple parsing calls in `parse_enclosure` and other functions. +/// +/// Pre-allocates space for 4 attributes (typical for enclosures: url, type, length, maybe one more) #[inline] fn collect_attributes(e: &quick_xml::events::BytesStart) -> (Vec<(Vec<u8>, String)>, bool) { let mut has_errors = false; - let attrs = e - .attributes() - .filter_map(|result| { - if let Ok(attr) = result { + let mut attrs = Vec::with_capacity(4); + + for result in e.attributes() { + match result { + Ok(attr) => { if let Ok(v) = attr.unescape_value() { - Some((attr.key.as_ref().to_vec(), v.to_string())) + attrs.push((attr.key.as_ref().to_vec(), v.to_string())); } else { has_errors = true; - None } - } else { + } + Err(_) => { has_errors = true; - None } - }) - .collect(); + } + } + (attrs, has_errors) } @@ -306,9 +309,9 @@ fn parse_enclosure(attrs: &[(Vec<u8>, String)], limits: &ParserLimits) -> Option<Enclosure> None } else { Some(Enclosure { - url, + url: url.into(), length, - enclosure_type: enc_type, + enclosure_type: enc_type.map(Into::into), }) } } @@ -330,7 +333,7 @@ fn parse_channel_standard( feed.feed.set_title(TextConstruct { value: text, content_type: TextType::Text, - language: channel_lang.map(String::from), + language: channel_lang.map(std::convert::Into::into), base: base_ctx.base().map(String::from), }); } @@ -348,12 +351,12 @@ feed.feed.set_subtitle(TextConstruct { value: text, content_type: TextType::Html, - language: channel_lang.map(String::from), + language: channel_lang.map(std::convert::Into::into), base: base_ctx.base().map(String::from), }); } b"language" => { - feed.feed.language = Some(read_text(reader, buf, limits)?); + feed.feed.language = Some(read_text(reader, buf, limits)?.into()); } b"pubDate" => { let text = read_text(reader, buf, limits)?; @@ -367,10 +370,10 @@ } }
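The `collect_attributes` rewrite above trades the `filter_map` chain for an explicit loop so the vector can be pre-allocated and the `has_errors` bookkeeping stays obvious. A minimal sketch of the same shape, with a hypothetical `RawAttr` type standing in for `quick_xml`'s attribute events:

```rust
/// Stand-in for quick_xml's per-attribute result (illustrative only).
struct RawAttr {
    key: Vec<u8>,
    value: Result<String, ()>,
}

/// Collect (key, value) pairs while recording, not propagating, failures.
fn collect(raw: &[RawAttr]) -> (Vec<(Vec<u8>, String)>, bool) {
    let mut has_errors = false;
    // Four slots cover the typical enclosure (url, type, length) without regrowth.
    let mut attrs = Vec::with_capacity(4);
    for attr in raw {
        match &attr.value {
            Ok(v) => attrs.push((attr.key.clone(), v.clone())),
            // Bozo pattern: flag the malformed attribute and keep parsing.
            Err(_) => has_errors = true,
        }
    }
    (attrs, has_errors)
}
```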
b"managingEditor" => { - feed.feed.author = Some(read_text(reader, buf, limits)?); + feed.feed.author = Some(read_text(reader, buf, limits)?.into()); } b"webMaster" => { - feed.feed.publisher = Some(read_text(reader, buf, limits)?); + feed.feed.publisher = Some(read_text(reader, buf, limits)?.into()); } b"generator" => { feed.feed.generator = Some(read_text(reader, buf, limits)?); @@ -383,7 +386,7 @@ fn parse_channel_standard( let term = read_text(reader, buf, limits)?; feed.feed.tags.try_push_limited( Tag { - term, + term: term.into(), scheme: None, label: None, }, @@ -409,11 +412,17 @@ fn parse_channel_itunes( ) -> Result { if is_itunes_tag(tag, b"author") { let text = read_text(reader, buf, limits)?; - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); itunes.author = Some(text); Ok(true) } else if is_itunes_tag(tag, b"owner") { - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); if let Ok(owner) = parse_itunes_owner(reader, buf, limits, depth) { itunes.owner = Some(owner); } @@ -423,18 +432,27 @@ fn parse_channel_itunes( Ok(true) } else if is_itunes_tag(tag, b"explicit") { let text = read_text(reader, buf, limits)?; - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); itunes.explicit = parse_explicit(&text); Ok(true) } else if is_itunes_tag(tag, b"image") { if let Some(value) = find_attribute(attrs, b"href") { - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); - itunes.image = Some(truncate_to_length(value, limits.max_attribute_length)); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); + itunes.image = Some(truncate_to_length(value, limits.max_attribute_length).into()); } Ok(true) } else if is_itunes_tag(tag, b"keywords") { let text = read_text(reader, buf, limits)?; - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); itunes.keywords = text .split(',') .map(|s| s.trim().to_string()) @@ -443,19 +461,28 @@ fn parse_channel_itunes( Ok(true) } else if is_itunes_tag(tag, b"type") { let text = read_text(reader, buf, limits)?; - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); itunes.podcast_type = Some(text); Ok(true) } else if is_itunes_tag(tag, b"complete") { let text = read_text(reader, buf, limits)?; - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); itunes.complete = Some(text.trim().eq_ignore_ascii_case("Yes")); Ok(true) } else if is_itunes_tag(tag, b"new-feed-url") { let text = read_text(reader, buf, limits)?; if !text.is_empty() { - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); - itunes.new_feed_url = Some(text.trim().to_string()); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); + itunes.new_feed_url = Some(text.trim().to_string().into()); } Ok(true) } else { @@ -523,7 +550,10 @@ fn 
parse_itunes_category( buf.clear(); } - let itunes = feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default); + let itunes = feed + .feed + .itunes + .get_or_insert_with(|| Box::new(ItunesFeedMeta::default())); itunes.categories.push(ItunesCategory { text: category_text, subcategory: subcategory_text, @@ -544,7 +574,10 @@ fn parse_channel_podcast( ) -> Result { if tag.starts_with(b"podcast:guid") { let text = read_text(reader, buf, limits)?; - let podcast = feed.feed.podcast.get_or_insert_with(PodcastMeta::default); + let podcast = feed + .feed + .podcast + .get_or_insert_with(|| Box::new(PodcastMeta::default())); podcast.guid = Some(text); Ok(true) } else if tag.starts_with(b"podcast:funding") { @@ -557,10 +590,17 @@ fn parse_channel_podcast( } else { Some(message_text) }; - let podcast = feed.feed.podcast.get_or_insert_with(PodcastMeta::default); - podcast - .funding - .try_push_limited(PodcastFunding { url, message }, limits.max_podcast_funding); + let podcast = feed + .feed + .podcast + .get_or_insert_with(|| Box::new(PodcastMeta::default())); + podcast.funding.try_push_limited( + PodcastFunding { + url: url.into(), + message, + }, + limits.max_podcast_funding, + ); Ok(true) } else if tag.starts_with(b"podcast:value") { parse_podcast_value(reader, buf, attrs, feed, limits)?; @@ -649,7 +689,7 @@ fn parse_item( } b"enclosure" => { if let Some(mut enclosure) = parse_enclosure(&attrs, limits) { - enclosure.url = base_ctx.resolve_safe(&enclosure.url); + enclosure.url = base_ctx.resolve_safe(&enclosure.url).into(); entry .enclosures .try_push_limited(enclosure, limits.max_enclosures); @@ -713,7 +753,7 @@ fn parse_item_standard( entry.set_title(TextConstruct { value: text, content_type: TextType::Text, - language: item_lang.map(String::from), + language: item_lang.map(std::convert::Into::into), base: base_ctx.base().map(String::from), }); } @@ -723,8 +763,8 @@ fn parse_item_standard( entry.link = Some(resolved_link.clone()); entry.links.try_push_limited( Link { - href: resolved_link, - rel: Some("alternate".to_string()), + href: resolved_link.into(), + rel: Some("alternate".into()), ..Default::default() }, limits.max_links_per_entry, @@ -735,25 +775,25 @@ fn parse_item_standard( entry.set_summary(TextConstruct { value: text, content_type: TextType::Html, - language: item_lang.map(String::from), + language: item_lang.map(std::convert::Into::into), base: base_ctx.base().map(String::from), }); } b"guid" => { - entry.id = Some(read_text(reader, buf, limits)?); + entry.id = Some(read_text(reader, buf, limits)?.into()); } b"pubDate" => { let text = read_text(reader, buf, limits)?; entry.published = parse_date(&text); } b"author" => { - entry.author = Some(read_text(reader, buf, limits)?); + entry.author = Some(read_text(reader, buf, limits)?.into()); } b"category" => { let term = read_text(reader, buf, limits)?; entry.tags.try_push_limited( Tag { - term, + term: term.into(), scheme: None, label: None, }, @@ -788,28 +828,38 @@ fn parse_item_itunes( ) -> Result { if is_itunes_tag(tag, b"title") { let text = read_text(reader, buf, limits)?; - let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default); + let itunes = entry + .itunes + .get_or_insert_with(|| Box::new(ItunesEntryMeta::default())); itunes.title = Some(text); Ok(true) } else if is_itunes_tag(tag, b"author") { let text = read_text(reader, buf, limits)?; - let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default); + let itunes = entry + .itunes + .get_or_insert_with(|| Box::new(ItunesEntryMeta::default())); 
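The `get_or_insert_with(|| Box::new(...))` calls repeated through these hunks are the standard way to lazily materialize the now-boxed metadata. A reduced sketch of the pattern (type and field names are illustrative, not the crate's exact API):

```rust
#[derive(Default)]
struct ItunesEntryMeta {
    author: Option<String>,
}

struct Entry {
    // Boxed so an absent value costs one niche-optimized pointer,
    // not the full metadata struct embedded in every Entry.
    itunes: Option<Box<ItunesEntryMeta>>,
}

fn set_itunes_author(entry: &mut Entry, name: &str) {
    // The Box is allocated only the first time an itunes:* tag appears.
    let itunes = entry
        .itunes
        .get_or_insert_with(|| Box::new(ItunesEntryMeta::default()));
    itunes.author = Some(name.to_string());
}
```

`get_or_insert_with(Box::default)` would be equivalent here; the closure form just spells out the allocation.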
itunes.author = Some(text); Ok(true) } else if is_itunes_tag(tag, b"duration") { let text = read_text(reader, buf, limits)?; - let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default); + let itunes = entry + .itunes + .get_or_insert_with(|| Box::new(ItunesEntryMeta::default())); itunes.duration = parse_duration(&text); Ok(true) } else if is_itunes_tag(tag, b"explicit") { let text = read_text(reader, buf, limits)?; - let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default); + let itunes = entry + .itunes + .get_or_insert_with(|| Box::new(ItunesEntryMeta::default())); itunes.explicit = parse_explicit(&text); Ok(true) } else if is_itunes_tag(tag, b"image") { if let Some(value) = find_attribute(attrs, b"href") { - let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default); - itunes.image = Some(truncate_to_length(value, limits.max_attribute_length)); + let itunes = entry + .itunes + .get_or_insert_with(|| Box::new(ItunesEntryMeta::default())); + itunes.image = Some(truncate_to_length(value, limits.max_attribute_length).into()); } if !is_empty { skip_element(reader, buf, limits, depth)?; @@ -817,17 +867,23 @@ fn parse_item_itunes( Ok(true) } else if is_itunes_tag(tag, b"episode") { let text = read_text(reader, buf, limits)?; - let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default); + let itunes = entry + .itunes + .get_or_insert_with(|| Box::new(ItunesEntryMeta::default())); itunes.episode = text.parse().ok(); Ok(true) } else if is_itunes_tag(tag, b"season") { let text = read_text(reader, buf, limits)?; - let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default); + let itunes = entry + .itunes + .get_or_insert_with(|| Box::new(ItunesEntryMeta::default())); itunes.season = text.parse().ok(); Ok(true) } else if is_itunes_tag(tag, b"episodeType") { let text = read_text(reader, buf, limits)?; - let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default); + let itunes = entry + .itunes + .get_or_insert_with(|| Box::new(ItunesEntryMeta::default())); itunes.episode_type = Some(text); Ok(true) } else { @@ -896,8 +952,8 @@ fn parse_podcast_transcript( if !url.is_empty() { entry.podcast_transcripts.try_push_limited( PodcastTranscript { - url, - transcript_type, + url: url.into(), + transcript_type: transcript_type.map(Into::into), language, rel, }, @@ -936,8 +992,8 @@ fn parse_podcast_person( name, role, group, - img, - href, + img: img.map(Into::into), + href: href.map(Into::into), }, limits.max_podcast_persons, ); @@ -964,8 +1020,13 @@ fn parse_podcast_chapters( .unwrap_or_default(); if !url.is_empty() { - let podcast = entry.podcast.get_or_insert_with(PodcastEntryMeta::default); - podcast.chapters = Some(PodcastChapters { url, type_ }); + let podcast = entry + .podcast + .get_or_insert_with(|| Box::new(PodcastEntryMeta::default())); + podcast.chapters = Some(PodcastChapters { + url: url.into(), + type_: type_.into(), + }); } if !is_empty { @@ -996,7 +1057,9 @@ fn parse_podcast_soundbite( if text.is_empty() { None } else { Some(text) } }; - let podcast = entry.podcast.get_or_insert_with(PodcastEntryMeta::default); + let podcast = entry + .podcast + .get_or_insert_with(|| Box::new(PodcastEntryMeta::default())); podcast.soundbite.try_push_limited( PodcastSoundbite { start_time, @@ -1085,9 +1148,14 @@ fn parse_item_media( let height = find_attribute(attrs, b"height").and_then(|v| v.parse().ok()); if !url.is_empty() { - entry - .media_thumbnails - .try_push_limited(MediaThumbnail { url, width, height }, limits.max_enclosures); 
+ entry.media_thumbnails.try_push_limited( + MediaThumbnail { + url: url.into(), + width, + height, + }, + limits.max_enclosures, + ); } if !is_empty { skip_element(reader, buf, limits, depth)?; @@ -1107,8 +1175,8 @@ fn parse_item_media( if !url.is_empty() { entry.media_content.try_push_limited( MediaContent { - url, - content_type, + url: url.into(), + content_type: content_type.map(Into::into), filesize, width, height, @@ -1182,7 +1250,7 @@ fn parse_image( } Ok(Image { - url, + url: url.into(), title, link, width, @@ -1283,7 +1351,7 @@ fn parse_podcast_value( let suggested = find_attribute(attrs, b"suggested") .map(|v| truncate_to_length(v, limits.max_attribute_length)); - let mut recipients = Vec::new(); + let mut recipients = Vec::with_capacity(2); loop { match reader.read_event_into(buf) { @@ -1333,7 +1401,10 @@ fn parse_podcast_value( buf.clear(); } - let podcast = feed.feed.podcast.get_or_insert_with(PodcastMeta::default); + let podcast = feed + .feed + .podcast + .get_or_insert_with(|| Box::new(PodcastMeta::default())); podcast.value = Some(PodcastValue { type_, method, diff --git a/crates/feedparser-rs-core/src/parser/rss10.rs b/crates/feedparser-rs-core/src/parser/rss10.rs index 1e6b3a4..54155d0 100644 --- a/crates/feedparser-rs-core/src/parser/rss10.rs +++ b/crates/feedparser-rs-core/src/parser/rss10.rs @@ -91,7 +91,7 @@ pub fn parse_rss10_with_limits(data: &[u8], limits: ParserLimits) -> Result, ) -> Result { let mut entry = Entry::with_capacity(); - entry.id = item_id; + entry.id = item_id.map(std::convert::Into::into); loop { match reader.read_event_into(buf) { @@ -372,7 +372,7 @@ fn parse_image( } Ok(Image { - url, + url: url.into(), title, link, width: None, diff --git a/crates/feedparser-rs-core/src/types/common.rs b/crates/feedparser-rs-core/src/types/common.rs index e981fc7..4dbafa8 100644 --- a/crates/feedparser-rs-core/src/types/common.rs +++ b/crates/feedparser-rs-core/src/types/common.rs @@ -1,31 +1,454 @@ use super::generics::{FromAttributes, ParseFrom}; use crate::util::text::bytes_to_string; +use compact_str::CompactString; use serde_json::Value; +use std::ops::Deref; +use std::sync::Arc; + +/// Optimized string type for small strings (≤24 bytes stored inline) +/// +/// Uses `CompactString` which stores strings up to 24 bytes inline without heap allocation. +/// This significantly reduces allocations for common short strings like language codes, +/// author names, category terms, and other metadata fields. +/// +/// `CompactString` implements `Deref`, so it can be used transparently as a string. +/// +/// # Examples +/// +/// ``` +/// use feedparser_rs::types::SmallString; +/// +/// let s: SmallString = "en-US".into(); +/// assert_eq!(s.as_str(), "en-US"); +/// assert_eq!(s.len(), 5); // Stored inline, no heap allocation +/// ``` +pub type SmallString = CompactString; + +/// URL newtype for type-safe URL handling +/// +/// Provides a semantic wrapper around string URLs without validation. +/// Following the bozo pattern, URLs are not validated during parsing. 
+/// +/// # Examples +/// +/// ``` +/// use feedparser_rs::Url; +/// +/// let url = Url::new("https://example.com"); +/// assert_eq!(url.as_str(), "https://example.com"); +/// +/// // Deref coercion allows transparent string access +/// let len: usize = url.len(); +/// assert_eq!(len, 19); +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)] +#[serde(transparent)] +pub struct Url(String); + +impl Url { + /// Creates a new URL from any type that can be converted to a String + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::Url; + /// + /// let url1 = Url::new("https://example.com"); + /// let url2 = Url::new(String::from("https://example.com")); + /// assert_eq!(url1, url2); + /// ``` + #[inline] + pub fn new(s: impl Into<String>) -> Self { + Self(s.into()) + } + + /// Returns the URL as a string slice + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::Url; + /// + /// let url = Url::new("https://example.com"); + /// assert_eq!(url.as_str(), "https://example.com"); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.0 + } + + /// Consumes the `Url` and returns the inner `String` + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::Url; + /// + /// let url = Url::new("https://example.com"); + /// let inner: String = url.into_inner(); + /// assert_eq!(inner, "https://example.com"); + /// ``` + #[inline] + pub fn into_inner(self) -> String { + self.0 + } +} + +impl Deref for Url { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + &self.0 + } +} + +impl From<String> for Url { + #[inline] + fn from(s: String) -> Self { + Self(s) + } +} + +impl From<&str> for Url { + #[inline] + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl AsRef<str> for Url { + #[inline] + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl std::fmt::Display for Url { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl PartialEq<str> for Url { + fn eq(&self, other: &str) -> bool { + self.0 == other + } +} + +impl PartialEq<&str> for Url { + fn eq(&self, other: &&str) -> bool { + self.0 == *other + } +} + +impl PartialEq<String> for Url { + fn eq(&self, other: &String) -> bool { + &self.0 == other + } +} + +/// MIME type newtype with string interning +/// +/// Uses `Arc<str>` for efficient cloning of common MIME types. +/// Multiple references to the same MIME type share the same allocation.
+/// +/// # Examples +/// +/// ``` +/// use feedparser_rs::MimeType; +/// +/// let mime = MimeType::new("text/html"); +/// assert_eq!(mime.as_str(), "text/html"); +/// +/// // Cloning is cheap (just increments reference count) +/// let clone = mime.clone(); +/// assert_eq!(mime, clone); +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct MimeType(Arc<str>); + +// Custom serde implementation for MimeType since Arc<str> doesn't implement Serialize/Deserialize +impl serde::Serialize for MimeType { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + serializer.serialize_str(&self.0) + } +} + +impl<'de> serde::Deserialize<'de> for MimeType { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let s = <String as serde::Deserialize>::deserialize(deserializer)?; + Ok(Self::new(s)) + } +} + +impl MimeType { + /// Creates a new MIME type from any string-like type + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::MimeType; + /// + /// let mime = MimeType::new("application/json"); + /// assert_eq!(mime.as_str(), "application/json"); + /// ``` + #[inline] + pub fn new(s: impl AsRef<str>) -> Self { + Self(Arc::from(s.as_ref())) + } + + /// Returns the MIME type as a string slice + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::MimeType; + /// + /// let mime = MimeType::new("text/plain"); + /// assert_eq!(mime.as_str(), "text/plain"); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.0 + } + + /// Common MIME type constants for convenience. + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::MimeType; + /// + /// let html = MimeType::new(MimeType::TEXT_HTML); + /// assert_eq!(html.as_str(), "text/html"); + /// ``` + pub const TEXT_HTML: &'static str = "text/html"; + + /// `text/plain` MIME type constant + pub const TEXT_PLAIN: &'static str = "text/plain"; + + /// `application/xml` MIME type constant + pub const APPLICATION_XML: &'static str = "application/xml"; + + /// `application/json` MIME type constant + pub const APPLICATION_JSON: &'static str = "application/json"; +} + +impl Default for MimeType { + #[inline] + fn default() -> Self { + Self(Arc::from("")) + } +} + +impl Deref for MimeType { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + &self.0 + } +} + +impl From<String> for MimeType { + #[inline] + fn from(s: String) -> Self { + Self(Arc::from(s.as_str())) + } +} + +impl From<&str> for MimeType { + #[inline] + fn from(s: &str) -> Self { + Self(Arc::from(s)) + } +} + +impl AsRef<str> for MimeType { + #[inline] + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl std::fmt::Display for MimeType { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl PartialEq<str> for MimeType { + fn eq(&self, other: &str) -> bool { + &*self.0 == other + } +} + +impl PartialEq<&str> for MimeType { + fn eq(&self, other: &&str) -> bool { + &*self.0 == *other + } +} + +impl PartialEq<String> for MimeType { + fn eq(&self, other: &String) -> bool { + &*self.0 == other + } +} + +/// Email newtype for type-safe email handling +/// +/// Provides a semantic wrapper around email addresses without validation. +/// Following the bozo pattern, emails are not validated during parsing.
+/// +/// # Examples +/// +/// ``` +/// use feedparser_rs::Email; +/// +/// let email = Email::new("user@example.com"); +/// assert_eq!(email.as_str(), "user@example.com"); +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)] +#[serde(transparent)] +pub struct Email(String); + +impl Email { + /// Creates a new email from any type that can be converted to a String + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::Email; + /// + /// let email = Email::new("user@example.com"); + /// assert_eq!(email.as_str(), "user@example.com"); + /// ``` + #[inline] + pub fn new(s: impl Into<String>) -> Self { + Self(s.into()) + } + + /// Returns the email as a string slice + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::Email; + /// + /// let email = Email::new("user@example.com"); + /// assert_eq!(email.as_str(), "user@example.com"); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.0 + } + + /// Consumes the `Email` and returns the inner `String` + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::Email; + /// + /// let email = Email::new("user@example.com"); + /// let inner: String = email.into_inner(); + /// assert_eq!(inner, "user@example.com"); + /// ``` + #[inline] + pub fn into_inner(self) -> String { + self.0 + } +} + +impl Deref for Email { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + &self.0 + } +} + +impl From<String> for Email { + #[inline] + fn from(s: String) -> Self { + Self(s) + } +} + +impl From<&str> for Email { + #[inline] + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl AsRef<str> for Email { + #[inline] + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl std::fmt::Display for Email { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl PartialEq<str> for Email { + fn eq(&self, other: &str) -> bool { + self.0 == other + } +} + +impl PartialEq<&str> for Email { + fn eq(&self, other: &&str) -> bool { + self.0 == *other + } +} + +impl PartialEq<String> for Email { + fn eq(&self, other: &String) -> bool { + &self.0 == other + } +} /// Link in feed or entry #[derive(Debug, Clone, Default)] pub struct Link { /// Link URL - pub href: String, + pub href: Url, /// Link relationship type (e.g., "alternate", "enclosure", "self") - pub rel: Option<String>, + /// Stored inline as these are typically short (≤24 bytes) + pub rel: Option<SmallString>, /// MIME type of the linked resource - pub link_type: Option<String>, + pub link_type: Option<MimeType>, /// Human-readable link title pub title: Option<String>, /// Length of the linked resource in bytes pub length: Option<u64>, - /// Language of the linked resource - pub hreflang: Option<String>, + /// Language of the linked resource (stored inline for lang codes ≤24 bytes) + pub hreflang: Option<SmallString>, } impl Link { /// Create a new link with just URL and relation type #[inline] - pub fn new(href: impl Into<String>, rel: impl Into<String>) -> Self { + pub fn new(href: impl Into<Url>, rel: impl AsRef<str>) -> Self { Self { href: href.into(), - rel: Some(rel.into()), + rel: Some(rel.as_ref().into()), link_type: None, title: None, length: None, @@ -35,16 +458,16 @@ impl Link { /// Create an alternate link (common for entry URLs) #[inline] - pub fn alternate(href: impl Into<String>) -> Self { + pub fn alternate(href: impl Into<Url>) -> Self { Self::new(href, "alternate") } /// Create a self link (for feed URLs) #[inline] - pub fn self_link(href: impl Into<String>, mime_type: impl Into<String>) -> Self { + pub fn self_link(href: impl Into<Url>, mime_type: impl Into<MimeType>) -> Self { Self { href: href.into(), - rel:
Some("self".to_string()), + rel: Some("self".into()), link_type: Some(mime_type.into()), title: None, length: None, @@ -54,10 +477,10 @@ impl Link { /// Create an enclosure link (for media) #[inline] - pub fn enclosure(href: impl Into<String>, mime_type: Option<String>) -> Self { + pub fn enclosure(href: impl Into<Url>, mime_type: Option<MimeType>) -> Self { Self { href: href.into(), - rel: Some("enclosure".to_string()), + rel: Some("enclosure".into()), link_type: mime_type, title: None, length: None, @@ -67,14 +490,14 @@ impl Link { /// Create a related link #[inline] - pub fn related(href: impl Into<String>) -> Self { + pub fn related(href: impl Into<Url>) -> Self { Self::new(href, "related") } /// Set MIME type (builder pattern) #[inline] #[must_use] - pub fn with_type(mut self, mime_type: impl Into<String>) -> Self { + pub fn with_type(mut self, mime_type: impl Into<MimeType>) -> Self { self.link_type = Some(mime_type.into()); self } @@ -83,10 +506,10 @@ impl Link { } /// Person (author, contributor, etc.) #[derive(Debug, Clone, Default)] pub struct Person { - /// Person's name - pub name: Option<String>, + /// Person's name (stored inline for names ≤24 bytes) + pub name: Option<SmallString>, /// Person's email address - pub email: Option<String>, + pub email: Option<Email>, /// Person's URI/website pub uri: Option<String>, } @@ -105,9 +528,9 @@ /// assert!(person.uri.is_none()); /// ``` #[inline] - pub fn from_name(name: impl Into<String>) -> Self { + pub fn from_name(name: impl AsRef<str>) -> Self { Self { - name: Some(name.into()), + name: Some(name.as_ref().into()), email: None, uri: None, } @@ -117,20 +540,20 @@ /// Tag/category #[derive(Debug, Clone)] pub struct Tag { - /// Tag term/label - pub term: String, - /// Tag scheme/domain - pub scheme: Option<String>, - /// Human-readable tag label - pub label: Option<String>, + /// Tag term/label (stored inline for terms ≤24 bytes) + pub term: SmallString, + /// Tag scheme/domain (stored inline for schemes ≤24 bytes) + pub scheme: Option<SmallString>, + /// Human-readable tag label (stored inline for labels ≤24 bytes) + pub label: Option<SmallString>, } impl Tag { /// Create a simple tag with just term #[inline] - pub fn new(term: impl Into<String>) -> Self { + pub fn new(term: impl AsRef<str>) -> Self { Self { - term: term.into(), + term: term.as_ref().into(), scheme: None, label: None, } @@ -141,7 +564,7 @@ #[derive(Debug, Clone)] pub struct Image { /// Image URL - pub url: String, + pub url: Url, /// Image title pub title: Option<String>, /// Link associated with the image @@ -158,11 +581,11 @@ #[derive(Debug, Clone)] pub struct Enclosure { /// Enclosure URL - pub url: String, + pub url: Url, /// File size in bytes pub length: Option<u64>, /// MIME type - pub enclosure_type: Option<String>, + pub enclosure_type: Option<MimeType>, } /// Content block @@ -171,9 +594,9 @@ pub struct Content { /// Content body pub value: String, /// Content MIME type - pub content_type: Option<String>, - /// Content language - pub language: Option<String>, + pub content_type: Option<MimeType>, + /// Content language (stored inline for lang codes ≤24 bytes) + pub language: Option<SmallString>, /// Base URL for relative links pub base: Option<String>, } @@ -184,7 +607,7 @@ impl Content { pub fn html(value: impl Into<String>) -> Self { Self { value: value.into(), - content_type: Some("text/html".to_string()), + content_type: Some(MimeType::new(MimeType::TEXT_HTML)), language: None, base: None, } @@ -195,7 +618,7 @@ pub fn plain(value: impl Into<String>) -> Self { Self { value: value.into(), - content_type: Some("text/plain".to_string()), + content_type: Some(MimeType::new(MimeType::TEXT_PLAIN)), language: None, base: None, } @@ -220,8 +643,8 @@ pub
struct TextConstruct { pub value: String, /// Content type pub content_type: TextType, - /// Content language - pub language: Option<String>, + /// Content language (stored inline for lang codes ≤24 bytes) + pub language: Option<SmallString>, /// Base URL for relative links pub base: Option<String>, } @@ -252,8 +675,8 @@ impl TextConstruct { /// Set language (builder pattern) #[inline] #[must_use] - pub fn with_language(mut self, language: impl Into<String>) -> Self { - self.language = Some(language.into()); + pub fn with_language(mut self, language: impl AsRef<str>) -> Self { + self.language = Some(language.as_ref().into()); self } } @@ -265,8 +688,8 @@ pub struct Generator { pub value: String, /// Generator URI pub uri: Option<String>, - /// Generator version - pub version: Option<String>, + /// Generator version (stored inline for versions ≤24 bytes) + pub version: Option<SmallString>, } /// Source reference (for entries) @@ -289,7 +712,7 @@ pub struct MediaThumbnail { /// /// This URL comes from untrusted feed input and has NOT been validated for SSRF. /// Applications MUST validate URLs before fetching to prevent SSRF attacks. - pub url: String, + pub url: Url, /// Thumbnail width in pixels pub width: Option<u32>, /// Thumbnail height in pixels @@ -305,9 +728,9 @@ pub struct MediaContent { /// /// This URL comes from untrusted feed input and has NOT been validated for SSRF. /// Applications MUST validate URLs before fetching to prevent SSRF attacks. - pub url: String, + pub url: Url, /// MIME type - pub content_type: Option<String>, + pub content_type: Option<MimeType>, /// File size in bytes pub filesize: Option<u64>, /// Media width in pixels @@ -346,12 +769,14 @@ impl FromAttributes for Link { } href.map(|href| Self { - href, - rel: rel.or_else(|| Some("alternate".to_string())), - link_type, + href: Url::new(href), + rel: rel + .map(std::convert::Into::into) + .or_else(|| Some("alternate".into())), + link_type: link_type.map(MimeType::new), title, length, - hreflang, + hreflang: hreflang.map(std::convert::Into::into), }) } } @@ -379,9 +804,9 @@ impl FromAttributes for Tag { } term.map(|term| Self { - term, - scheme, - label, + term: term.into(), + scheme: scheme.map(std::convert::Into::into), + label: label.map(std::convert::Into::into), }) } } @@ -409,9 +834,9 @@ impl FromAttributes for Enclosure { } url.map(|url| Self { - url, + url: Url::new(url), length, - enclosure_type, + enclosure_type: enclosure_type.map(MimeType::new), }) } } @@ -438,7 +863,11 @@ impl FromAttributes for MediaThumbnail { } } - url.map(|url| Self { url, width, height }) + url.map(|url| Self { + url: Url::new(url), + width, + height, + }) } } @@ -471,8 +900,8 @@ impl FromAttributes for MediaContent { } url.map(|url| Self { - url, - content_type, + url: Url::new(url), + content_type: content_type.map(MimeType::new), filesize, width, height, @@ -489,7 +918,10 @@ impl ParseFrom<&Value> for Person { /// JSON Feed format: `{"name": "...", "url": "...", "avatar": "..."}` fn parse_from(json: &Value) -> Option<Self> { json.as_object().map(|obj| Self { - name: obj.get("name").and_then(Value::as_str).map(String::from), + name: obj + .get("name") + .and_then(Value::as_str) + .map(std::convert::Into::into), email: None, // JSON Feed doesn't have email field uri: obj.get("url").and_then(Value::as_str).map(String::from), }) @@ -504,12 +936,12 @@ impl ParseFrom<&Value> for Enclosure { let obj = json.as_object()?; let url = obj.get("url").and_then(Value::as_str)?; Some(Self { - url: url.to_string(), + url: Url::new(url), length: obj.get("size_in_bytes").and_then(Value::as_u64), enclosure_type: obj .get("mime_type")
.and_then(Value::as_str) - .map(String::from), + .map(MimeType::new), }) } } @@ -536,10 +968,7 @@ mod tests { assert_eq!(link.rel.as_deref(), Some("self")); assert_eq!(link.link_type.as_deref(), Some("application/feed+json")); - let link = Link::enclosure( - "https://example.com/audio.mp3", - Some("audio/mpeg".to_string()), - ); + let link = Link::enclosure("https://example.com/audio.mp3", Some("audio/mpeg".into())); assert_eq!(link.rel.as_deref(), Some("enclosure")); assert_eq!(link.link_type.as_deref(), Some("audio/mpeg")); @@ -624,4 +1053,144 @@ mod tests { assert_eq!(TextType::Text, TextType::Text); assert_ne!(TextType::Text, TextType::Html); } + + // Newtype tests + + #[test] + fn test_url_new() { + let url = Url::new("https://example.com"); + assert_eq!(url.as_str(), "https://example.com"); + } + + #[test] + fn test_url_from_string() { + let url: Url = String::from("https://example.com").into(); + assert_eq!(url.as_str(), "https://example.com"); + } + + #[test] + fn test_url_from_str() { + let url: Url = "https://example.com".into(); + assert_eq!(url.as_str(), "https://example.com"); + } + + #[test] + fn test_url_deref() { + let url = Url::new("https://example.com"); + // Deref allows calling str methods directly + assert_eq!(url.len(), 19); + assert!(url.starts_with("https://")); + } + + #[test] + fn test_url_into_inner() { + let url = Url::new("https://example.com"); + let inner = url.into_inner(); + assert_eq!(inner, "https://example.com"); + } + + #[test] + fn test_url_default() { + let url = Url::default(); + assert_eq!(url.as_str(), ""); + } + + #[test] + fn test_url_clone() { + let url1 = Url::new("https://example.com"); + let url2 = url1.clone(); + assert_eq!(url1, url2); + } + + #[test] + fn test_mime_type_new() { + let mime = MimeType::new("text/html"); + assert_eq!(mime.as_str(), "text/html"); + } + + #[test] + fn test_mime_type_from_string() { + let mime: MimeType = String::from("application/json").into(); + assert_eq!(mime.as_str(), "application/json"); + } + + #[test] + fn test_mime_type_from_str() { + let mime: MimeType = "text/plain".into(); + assert_eq!(mime.as_str(), "text/plain"); + } + + #[test] + fn test_mime_type_deref() { + let mime = MimeType::new("text/html"); + assert_eq!(mime.len(), 9); + assert!(mime.starts_with("text/")); + } + + #[test] + fn test_mime_type_default() { + let mime = MimeType::default(); + assert_eq!(mime.as_str(), ""); + } + + #[test] + fn test_mime_type_clone() { + let mime1 = MimeType::new("application/xml"); + let mime2 = mime1.clone(); + assert_eq!(mime1, mime2); + // Arc cloning is cheap - just increments refcount + } + + #[test] + fn test_mime_type_constants() { + assert_eq!(MimeType::TEXT_HTML, "text/html"); + assert_eq!(MimeType::TEXT_PLAIN, "text/plain"); + assert_eq!(MimeType::APPLICATION_XML, "application/xml"); + assert_eq!(MimeType::APPLICATION_JSON, "application/json"); + } + + #[test] + fn test_email_new() { + let email = Email::new("user@example.com"); + assert_eq!(email.as_str(), "user@example.com"); + } + + #[test] + fn test_email_from_string() { + let email: Email = String::from("user@example.com").into(); + assert_eq!(email.as_str(), "user@example.com"); + } + + #[test] + fn test_email_from_str() { + let email: Email = "user@example.com".into(); + assert_eq!(email.as_str(), "user@example.com"); + } + + #[test] + fn test_email_deref() { + let email = Email::new("user@example.com"); + assert_eq!(email.len(), 16); + assert!(email.contains('@')); + } + + #[test] + fn test_email_into_inner() { + let email = 
Email::new("user@example.com"); + let inner = email.into_inner(); + assert_eq!(inner, "user@example.com"); + } + + #[test] + fn test_email_default() { + let email = Email::default(); + assert_eq!(email.as_str(), ""); + } + + #[test] + fn test_email_clone() { + let email1 = Email::new("user@example.com"); + let email2 = email1.clone(); + assert_eq!(email1, email2); + } } diff --git a/crates/feedparser-rs-core/src/types/entry.rs b/crates/feedparser-rs-core/src/types/entry.rs index cc1262d..c081f4b 100644 --- a/crates/feedparser-rs-core/src/types/entry.rs +++ b/crates/feedparser-rs-core/src/types/entry.rs @@ -10,8 +10,8 @@ use chrono::{DateTime, Utc}; /// Feed entry/item #[derive(Debug, Clone, Default)] pub struct Entry { - /// Unique entry identifier - pub id: Option, + /// Unique entry identifier (stored inline for IDs ≤24 bytes) + pub id: Option, /// Entry title pub title: Option, /// Detailed title with metadata @@ -34,16 +34,16 @@ pub struct Entry { pub created: Option>, /// Expiration date pub expired: Option>, - /// Primary author name - pub author: Option, + /// Primary author name (stored inline for names ≤24 bytes) + pub author: Option, /// Detailed author information pub author_detail: Option, /// All authors pub authors: Vec, /// Contributors pub contributors: Vec, - /// Publisher name - pub publisher: Option, + /// Publisher name (stored inline for names ≤24 bytes) + pub publisher: Option, /// Detailed publisher information pub publisher_detail: Option, /// Tags/categories @@ -55,9 +55,9 @@ pub struct Entry { /// Source feed reference pub source: Option, /// iTunes episode metadata (if present) - pub itunes: Option, - /// Dublin Core creator (author fallback) - pub dc_creator: Option, + pub itunes: Option>, + /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes + pub dc_creator: Option, /// Dublin Core date (publication date fallback) pub dc_date: Option>, /// Dublin Core subjects (tags) @@ -73,9 +73,9 @@ pub struct Entry { /// Podcast 2.0 persons for this episode (hosts, guests, etc.) pub podcast_persons: Vec, /// Podcast 2.0 episode metadata - pub podcast: Option, + pub podcast: Option>, /// `GeoRSS` location data - pub geo: Option, + pub geo: Option>, /// License URL (Creative Commons, etc.) 
pub license: Option, } @@ -209,8 +209,8 @@ impl Entry { } self.links.try_push_limited( Link { - href, - rel: Some("alternate".to_string()), + href: href.into(), + rel: Some("alternate".into()), ..Default::default() }, max_links, diff --git a/crates/feedparser-rs-core/src/types/feed.rs b/crates/feedparser-rs-core/src/types/feed.rs index 506f9d7..6f51af0 100644 --- a/crates/feedparser-rs-core/src/types/feed.rs +++ b/crates/feedparser-rs-core/src/types/feed.rs @@ -30,20 +30,20 @@ pub struct FeedMeta { pub updated: Option>, /// Initial publication date (RSS pubDate, Atom published) pub published: Option>, - /// Primary author name - pub author: Option, + /// Primary author name (stored inline for names ≤24 bytes) + pub author: Option, /// Detailed author information pub author_detail: Option, /// All authors pub authors: Vec, /// Contributors pub contributors: Vec, - /// Publisher name - pub publisher: Option, + /// Publisher name (stored inline for names ≤24 bytes) + pub publisher: Option, /// Detailed publisher information pub publisher_detail: Option, - /// Feed language (e.g., "en-us") - pub language: Option, + /// Feed language (e.g., "en-us") - stored inline as lang codes are ≤24 bytes + pub language: Option, /// Copyright/rights statement pub rights: Option, /// Detailed rights with metadata @@ -65,21 +65,21 @@ pub struct FeedMeta { /// Time-to-live (update frequency hint) in minutes pub ttl: Option, /// iTunes podcast metadata (if present) - pub itunes: Option, + pub itunes: Option>, /// Podcast 2.0 namespace metadata (if present) - pub podcast: Option, - /// Dublin Core creator (author fallback) - pub dc_creator: Option, - /// Dublin Core publisher - pub dc_publisher: Option, + pub podcast: Option>, + /// Dublin Core creator (author fallback) - stored inline for names ≤24 bytes + pub dc_creator: Option, + /// Dublin Core publisher (stored inline for names ≤24 bytes) + pub dc_publisher: Option, /// Dublin Core rights (copyright) pub dc_rights: Option, /// License URL (Creative Commons, etc.) pub license: Option, /// Syndication module metadata (RSS 1.0) - pub syndication: Option, + pub syndication: Option>, /// Geographic location from `GeoRSS` namespace (feed level) - pub geo: Option, + pub geo: Option>, } /// Parsed feed result @@ -395,8 +395,8 @@ impl FeedMeta { } self.links.try_push_limited( Link { - href, - rel: Some("alternate".to_string()), + href: href.into(), + rel: Some("alternate".into()), ..Default::default() }, max_links, diff --git a/crates/feedparser-rs-core/src/types/mod.rs b/crates/feedparser-rs-core/src/types/mod.rs index 6db5ac3..dbbc243 100644 --- a/crates/feedparser-rs-core/src/types/mod.rs +++ b/crates/feedparser-rs-core/src/types/mod.rs @@ -6,8 +6,8 @@ mod podcast; mod version; pub use common::{ - Content, Enclosure, Generator, Image, Link, MediaContent, MediaThumbnail, Person, Source, Tag, - TextConstruct, TextType, + Content, Email, Enclosure, Generator, Image, Link, MediaContent, MediaThumbnail, MimeType, + Person, SmallString, Source, Tag, TextConstruct, TextType, Url, }; pub use entry::Entry; pub use feed::{FeedMeta, ParsedFeed}; diff --git a/crates/feedparser-rs-core/src/types/podcast.rs b/crates/feedparser-rs-core/src/types/podcast.rs index cda42cd..a906312 100644 --- a/crates/feedparser-rs-core/src/types/podcast.rs +++ b/crates/feedparser-rs-core/src/types/podcast.rs @@ -1,3 +1,5 @@ +use super::common::{MimeType, Url}; + /// iTunes podcast metadata for feeds /// /// Contains podcast-level iTunes namespace metadata from the `itunes:` prefix. 
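Why `Box` for `itunes`, `podcast`, `syndication`, and `geo`: most feeds carry none of this metadata, and `Option<Box<T>>` occupies a single word thanks to the null-pointer niche, so the common case no longer pays for the full struct embedded in every `FeedMeta`/`Entry`. A small illustration with stand-in types (sizes assume a typical 64-bit target):

```rust
use std::mem::size_of;

// Stand-in for a rarely-populated metadata block.
struct BigMeta {
    fields: [Option<String>; 10], // 240 bytes on 64-bit
}

struct Slim { meta: Option<Box<BigMeta>> } // boxed, as in this diff
struct Fat  { meta: Option<BigMeta> }      // embedded inline

fn main() {
    // Option<Box<T>> uses the null pointer as the None niche: one word.
    assert_eq!(size_of::<Slim>(), size_of::<usize>());
    // The inline variant drags the whole struct into every holder.
    assert!(size_of::<Fat>() > size_of::<Slim>());
    println!("slim = {}, fat = {}", size_of::<Slim>(), size_of::<Fat>());
}
```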
@@ -26,7 +28,7 @@ pub struct ItunesFeedMeta { /// Explicit content flag (itunes:explicit) pub explicit: Option, /// Podcast artwork URL (itunes:image href attribute) - pub image: Option, + pub image: Option, /// Search keywords (itunes:keywords) pub keywords: Vec, /// Podcast type: "episodic" or "serial" @@ -44,7 +46,7 @@ pub struct ItunesFeedMeta { /// /// This URL comes from untrusted feed input and has NOT been validated for SSRF. /// Applications MUST validate URLs before fetching to prevent SSRF attacks. - pub new_feed_url: Option, + pub new_feed_url: Option, } /// iTunes podcast metadata for episodes @@ -77,7 +79,7 @@ pub struct ItunesEntryMeta { /// Explicit content flag for this episode pub explicit: Option, /// Episode-specific artwork URL (itunes:image href) - pub image: Option, + pub image: Option, /// Episode number (itunes:episode) pub episode: Option, /// Season number (itunes:season) @@ -271,8 +273,8 @@ pub struct PodcastValueRecipient { /// use feedparser_rs::PodcastTranscript; /// /// let transcript = PodcastTranscript { -/// url: "https://example.com/transcript.txt".to_string(), -/// transcript_type: Some("text/plain".to_string()), +/// url: "https://example.com/transcript.txt".into(), +/// transcript_type: Some("text/plain".into()), /// language: Some("en".to_string()), /// rel: None, /// }; @@ -287,9 +289,9 @@ pub struct PodcastTranscript { /// /// This URL comes from untrusted feed input and has NOT been validated for SSRF. /// Applications MUST validate URLs before fetching to prevent SSRF attacks. - pub url: String, + pub url: Url, /// MIME type (type attribute): "text/plain", "text/html", "application/json", etc. - pub transcript_type: Option, + pub transcript_type: Option, /// Language code (language attribute): "en", "es", etc. pub language: Option, /// Relationship (rel attribute): "captions" or empty @@ -306,7 +308,7 @@ pub struct PodcastTranscript { /// use feedparser_rs::PodcastFunding; /// /// let funding = PodcastFunding { -/// url: "https://example.com/donate".to_string(), +/// url: "https://example.com/donate".into(), /// message: Some("Support our show!".to_string()), /// }; /// @@ -320,7 +322,7 @@ pub struct PodcastFunding { /// /// This URL comes from untrusted feed input and has NOT been validated for SSRF. /// Applications MUST validate URLs before fetching to prevent SSRF attacks. - pub url: String, + pub url: Url, /// Optional message/call-to-action (text content) pub message: Option, } @@ -338,8 +340,8 @@ pub struct PodcastFunding { /// name: "John Doe".to_string(), /// role: Some("host".to_string()), /// group: None, -/// img: Some("https://example.com/john.jpg".to_string()), -/// href: Some("https://example.com/john".to_string()), +/// img: Some("https://example.com/john.jpg".into()), +/// href: Some("https://example.com/john".into()), /// }; /// /// assert_eq!(host.name, "John Doe"); @@ -359,14 +361,14 @@ pub struct PodcastPerson { /// /// This URL comes from untrusted feed input and has NOT been validated for SSRF. /// Applications MUST validate URLs before fetching to prevent SSRF attacks. - pub img: Option, + pub img: Option, /// Personal URL/homepage (href attribute) /// /// # Security Warning /// /// This URL comes from untrusted feed input and has NOT been validated for SSRF. /// Applications MUST validate URLs before fetching to prevent SSRF attacks. 
- pub href: Option, + pub href: Option, } /// Podcast 2.0 chapters information @@ -380,8 +382,8 @@ pub struct PodcastPerson { /// use feedparser_rs::PodcastChapters; /// /// let chapters = PodcastChapters { -/// url: "https://example.com/chapters.json".to_string(), -/// type_: "application/json+chapters".to_string(), +/// url: "https://example.com/chapters.json".into(), +/// type_: "application/json+chapters".into(), /// }; /// /// assert_eq!(chapters.url, "https://example.com/chapters.json"); @@ -394,9 +396,9 @@ pub struct PodcastChapters { /// /// This URL comes from untrusted feed input and has NOT been validated for SSRF. /// Applications MUST validate URLs before fetching to prevent SSRF attacks. - pub url: String, + pub url: Url, /// MIME type (type attribute): "application/json+chapters" or "application/xml+chapters" - pub type_: String, + pub type_: MimeType, } /// Podcast 2.0 soundbite (shareable clip) @@ -688,8 +690,8 @@ mod tests { #[allow(clippy::redundant_clone)] fn test_podcast_transcript_clone() { let transcript = PodcastTranscript { - url: "https://example.com/transcript.txt".to_string(), - transcript_type: Some("text/plain".to_string()), + url: "https://example.com/transcript.txt".to_string().into(), + transcript_type: Some("text/plain".to_string().into()), language: Some("en".to_string()), rel: None, }; @@ -702,7 +704,7 @@ mod tests { #[allow(clippy::redundant_clone)] fn test_podcast_funding_clone() { let funding = PodcastFunding { - url: "https://example.com/donate".to_string(), + url: "https://example.com/donate".to_string().into(), message: Some("Support us!".to_string()), }; let cloned = funding.clone(); @@ -717,8 +719,8 @@ mod tests { name: "John Doe".to_string(), role: Some("host".to_string()), group: None, - img: Some("https://example.com/john.jpg".to_string()), - href: Some("https://example.com".to_string()), + img: Some("https://example.com/john.jpg".to_string().into()), + href: Some("https://example.com".to_string().into()), }; let cloned = person.clone(); assert_eq!(cloned.name, "John Doe"); @@ -736,8 +738,8 @@ mod tests { #[allow(clippy::redundant_clone)] fn test_podcast_chapters_clone() { let chapters = PodcastChapters { - url: "https://example.com/chapters.json".to_string(), - type_: "application/json+chapters".to_string(), + url: "https://example.com/chapters.json".to_string().into(), + type_: "application/json+chapters".to_string().into(), }; let cloned = chapters.clone(); assert_eq!(cloned.url, "https://example.com/chapters.json"); @@ -779,7 +781,7 @@ mod tests { fn test_itunes_feed_meta_new_fields() { let meta = ItunesFeedMeta { complete: Some(true), - new_feed_url: Some("https://example.com/new-feed.xml".to_string()), + new_feed_url: Some("https://example.com/new-feed.xml".to_string().into()), ..Default::default() }; diff --git a/crates/feedparser-rs-node/src/lib.rs b/crates/feedparser-rs-node/src/lib.rs index a2bd1fe..4a28102 100644 --- a/crates/feedparser-rs-node/src/lib.rs +++ b/crates/feedparser-rs-node/src/lib.rs @@ -389,13 +389,13 @@ impl From for FeedMeta { subtitle_detail: core.subtitle_detail.map(TextConstruct::from), updated: core.updated.map(|dt| dt.timestamp_millis()), published: core.published.map(|dt| dt.timestamp_millis()), - author: core.author, + author: core.author.map(|s| s.to_string()), author_detail: core.author_detail.map(Person::from), authors: core.authors.into_iter().map(Person::from).collect(), contributors: core.contributors.into_iter().map(Person::from).collect(), - publisher: core.publisher, + publisher: 
diff --git a/crates/feedparser-rs-node/src/lib.rs b/crates/feedparser-rs-node/src/lib.rs
index a2bd1fe..4a28102 100644
--- a/crates/feedparser-rs-node/src/lib.rs
+++ b/crates/feedparser-rs-node/src/lib.rs
@@ -389,13 +389,13 @@ impl From<CoreFeedMeta> for FeedMeta {
             subtitle_detail: core.subtitle_detail.map(TextConstruct::from),
             updated: core.updated.map(|dt| dt.timestamp_millis()),
             published: core.published.map(|dt| dt.timestamp_millis()),
-            author: core.author,
+            author: core.author.map(|s| s.to_string()),
             author_detail: core.author_detail.map(Person::from),
             authors: core.authors.into_iter().map(Person::from).collect(),
             contributors: core.contributors.into_iter().map(Person::from).collect(),
-            publisher: core.publisher,
+            publisher: core.publisher.map(|s| s.to_string()),
             publisher_detail: core.publisher_detail.map(Person::from),
-            language: core.language,
+            language: core.language.map(|s| s.to_string()),
             rights: core.rights,
             rights_detail: core.rights_detail.map(TextConstruct::from),
             generator: core.generator,
@@ -404,16 +404,16 @@ impl From<CoreFeedMeta> for FeedMeta {
             icon: core.icon,
             logo: core.logo,
             tags: core.tags.into_iter().map(Tag::from).collect(),
-            id: core.id,
+            id: core.id.map(|s| s.to_string()),
             ttl: core.ttl,
             license: core.license,
-            syndication: core.syndication.map(SyndicationMeta::from),
-            dc_creator: core.dc_creator,
-            dc_publisher: core.dc_publisher,
+            syndication: core.syndication.map(|b| SyndicationMeta::from(*b)),
+            dc_creator: core.dc_creator.map(|s| s.to_string()),
+            dc_publisher: core.dc_publisher.map(|s| s.to_string()),
             dc_rights: core.dc_rights,
-            geo: core.geo.map(GeoLocation::from),
-            itunes: core.itunes.map(ItunesFeedMeta::from),
-            podcast: core.podcast.map(PodcastMeta::from),
+            geo: core.geo.map(|b| GeoLocation::from(*b)),
+            itunes: core.itunes.map(|b| ItunesFeedMeta::from(*b)),
+            podcast: core.podcast.map(|b| PodcastMeta::from(*b)),
         }
     }
 }
@@ -500,7 +500,7 @@ pub struct Entry {
 impl From<CoreEntry> for Entry {
     fn from(core: CoreEntry) -> Self {
         Self {
-            id: core.id,
+            id: core.id.map(|s| s.to_string()),
             title: core.title,
             title_detail: core.title_detail.map(TextConstruct::from),
             link: core.link,
@@ -512,11 +512,11 @@ impl From<CoreEntry> for Entry {
             updated: core.updated.map(|dt| dt.timestamp_millis()),
             created: core.created.map(|dt| dt.timestamp_millis()),
             expired: core.expired.map(|dt| dt.timestamp_millis()),
-            author: core.author,
+            author: core.author.map(|s| s.to_string()),
             author_detail: core.author_detail.map(Person::from),
             authors: core.authors.into_iter().map(Person::from).collect(),
             contributors: core.contributors.into_iter().map(Person::from).collect(),
-            publisher: core.publisher,
+            publisher: core.publisher.map(|s| s.to_string()),
             publisher_detail: core.publisher_detail.map(Person::from),
             tags: core.tags.into_iter().map(Tag::from).collect(),
             enclosures: core.enclosures.into_iter().map(Enclosure::from).collect(),
@@ -533,8 +533,8 @@ impl From<CoreEntry> for Entry {
                 .map(PodcastPerson::from)
                 .collect(),
             license: core.license,
-            geo: core.geo.map(GeoLocation::from),
-            dc_creator: core.dc_creator,
+            geo: core.geo.map(|b| GeoLocation::from(*b)),
+            dc_creator: core.dc_creator.map(|s| s.to_string()),
             dc_date: core.dc_date.map(|dt| dt.timestamp_millis()),
             dc_subject: core.dc_subject,
             dc_rights: core.dc_rights,
@@ -548,8 +548,8 @@ impl From<CoreEntry> for Entry {
                 .into_iter()
                 .map(MediaContent::from)
                 .collect(),
-            itunes: core.itunes.map(ItunesEntryMeta::from),
-            podcast: core.podcast.map(PodcastEntryMeta::from),
+            itunes: core.itunes.map(|b| ItunesEntryMeta::from(*b)),
+            podcast: core.podcast.map(|b| PodcastEntryMeta::from(*b)),
         }
     }
 }
@@ -577,7 +577,7 @@ impl From<CoreTextConstruct> for TextConstruct {
                 TextType::Html => "html".to_string(),
                 TextType::Xhtml => "xhtml".to_string(),
             },
-            language: core.language,
+            language: core.language.map(|s| s.to_string()),
             base: core.base,
         }
     }
@@ -604,12 +604,12 @@ pub struct Link {
 
 impl From<CoreLink> for Link {
     fn from(core: CoreLink) -> Self {
         Self {
-            href: core.href,
-            rel: core.rel,
-            link_type: core.link_type,
+            href: core.href.into_inner(),
+            rel: core.rel.map(|s| s.to_string()),
+            link_type: core.link_type.map(|t| t.to_string()),
             title: core.title,
             length: core.length.map(|l| i64::try_from(l).unwrap_or(i64::MAX)),
-            hreflang: core.hreflang,
+            hreflang: core.hreflang.map(|s| s.to_string()),
         }
     }
@@ -628,8 +628,8 @@ pub struct Person {
 impl From<CorePerson> for Person {
     fn from(core: CorePerson) -> Self {
         Self {
-            name: core.name,
-            email: core.email,
+            name: core.name.map(|s| s.to_string()),
+            email: core.email.map(|e| e.into_inner()),
             uri: core.uri,
         }
     }
@@ -649,9 +649,9 @@ pub struct Tag {
 impl From<CoreTag> for Tag {
     fn from(core: CoreTag) -> Self {
         Self {
-            term: core.term,
-            scheme: core.scheme,
-            label: core.label,
+            term: core.term.to_string(),
+            scheme: core.scheme.map(|s| s.to_string()),
+            label: core.label.map(|s| s.to_string()),
         }
     }
 }
@@ -676,7 +676,7 @@ pub struct Image {
 impl From<CoreImage> for Image {
     fn from(core: CoreImage) -> Self {
         Self {
-            url: core.url,
+            url: core.url.into_inner(),
             title: core.title,
             link: core.link,
             width: core.width,
@@ -701,9 +701,9 @@ pub struct Enclosure {
 impl From<CoreEnclosure> for Enclosure {
     fn from(core: CoreEnclosure) -> Self {
         Self {
-            url: core.url,
+            url: core.url.into_inner(),
             length: core.length.map(|l| i64::try_from(l).unwrap_or(i64::MAX)),
-            enclosure_type: core.enclosure_type,
+            enclosure_type: core.enclosure_type.map(|t| t.to_string()),
         }
     }
 }
@@ -726,8 +726,8 @@ impl From<CoreContent> for Content {
     fn from(core: CoreContent) -> Self {
         Self {
             value: core.value,
-            content_type: core.content_type,
-            language: core.language,
+            content_type: core.content_type.map(|t| t.to_string()),
+            language: core.language.map(|s| s.to_string()),
             base: core.base,
         }
     }
@@ -749,7 +749,7 @@ impl From<CoreGenerator> for Generator {
         Self {
             value: core.value,
             uri: core.uri,
-            version: core.version,
+            version: core.version.map(|s| s.to_string()),
         }
     }
 }
@@ -770,7 +770,7 @@ impl From<CoreSource> for Source {
         Self {
             title: core.title,
             link: core.link,
-            id: core.id,
+            id: core.id.map(|s| s.to_string()),
         }
     }
 }
@@ -830,7 +830,7 @@ pub struct MediaThumbnail {
 impl From<CoreMediaThumbnail> for MediaThumbnail {
     fn from(core: CoreMediaThumbnail) -> Self {
         Self {
-            url: core.url,
+            url: core.url.into_inner(),
             width: core.width,
             height: core.height,
         }
@@ -860,8 +860,8 @@ pub struct MediaContent {
 impl From<CoreMediaContent> for MediaContent {
     fn from(core: CoreMediaContent) -> Self {
         Self {
-            url: core.url,
-            content_type: core.content_type,
+            url: core.url.into_inner(),
+            content_type: core.content_type.map(|t| t.to_string()),
             filesize: core.filesize.map(|f| i64::try_from(f).unwrap_or(i64::MAX)),
             width: core.width,
             height: core.height,
@@ -902,7 +902,7 @@ pub struct ItunesFeedMeta {
 impl From<CoreItunesFeedMeta> for ItunesFeedMeta {
     fn from(core: CoreItunesFeedMeta) -> Self {
         Self {
-            author: core.author,
+            author: core.author.map(|s| s.to_string()),
             owner: core.owner.map(ItunesOwner::from),
             categories: core
                 .categories
@@ -910,11 +910,11 @@ impl From<CoreItunesFeedMeta> for ItunesFeedMeta {
                 .map(ItunesCategory::from)
                 .collect(),
             explicit: core.explicit,
-            image: core.image,
+            image: core.image.map(|u| u.into_inner()),
             keywords: core.keywords,
             podcast_type: core.podcast_type,
             complete: core.complete,
-            new_feed_url: core.new_feed_url,
+            new_feed_url: core.new_feed_url.map(|u| u.into_inner()),
         }
     }
 }
@@ -931,7 +931,7 @@ pub struct ItunesOwner {
 impl From<CoreItunesOwner> for ItunesOwner {
     fn from(core: CoreItunesOwner) -> Self {
         Self {
-            name: core.name,
+            name: core.name.map(|s| s.to_string()),
             email: core.email,
         }
     }
@@ -985,10 +985,10 @@ impl From<CoreItunesEntryMeta> for ItunesEntryMeta {
     fn from(core: CoreItunesEntryMeta) -> Self {
         Self {
             title: core.title,
-            author: core.author,
+            author: core.author.map(|s| s.to_string()),
             duration: core.duration,
             explicit: core.explicit,
-            image: core.image,
+            image: core.image.map(|u| u.into_inner()),
             episode: core.episode,
             season: core.season,
             episode_type: core.episode_type,
@@ -1075,7 +1075,7 @@ pub struct PodcastValueRecipient {
 impl From<CorePodcastValueRecipient> for PodcastValueRecipient {
     fn from(core: CorePodcastValueRecipient) -> Self {
         Self {
-            name: core.name,
+            name: core.name.map(|s| s.to_string()),
             recipient_type: core.type_,
             address: core.address,
             split: core.split,
@@ -1098,7 +1098,7 @@ pub struct PodcastFunding {
 impl From<CorePodcastFunding> for PodcastFunding {
     fn from(core: CorePodcastFunding) -> Self {
         Self {
-            url: core.url,
+            url: core.url.into_inner(),
             message: core.message,
         }
     }
@@ -1151,8 +1151,8 @@ pub struct PodcastChapters {
 impl From<CorePodcastChapters> for PodcastChapters {
     fn from(core: CorePodcastChapters) -> Self {
         Self {
-            url: core.url,
-            chapters_type: core.type_,
+            url: core.url.into_inner(),
+            chapters_type: core.type_.to_string(),
         }
     }
 }
@@ -1198,10 +1198,10 @@ pub struct PodcastTranscript {
 impl From<CorePodcastTranscript> for PodcastTranscript {
     fn from(core: CorePodcastTranscript) -> Self {
         Self {
-            url: core.url,
-            transcript_type: core.transcript_type,
-            language: core.language,
-            rel: core.rel,
+            url: core.url.into_inner(),
+            transcript_type: core.transcript_type.map(|t| t.to_string()),
+            language: core.language.map(|s| s.to_string()),
+            rel: core.rel.map(|s| s.to_string()),
         }
     }
 }
@@ -1231,8 +1231,8 @@ impl From<CorePodcastPerson> for PodcastPerson {
             name: core.name,
             role: core.role,
             group: core.group,
-            img: core.img,
-            href: core.href,
+            img: core.img.map(|u| u.into_inner()),
+            href: core.href.map(|u| u.into_inner()),
         }
     }
 }
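Two conversion patterns repeat throughout the napi layer above: optional fields that the core crate now stores as `CompactString` are re-allocated as `String` via `map(|s| s.to_string())`, and metadata the core crate now boxes is moved out of the `Box` with `map(|b| X::from(*b))` instead of being cloned. A self-contained sketch of both shapes, using stand-in types rather than the crate's real ones:

```rust
// Stand-in types; only the conversion shapes mirror the diff above.
use compact_str::CompactString;

struct CoreMetaExample {
    author: Option<CompactString>,
    itunes: Option<Box<i64>>, // stands in for Option<Box<ItunesFeedMeta>>
}

struct NodeMetaExample {
    author: Option<String>,
    itunes: Option<i64>,
}

impl From<CoreMetaExample> for NodeMetaExample {
    fn from(core: CoreMetaExample) -> Self {
        Self {
            // CompactString implements Display, so to_string() yields a String
            author: core.author.map(|s| s.to_string()),
            // *b moves the payload out of the Box; no clone is needed
            itunes: core.itunes.map(|b| i64::from(*b)),
        }
    }
}

fn main() {
    let node = NodeMetaExample::from(CoreMetaExample {
        author: Some(CompactString::new("Jane")),
        itunes: Some(Box::new(42)),
    });
    assert_eq!(node.author.as_deref(), Some("Jane"));
    assert_eq!(node.itunes, Some(42));
}
```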
diff --git a/crates/feedparser-rs-py/src/types/entry.rs b/crates/feedparser-rs-py/src/types/entry.rs
index e95771f..c518de4 100644
--- a/crates/feedparser-rs-py/src/types/entry.rs
+++ b/crates/feedparser-rs-py/src/types/entry.rs
@@ -194,7 +194,7 @@ impl PyEntry {
     fn itunes(&self) -> Option<PyItunesEntryMeta> {
         self.inner
             .itunes
-            .as_ref()
+            .as_deref()
             .map(|i| PyItunesEntryMeta::from_core(i.clone()))
     }
 
@@ -239,7 +239,7 @@ impl PyEntry {
     fn geo(&self) -> Option<PyGeoLocation> {
         self.inner
             .geo
-            .as_ref()
+            .as_deref()
             .map(|g| PyGeoLocation::from_core(g.clone()))
     }
 
@@ -290,7 +290,7 @@ impl PyEntry {
     fn podcast(&self) -> Option<PyPodcastEntryMeta> {
         self.inner
             .podcast
-            .as_ref()
+            .as_deref()
             .map(|p| PyPodcastEntryMeta::from_core(p.clone()))
     }
 
diff --git a/crates/feedparser-rs-py/src/types/feed_meta.rs b/crates/feedparser-rs-py/src/types/feed_meta.rs
index 4f34c59..fff1850 100644
--- a/crates/feedparser-rs-py/src/types/feed_meta.rs
+++ b/crates/feedparser-rs-py/src/types/feed_meta.rs
@@ -197,7 +197,7 @@ impl PyFeedMeta {
     fn itunes(&self) -> Option<PyItunesFeedMeta> {
         self.inner
             .itunes
-            .as_ref()
+            .as_deref()
             .map(|i| PyItunesFeedMeta::from_core(i.clone()))
     }
 
@@ -205,7 +205,7 @@ impl PyFeedMeta {
     fn podcast(&self) -> Option<PyPodcastMeta> {
         self.inner
             .podcast
-            .as_ref()
+            .as_deref()
             .map(|p| PyPodcastMeta::from_core(p.clone()))
     }
 
@@ -218,7 +218,7 @@ impl PyFeedMeta {
     fn syndication(&self) -> Option<PySyndicationMeta> {
         self.inner
             .syndication
-            .as_ref()
+            .as_deref()
             .map(|s| PySyndicationMeta::from_core(s.clone()))
     }
 
@@ -241,7 +241,7 @@ impl PyFeedMeta {
     fn geo(&self) -> Option<PyGeoLocation> {
         self.inner
             .geo
-            .as_ref()
+            .as_deref()
             .map(|g| PyGeoLocation::from_core(g.clone()))
     }
 
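The `.as_ref()` → `.as_deref()` swap in these PyO3 getters falls out of the same boxing change: the underlying fields are now `Option<Box<T>>`, where `as_ref()` yields `Option<&Box<T>>` but `as_deref()` borrows through the box to yield `Option<&T>`, so the existing `from_core(x.clone())` calls keep receiving a bare `T`. A minimal illustration with a stand-in type:

```rust
// Stand-in for a boxed metadata struct behind an Option.
#[derive(Clone, Debug, PartialEq)]
struct Meta(u8);

fn main() {
    let boxed: Option<Box<Meta>> = Some(Box::new(Meta(7)));
    // as_deref(): Option<Box<Meta>> -> Option<&Meta>, so clone() gives Meta.
    let by_value: Option<Meta> = boxed.as_deref().map(|m| m.clone());
    assert_eq!(by_value, Some(Meta(7)));
}
```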