Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 35 additions & 29 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ nursery = { level = "warn", priority = -1 }
module_name_repetitions = "allow"
must_use_candidate = "allow"
too_many_lines = "allow"
unwrap_used = "deny"
expect_used = "deny"
panic = "deny"

[profile.release]
lto = true
Expand Down
3 changes: 2 additions & 1 deletion crates/feedparser-rs-core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used, clippy::panic))]

//! feedparser-rs-core: High-performance RSS/Atom/JSON Feed parser
//!
//! This crate provides a pure Rust implementation of feed parsing with
Expand All @@ -17,7 +19,6 @@
//! </rss>
//! "#;
//!
//! // Parsing will be fully implemented in Phase 2
//! let feed = parse(xml.as_bytes()).unwrap();
//! assert!(feed.bozo == false);
//! ```
Expand Down
131 changes: 91 additions & 40 deletions crates/feedparser-rs-core/src/parser/rss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ fn parse_channel(
feed.bozo_exception = Some(MALFORMED_ATTRIBUTES_ERROR.to_string());
}

// Extract xml:lang before matching to avoid borrow issues
let item_lang = extract_xml_lang(&e, limits.max_attribute_length);

// Use full qualified name to distinguish standard RSS tags from namespaced tags
match tag.as_slice() {
b"title" | b"link" | b"description" | b"language" | b"pubDate"
Expand All @@ -186,48 +189,21 @@ fn parse_channel(
}
}
b"item" => {
let item_lang = extract_xml_lang(&e, limits.max_attribute_length);

if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
continue;
}

let effective_lang = item_lang.as_deref().or(channel_lang);

match parse_item(reader, &mut buf, limits, depth, base_ctx, effective_lang)
{
Ok((entry, has_attr_errors)) => {
if has_attr_errors {
feed.bozo = true;
feed.bozo_exception =
Some(MALFORMED_ATTRIBUTES_ERROR.to_string());
}
feed.entries.push(entry);
}
Err(e) => {
feed.bozo = true;
feed.bozo_exception = Some(e.to_string());
}
}
parse_channel_item(
item_lang.as_deref(),
reader,
&mut buf,
feed,
limits,
depth,
base_ctx,
channel_lang,
)?;
}
_ => {
let mut handled = parse_channel_itunes(
parse_channel_extension(
reader, &mut buf, &tag, &attrs, feed, limits, depth,
)?;
if !handled {
handled = parse_channel_podcast(
reader, &mut buf, &tag, &attrs, feed, limits,
)?;
}
if !handled {
handled = parse_channel_namespace(
reader, &mut buf, &tag, feed, limits, *depth,
)?;
}

if !handled {
skip_element(reader, &mut buf, limits, *depth)?;
}
}
}
*depth = depth.saturating_sub(1);
Expand All @@ -245,6 +221,71 @@ fn parse_channel(
Ok(())
}

/// Handle one `<item>` element encountered while parsing the channel.
///
/// Flow:
/// 1. `feed.check_entry_limit(..)` is consulted first; when it yields `false`
///    the function returns `Ok(())` without calling `parse_item`.
/// 2. The language handed to `parse_item` is `item_lang` when present,
///    otherwise `channel_lang`.
/// 3. On success the entry is appended to `feed.entries`; if the item reported
///    attribute errors the feed is flagged as bozo with
///    `MALFORMED_ATTRIBUTES_ERROR`.
/// 4. On failure of `parse_item` the error is *not* propagated: the feed is
///    flagged as bozo with the error's string form and `Ok(())` is returned.
///
/// # Errors
///
/// Only an error coming out of `check_entry_limit` is propagated to the caller.
///
/// Note: takes 8 separate parameters rather than a context struct because of
/// borrow checker constraints with multiple simultaneous `&mut` references
/// during parsing.
#[inline]
#[allow(clippy::too_many_arguments)]
fn parse_channel_item(
    item_lang: Option<&str>,
    reader: &mut Reader<&[u8]>,
    buf: &mut Vec<u8>,
    feed: &mut ParsedFeed,
    limits: &ParserLimits,
    depth: &mut usize,
    base_ctx: &BaseUrlContext,
    channel_lang: Option<&str>,
) -> Result<()> {
    let may_parse = feed.check_entry_limit(reader, buf, limits, depth)?;

    if may_parse {
        // Item-level language wins over the channel-level one.
        let lang = item_lang.or(channel_lang);

        // Reduce both outcomes to "an optional bozo message" so the feed is
        // flagged in exactly one place below.
        let bozo_message = match parse_item(reader, buf, limits, depth, base_ctx, lang) {
            Ok((parsed_entry, attr_errors)) => {
                feed.entries.push(parsed_entry);
                attr_errors.then(|| MALFORMED_ATTRIBUTES_ERROR.to_string())
            }
            Err(err) => Some(err.to_string()),
        };

        if let Some(message) = bozo_message {
            feed.bozo = true;
            feed.bozo_exception = Some(message);
        }
    }

    Ok(())
}

/// Dispatch a non-standard channel-level element to the extension handlers.
///
/// The handlers are tried in a fixed order and the first one that reports the
/// tag as handled (`Ok(true)`) ends the dispatch:
///
/// 1. `parse_channel_itunes`
/// 2. `parse_channel_podcast`
/// 3. `parse_channel_namespace`
///
/// When none of them claims the tag, the element is passed to `skip_element`.
///
/// # Errors
///
/// Propagates the first error returned by any handler or by `skip_element`.
#[inline]
fn parse_channel_extension(
    reader: &mut Reader<&[u8]>,
    buf: &mut Vec<u8>,
    tag: &[u8],
    attrs: &[(Vec<u8>, String)],
    feed: &mut ParsedFeed,
    limits: &ParserLimits,
    depth: &mut usize,
) -> Result<()> {
    if parse_channel_itunes(reader, buf, tag, attrs, feed, limits, depth)? {
        return Ok(());
    }

    if parse_channel_podcast(reader, buf, tag, attrs, feed, limits)? {
        return Ok(());
    }

    if parse_channel_namespace(reader, buf, tag, feed, limits, *depth)? {
        return Ok(());
    }

    // No handler recognized the tag.
    skip_element(reader, buf, limits, *depth)?;
    Ok(())
}

/// Parse enclosure element from attributes
#[inline]
fn parse_enclosure(attrs: &[(Vec<u8>, String)], limits: &ParserLimits) -> Option<Enclosure> {
Expand Down Expand Up @@ -621,8 +662,9 @@ fn parse_item(
}
}
_ => {
let mut handled =
parse_item_itunes(reader, buf, &tag, &attrs, &mut entry, limits)?;
let mut handled = parse_item_itunes(
reader, buf, &tag, &attrs, &mut entry, limits, is_empty, *depth,
)?;
if !handled {
handled = parse_item_podcast(
reader, buf, &tag, &attrs, &mut entry, limits, is_empty, *depth,
Expand Down Expand Up @@ -729,14 +771,20 @@ fn parse_item_standard(
/// Parse iTunes namespace tags at item level
///
/// Returns `Ok(true)` if the tag was recognized and handled, `Ok(false)` if not recognized.
///
/// Note: Uses 8 parameters instead of a context struct due to borrow checker constraints
/// with multiple simultaneous `&mut` references during parsing.
#[inline]
#[allow(clippy::too_many_arguments)]
fn parse_item_itunes(
reader: &mut Reader<&[u8]>,
buf: &mut Vec<u8>,
tag: &[u8],
attrs: &[(Vec<u8>, String)],
entry: &mut Entry,
limits: &ParserLimits,
is_empty: bool,
depth: usize,
) -> Result<bool> {
if is_itunes_tag(tag, b"title") {
let text = read_text(reader, buf, limits)?;
Expand All @@ -763,6 +811,9 @@ fn parse_item_itunes(
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
itunes.image = Some(truncate_to_length(value, limits.max_attribute_length));
}
if !is_empty {
skip_element(reader, buf, limits, depth)?;
}
Ok(true)
} else if is_itunes_tag(tag, b"episode") {
let text = read_text(reader, buf, limits)?;
Expand Down
Loading