diff --git a/mkdocs_rss_plugin/config.py b/mkdocs_rss_plugin/config.py index bc37a95..78449b1 100644 --- a/mkdocs_rss_plugin/config.py +++ b/mkdocs_rss_plugin/config.py @@ -30,6 +30,8 @@ class _DateFromMeta(Config): class _FeedsFilenamesConfig(Config): """Sub configuration for feeds filenames.""" + atom_created = config_options.Type(str, default="feed_atom_created.xml") + atom_updated = config_options.Type(str, default="feed_atom_updated.xml") json_created = config_options.Type(str, default="feed_json_created.json") json_updated = config_options.Type(str, default="feed_json_updated.json") rss_created = config_options.Type(str, default="feed_rss_created.xml") @@ -41,6 +43,7 @@ class RssPluginConfig(Config): abstract_chars_count = config_options.Type(int, default=160) abstract_delimiter = config_options.Type(str, default="") + atom_feed_enabled = config_options.Type(bool, default=True) categories = config_options.Optional( config_options.ListOfItems(config_options.Type(str)) ) diff --git a/mkdocs_rss_plugin/models.py b/mkdocs_rss_plugin/models.py index 2b458e5..62dfd35 100644 --- a/mkdocs_rss_plugin/models.py +++ b/mkdocs_rss_plugin/models.py @@ -62,6 +62,7 @@ class PageInformation: created: datetime | None = None description: str | None = None guid: str | None = None + html_content: str | None = None image: tuple[str, str, int] | None = None link: str | None = None pub_date: str | None = None @@ -77,6 +78,7 @@ class PageInformation: class RssFeedBase: """Object describing a feed.""" + atom_url: str | None = None author: str | None = None buildDate: str | None = None copyright: str | None = None diff --git a/mkdocs_rss_plugin/plugin.py b/mkdocs_rss_plugin/plugin.py index 8383b80..0c5842c 100644 --- a/mkdocs_rss_plugin/plugin.py +++ b/mkdocs_rss_plugin/plugin.py @@ -110,7 +110,13 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig: return config # Fail if any export option is enabled - if not any([self.config.json_feed_enabled, self.config.rss_feed_enabled]): 
+ if not any( + [ + self.config.atom_feed_enabled, + self.config.json_feed_enabled, + self.config.rss_feed_enabled, + ] + ): logger.error( "At least one export option has to be enabled. Plugin is disabled." ) @@ -237,11 +243,11 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig: # final feed url if base_feed.html_url: # concatenate both URLs - self.feed_created.rss_url = ( - base_feed.html_url + self.config.feeds_filenames.rss_created + self.feed_created.atom_url = ( + base_feed.html_url + self.config.feeds_filenames.atom_created ) - self.feed_updated.rss_url = ( - base_feed.html_url + self.config.feeds_filenames.rss_updated + self.feed_updated.atom_url = ( + base_feed.html_url + self.config.feeds_filenames.atom_updated ) self.feed_created.json_url = ( base_feed.html_url + self.config.feeds_filenames.json_created @@ -249,6 +255,12 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig: self.feed_updated.json_url = ( base_feed.html_url + self.config.feeds_filenames.json_updated ) + self.feed_created.rss_url = ( + base_feed.html_url + self.config.feeds_filenames.rss_created + ) + self.feed_updated.rss_url = ( + base_feed.html_url + self.config.feeds_filenames.rss_updated + ) else: logger.error( "The variable `site_url` is not set in the MkDocs " @@ -257,7 +269,9 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig: ) self.feed_created.rss_url = self.feed_updated.json_url = ( self.feed_updated.rss_url - ) = self.feed_updated.json_url = None + ) = self.feed_created.json_url = self.feed_created.atom_url = ( + self.feed_updated.atom_url + ) = None # ending event return config @@ -323,6 +337,11 @@ def on_page_content( else: page_url_comments = None + # Store full HTML content if needed for Atom feed + page_content: str | None = None + if self.config.abstract_chars_count == -1: + page_content = html + # append to list to be filtered later self.pages_to_filter.append( PageInformation( @@ -332,6 +351,7 @@ def on_page_content( in_page=page, 
categories_labels=self.config.categories ), comments_url=page_url_comments, + html_content=page_content, created=page_dates[0], description=self.util.get_description_or_abstract( in_page=page, @@ -362,13 +382,13 @@ def on_post_build(self, config: config_options.Config) -> None: return # pretty print or not - pretty_print = self.config.pretty_print + pretty_print: bool = self.config.pretty_print # output filepaths - out_feed_created = Path(config.site_dir).joinpath( + out_rss_created = Path(config.site_dir).joinpath( self.config.feeds_filenames.rss_created ) - out_feed_updated = Path(config.site_dir).joinpath( + out_rss_updated = Path(config.site_dir).joinpath( self.config.feeds_filenames.rss_updated ) out_json_created = Path(config.site_dir).joinpath( @@ -377,6 +397,12 @@ def on_post_build(self, config: config_options.Config) -> None: out_json_updated = Path(config.site_dir).joinpath( self.config.feeds_filenames.json_updated ) + out_atom_created: Path = Path(config.site_dir).joinpath( + self.config.feeds_filenames.atom_created + ) + out_atom_updated: Path = Path(config.site_dir).joinpath( + self.config.feeds_filenames.atom_updated + ) # created items self.feed_created.entries.extend( @@ -435,7 +461,7 @@ def on_post_build(self, config: config_options.Config) -> None: page.pub_date = format_datetime(dt=page.created) # write file - with out_feed_created.open(mode="w", encoding="UTF8") as fifeed_created: + with out_rss_created.open(mode="w", encoding="UTF8") as fifeed_created: if pretty_print: fifeed_created.write(template.render(feed=self.feed_created)) else: @@ -457,7 +483,7 @@ def on_post_build(self, config: config_options.Config) -> None: page.pub_date = format_datetime(dt=page.updated) # write file - with out_feed_updated.open(mode="w", encoding="UTF8") as fifeed_updated: + with out_rss_updated.open(mode="w", encoding="UTF8") as fifeed_updated: if pretty_print: fifeed_updated.write(template.render(feed=self.feed_updated)) else: @@ -487,3 +513,88 @@ def 
on_post_build(self, config: config_options.Config) -> None: fp, indent=4 if self.config.pretty_print else None, ) + + # ATOM FEED + if self.config.atom_feed_enabled: + # Jinja environment depending on the pretty print option + if pretty_print: + env = Environment( + autoescape=select_autoescape(["html", "xml"]), + loader=FileSystemLoader(self.tpl_folder), + ) + else: + env = Environment( + autoescape=select_autoescape(["html", "xml"]), + loader=FileSystemLoader(self.tpl_folder), + lstrip_blocks=True, + trim_blocks=True, + ) + + template = env.get_template("atom.xml.jinja2") + + # -- Feed sorted by creation date + logger.debug( + "Fill creation dates and dump created feed into Atom template." + ) + + # Format dates for Atom (ISO 8601) + for page in self.feed_created.entries: + page.atom_published = page.created.isoformat() + page.atom_updated = page.updated.isoformat() + + # Format feed buildDate for Atom (RFC 3339: the UTC offset is mandatory) + build_date_atom: str = datetime.fromtimestamp( + get_build_timestamp() + ).astimezone().isoformat() + + # Temporarily store the ISO format + original_build_date: str = self.feed_created.buildDate + self.feed_created.buildDate = build_date_atom + + # write file + with out_atom_created.open(mode="w", encoding="UTF8") as fiatom_created: + if pretty_print: + fiatom_created.write(template.render(feed=self.feed_created)) + else: + prev_char = "" + for char in template.render(feed=asdict(self.feed_created)): + if char == "\n": + # convert new lines to spaces to preserve sentence structure + char = " " + if char == " " and prev_char == " ": + prev_char = char + continue + prev_char = char + fiatom_created.write(char) + + # Restore original buildDate + self.feed_created.buildDate = original_build_date + + # -- Feed sorted by update date + logger.debug("Fill update dates and dump updated feed into Atom template.") + + for page in self.feed_updated.entries: + page.atom_published = page.created.isoformat() + page.atom_updated = page.updated.isoformat() + + original_build_date 
= self.feed_updated.buildDate + self.feed_updated.buildDate = build_date_atom + + # write file + with out_atom_updated.open(mode="w", encoding="UTF8") as fiatom_updated: + if pretty_print: + fiatom_updated.write(template.render(feed=self.feed_updated)) + else: + prev_char = "" + for char in template.render(feed=asdict(self.feed_updated)): + if char == "\n": + # convert new lines to spaces to preserve sentence structure + char = " " + if char == " " and prev_char == " ": + prev_char = char + continue + prev_char = char + fiatom_updated.write(char) + + # Restore original buildDate + self.feed_updated.buildDate = original_build_date diff --git a/mkdocs_rss_plugin/templates/atom.xml.jinja2 b/mkdocs_rss_plugin/templates/atom.xml.jinja2 new file mode 100644 index 0000000..6af3dea --- /dev/null +++ b/mkdocs_rss_plugin/templates/atom.xml.jinja2 @@ -0,0 +1,67 @@ + + + {# Mandatory elements #} + {% if feed.title is not none %}{{ feed.title|e }}{% endif %} + {% if feed.html_url is not none %}{% endif %} + {% if feed.atom_url is not none %}{% endif %} + {% if feed.atom_url is not none %}{{ feed.atom_url }}{% endif %} + + {# Optional elements #} + {% if feed.description is not none %}{{ feed.description|e }}{% endif %} + {% if feed.author is not none %} + + {{ feed.author|e }} + + {% endif %} + + {# Timestamps #} + {{ feed.buildDate }} + + {# Credits #} + {{ feed.generator }} + + {# Feed illustration #} + {% if feed.logo_url is defined %} + {{ feed.logo_url }} + {% endif %} + + {# Entries #} + {% for item in feed.entries %} + + {{ item.title|e }} + {% if item.link is not none %}{% endif %} + {% if item.guid is not none %}{{ item.guid }}{% endif %} + {{ item.atom_updated }} + {{ item.atom_published }} + + {# Authors loop #} + {% if item.authors is not none %} + {% for author in item.authors %} + + {{ author }} + + {% endfor %} + {% endif %} + + {# Categories loop #} + {% if item.categories is not none %} + {% for category in item.categories %} + + {% endfor %} + {% endif %} + + 
{{ item.description|e }} + + {# Full content if available - only include if not empty #} + {% if item.content is not none and item.content|trim != "" %} + {{ item.content|e }} + {% endif %} + + + {# Image enclosure #} + {% if item.image is not none %} + + {% endif %} + + {% endfor %} + diff --git a/mkdocs_rss_plugin/util.py b/mkdocs_rss_plugin/util.py index 332a769..f8be15c 100644 --- a/mkdocs_rss_plugin/util.py +++ b/mkdocs_rss_plugin/util.py @@ -219,6 +219,18 @@ def get_file_dates( tuple[datetime, datetime]: tuple of timestamps (creation date, last commit date) """ logger.debug(f"Extracting dates for {in_page.file.src_uri}") + + # handle temporary pages (e.g. archive pages of blog plugin of Material for Mkdocs) + if in_page.file.abs_src_path.startswith("/tmp"): + logger.debug( + f"Temporary page detected ({in_page.file.abs_src_path}). " + "Falling back to build date for both creation and update dates." + ) + return ( + get_build_datetime(), + get_build_datetime(), + ) + # empty vars dt_created = dt_updated = None if meta_default_time is None: diff --git a/tests/fixtures/mkdocs_atom_disabled.yml b/tests/fixtures/mkdocs_atom_disabled.yml new file mode 100644 index 0000000..6fca392 --- /dev/null +++ b/tests/fixtures/mkdocs_atom_disabled.yml @@ -0,0 +1,10 @@ +site_name: Test RSS Plugin +site_description: Test with Atom disabled +site_url: https://guts.github.io/mkdocs-rss-plugin + +plugins: + - rss: + atom_feed_enabled: false + +theme: + name: mkdocs diff --git a/tests/fixtures/mkdocs_custom_atom_filenames.yml b/tests/fixtures/mkdocs_custom_atom_filenames.yml new file mode 100644 index 0000000..d787764 --- 
/dev/null +++ b/tests/fixtures/mkdocs_custom_atom_filenames.yml @@ -0,0 +1,12 @@ +site_name: Test RSS Plugin with custom Atom filenames +site_description: Test Atom with custom filenames +site_url: https://guts.github.io/mkdocs-rss-plugin + +plugins: + - rss: + feeds_filenames: + atom_created: atom.xml + atom_updated: atom-updated.xml + +theme: + name: mkdocs diff --git a/tests/test_atom.py b/tests/test_atom.py new file mode 100644 index 0000000..07630a6 --- /dev/null +++ b/tests/test_atom.py @@ -0,0 +1,288 @@ +#! python3 # noqa E265 + +"""Usage from the repo root folder: + +.. code-block:: python + + # for whole test + python -m unittest tests.test_atom + +""" + +# ############################################################################# +# ########## Libraries ############# +# ################################## + +# Standard library +import logging +import tempfile +import unittest +from pathlib import Path +from traceback import format_exception + +# 3rd party +import feedparser + +# test suite +from tests.base import BaseTest + +# -- Globals -- +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +OUTPUT_ATOM_FEED_CREATED = "feed_atom_created.xml" +OUTPUT_ATOM_FEED_UPDATED = "feed_atom_updated.xml" + + +# ############################################################################# +# ########## Classes ############### +# ################################## + + +class TestBuildAtom(BaseTest): + """Test MkDocs build with Atom feed generation.""" + + # -- Standard methods -------------------------------------------------------- + @classmethod + def setUpClass(cls): + """Executed when module is loaded before any test.""" + cls.feed_image = "https://github.com/Guts/mkdocs-rss-plugin/blob/main/docs/assets/logo_rss_plugin_mkdocs.png?raw=true" + + def setUp(self): + """Executed before each test.""" + pass + + def tearDown(self): + """Executed after each test.""" + pass + + @classmethod + def tearDownClass(cls): + """Executed after the last test.""" + pass 
+ + # -- TESTS --------------------------------------------------------- + def test_simple_build_atom(self): + """Test that Atom feeds are generated correctly.""" + with tempfile.TemporaryDirectory() as tmpdirname: + cli_result = self.build_docs_setup( + testproject_path="docs", + mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_minimal.yml"), + output_path=tmpdirname, + strict=True, + ) + + if cli_result.exception is not None: + e = cli_result.exception + logger.debug(format_exception(type(e), e, e.__traceback__)) + + self.assertEqual(cli_result.exit_code, 0) + self.assertIsNone(cli_result.exception) + + # Check that Atom feeds were created + self.assertTrue( + Path(tmpdirname).joinpath(OUTPUT_ATOM_FEED_CREATED).exists() + ) + self.assertTrue( + Path(tmpdirname).joinpath(OUTPUT_ATOM_FEED_UPDATED).exists() + ) + + def test_atom_feed_validation(self): + """Test that generated Atom feeds are valid.""" + with tempfile.TemporaryDirectory() as tmpdirname: + cli_result = self.build_docs_setup( + testproject_path="docs", + mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_complete.yml"), + output_path=tmpdirname, + ) + + if cli_result.exception is not None: + e = cli_result.exception + logger.debug(format_exception(type(e), e, e.__traceback__)) + + self.assertEqual(cli_result.exit_code, 0) + self.assertIsNone(cli_result.exception) + + # created items + feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_ATOM_FEED_CREATED) + self.assertEqual(feed_parsed.bozo, 0, "Feed should parse without errors") + self.assertEqual(feed_parsed.version, "atom10", "Should be Atom 1.0 feed") + + # updated items + feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_ATOM_FEED_UPDATED) + self.assertEqual(feed_parsed.bozo, 0, "Feed should parse without errors") + self.assertEqual(feed_parsed.version, "atom10", "Should be Atom 1.0 feed") + + def test_atom_feed_content(self): + """Test that Atom feed contains correct elements.""" + with tempfile.TemporaryDirectory() as tmpdirname: + 
cli_result = self.build_docs_setup( + testproject_path="docs", + mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_complete.yml"), + output_path=tmpdirname, + ) + + if cli_result.exception is not None: + e = cli_result.exception + logger.debug(format_exception(type(e), e, e.__traceback__)) + + self.assertEqual(cli_result.exit_code, 0) + self.assertIsNone(cli_result.exception) + + # Parse the feed + feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_ATOM_FEED_CREATED) + + # Check feed-level elements + self.assertTrue(hasattr(feed_parsed.feed, "title")) + self.assertTrue(hasattr(feed_parsed.feed, "subtitle")) + self.assertTrue(hasattr(feed_parsed.feed, "updated")) + self.assertTrue(hasattr(feed_parsed.feed, "id")) + + # Check entries + for feed_item in feed_parsed.entries: + # Mandatory properties + self.assertTrue("title" in feed_item) + self.assertTrue("id" in feed_item) + self.assertTrue("updated" in feed_item) + self.assertTrue("published" in feed_item) + self.assertTrue("summary" in feed_item) + + # Links + self.assertTrue("links" in feed_item) + self.assertGreater(len(feed_item.links), 0) + + def test_atom_feed_dates_rfc3339(self): + """Test that Atom feed dates are in RFC 3339 format (ISO 8601 with timezone).""" + with tempfile.TemporaryDirectory() as tmpdirname: + cli_result = self.build_docs_setup( + testproject_path="docs", + mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_complete.yml"), + output_path=tmpdirname, + ) + + if cli_result.exception is not None: + e = cli_result.exception + logger.debug(format_exception(type(e), e, e.__traceback__)) + + self.assertEqual(cli_result.exit_code, 0) + self.assertIsNone(cli_result.exception) + + # Parse the feed + feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_ATOM_FEED_CREATED) + + # Check feed updated date contains timezone + self.assertIn( + "+", + feed_parsed.feed.updated, + "Feed updated date should contain timezone offset", + ) + + # Check entry dates contain timezone + for feed_item in 
feed_parsed.entries: + self.assertIn( + "+", + feed_item.updated, + f"Entry '{feed_item.title}' updated date should contain timezone", + ) + self.assertIn( + "+", + feed_item.published, + f"Entry '{feed_item.title}' published date should contain timezone", + ) + + def test_atom_feed_disabled(self): + """Test that Atom feeds are not generated when disabled.""" + with tempfile.TemporaryDirectory() as tmpdirname: + cli_result = self.build_docs_setup( + testproject_path="docs", + mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_atom_disabled.yml"), + output_path=tmpdirname, + ) + + if cli_result.exception is not None: + e = cli_result.exception + logger.debug(format_exception(type(e), e, e.__traceback__)) + + self.assertEqual(cli_result.exit_code, 0) + self.assertIsNone(cli_result.exception) + + # Check that Atom feeds were NOT created + self.assertFalse( + Path(tmpdirname).joinpath(OUTPUT_ATOM_FEED_CREATED).exists() + ) + self.assertFalse( + Path(tmpdirname).joinpath(OUTPUT_ATOM_FEED_UPDATED).exists() + ) + + def test_atom_feed_full_content(self): + """Test Atom feed with full content (abstract_chars_count: -1).""" + with tempfile.TemporaryDirectory() as tmpdirname: + cli_result = self.build_docs_setup( + testproject_path="docs", + mkdocs_yml_filepath=Path( + "tests/fixtures/mkdocs_item_length_unlimited.yml" + ), + output_path=tmpdirname, + ) + + if cli_result.exception is not None: + e = cli_result.exception + logger.debug(format_exception(type(e), e, e.__traceback__)) + + self.assertEqual(cli_result.exit_code, 0) + self.assertIsNone(cli_result.exception) + + # Parse the feed + feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_ATOM_FEED_CREATED) + + # Check that entries have content element + for feed_item in feed_parsed.entries: + if feed_item.title not in ( + "Page without meta with short text", + "Blog sample", + "Blog", + ): + # Should have content element with full HTML + self.assertTrue( + "content" in feed_item, + f"Entry '{feed_item.title}' should have 
content element", + ) + if "content" in feed_item: + self.assertGreater( + len(feed_item.content[0].value), + 150, + f"Entry '{feed_item.title}' content should be longer than summary", + ) + + def test_atom_feed_language(self): + """Test that Atom feed has xml:lang attribute.""" + with tempfile.TemporaryDirectory() as tmpdirname: + cli_result = self.build_docs_setup( + testproject_path="docs", + mkdocs_yml_filepath=Path( + "tests/fixtures/mkdocs_locale_with_territory.yml" + ), + output_path=tmpdirname, + ) + + if cli_result.exception is not None: + e = cli_result.exception + logger.debug(format_exception(type(e), e, e.__traceback__)) + + self.assertEqual(cli_result.exit_code, 0) + self.assertIsNone(cli_result.exception) + + # Read the raw XML to check for xml:lang attribute + atom_file = Path(tmpdirname) / OUTPUT_ATOM_FEED_CREATED + with atom_file.open("r", encoding="utf-8") as f: + xml_content = f.read() + + # Check that xml:lang attribute is present + self.assertIn('xml:lang="en-US"', xml_content) + + +# ############################################################################## +# ##### Stand alone program ######## +# ################################## +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_build.py b/tests/test_build.py index 5a58b74..30a6693 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -934,6 +934,38 @@ def test_xml_escaping_in_author(self): self.assertIn("OpenSavvy & contributors", feed_xml) self.assertNotIn("OpenSavvy & contributors", feed_xml) + def test_simple_build_custom_atom_filenames(self): + """Test build with custom Atom feed filenames.""" + config = self.get_plugin_config_from_mkdocs( + mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_custom_atom_filenames.yml"), + plugin_name="rss", + ) + + with tempfile.TemporaryDirectory() as tmpdirname: + cli_result = self.build_docs_setup( + testproject_path="docs", + mkdocs_yml_filepath=Path( + "tests/fixtures/mkdocs_custom_atom_filenames.yml" + ), + 
output_path=tmpdirname, + strict=True, + ) + + if cli_result.exception is not None: + e = cli_result.exception + logger.debug(format_exception(type(e), e, e.__traceback__)) + + self.assertEqual(cli_result.exit_code, 0) + self.assertIsNone(cli_result.exception) + + # Check custom Atom filenames + self.assertTrue( + Path(tmpdirname).joinpath(config.feeds_filenames.atom_created).exists() + ) + self.assertTrue( + Path(tmpdirname).joinpath(config.feeds_filenames.atom_updated).exists() + ) + # ############################################################################## # ##### Stand alone program ######## diff --git a/tests/test_config.py b/tests/test_config.py index 17200ea..e889c1d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -79,6 +79,7 @@ def test_plugin_config_defaults(self): "feed_ttl": 1440, "image": None, "json_feed_enabled": True, + "atom_feed_enabled": True, "length": 20, "match_path": ".*", "feeds_filenames": { @@ -86,6 +87,8 @@ def test_plugin_config_defaults(self): "json_updated": "feed_json_updated.json", "rss_created": "feed_rss_created.xml", "rss_updated": "feed_rss_updated.xml", + "atom_created": "feed_atom_created.xml", + "atom_updated": "feed_atom_updated.xml", }, "pretty_print": False, "rss_feed_enabled": True,