From d51364b75d9995fd2429734901c3d7ebad82cd14 Mon Sep 17 00:00:00 2001
From: Michael Weiss
Date: Wed, 14 Jan 2026 21:19:21 +0100
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20encoding=20parameter=20to=20w?=
 =?UTF-8?q?rite=5Ffile()?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: claell <26320273+claell@users.noreply.github.com>
---
Reviewer notes (below the `---` separator, so not part of the commit message):
* write_file() gains an `encoding` parameter (default "UTF-8") that is
  passed to open() when `file` is a str path. When `file` is not a str,
  the function calls file.write() directly and `encoding` is not used.
* Adds three tests: default UTF-8 output, GBK output, and a GBK
  parse -> write -> parse round trip using tests/resources/gbk_test.bib.

 bibtexparser/entrypoint.py |  6 ++-
 tests/test_entrypoint.py   | 83 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/bibtexparser/entrypoint.py b/bibtexparser/entrypoint.py
index d136558..3024784 100644
--- a/bibtexparser/entrypoint.py
+++ b/bibtexparser/entrypoint.py
@@ -139,6 +139,7 @@ def write_file(
     unparse_stack: Optional[Iterable[Middleware]] = None,
     prepend_middleware: Optional[Iterable[Middleware]] = None,
     bibtex_format: Optional[BibtexFormat] = None,
+    encoding: str = "UTF-8",
 ) -> None:
     """Write a BibTeX database to a file.
 
@@ -148,7 +149,8 @@ def write_file(
         If None, a default stack will be used.
     :param prepend_middleware: List of middleware to prepend to the default stack.
         Only applicable if `unparse_stack` is None.
-    :param bibtex_format: Customized BibTeX format to use (optional)."""
+    :param bibtex_format: Customized BibTeX format to use (optional).
+    :param encoding: Encoding of the .bib file. Default encoding is ``"UTF-8"``."""
     bibtex_str = write_string(
         library=library,
         unparse_stack=unparse_stack,
@@ -156,7 +158,7 @@ def write_file(
         bibtex_format=bibtex_format,
     )
     if isinstance(file, str):
-        with open(file, "w") as f:
+        with open(file, "w", encoding=encoding) as f:
             f.write(bibtex_str)
     else:
         file.write(bibtex_str)
diff --git a/tests/test_entrypoint.py b/tests/test_entrypoint.py
index 95fac0a..d2bb129 100644
--- a/tests/test_entrypoint.py
+++ b/tests/test_entrypoint.py
@@ -1,6 +1,13 @@
-"""Testing the parse_file function."""
+"""Testing the parse_file and write_file functions."""
+
+import os
+import tempfile
 
 from bibtexparser import parse_file
+from bibtexparser import write_file
+from bibtexparser.library import Library
+from bibtexparser.model import Entry
+from bibtexparser.model import Field
 
 
 def test_gbk():
@@ -9,3 +16,77 @@ def test_gbk():
     assert library.entries[0]["title"] == "Test Title"
     assert library.entries[0]["year"] == "2013"
     assert library.entries[0]["journal"] == "测试期刊"
+
+
+def test_write_file_default_encoding():
+    """Test write_file uses UTF-8 by default."""
+    entry = Entry(
+        entry_type="article",
+        key="test2024",
+        fields=[
+            Field(key="author", value="Müller"),
+            Field(key="title", value="Ångström measurements"),
+        ],
+    )
+    library = Library([entry])
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".bib", delete=False) as f:
+        temp_path = f.name
+
+    try:
+        write_file(temp_path, library)
+        # Read back and verify
+        with open(temp_path, encoding="UTF-8") as f:
+            content = f.read()
+        assert "Müller" in content
+        assert "Ångström" in content
+    finally:
+        os.unlink(temp_path)
+
+
+def test_write_file_gbk_encoding():
+    """Test write_file with GBK encoding for Chinese characters."""
+    entry = Entry(
+        entry_type="article",
+        key="test2024",
+        fields=[
+            Field(key="author", value="凯撒"),
+            Field(key="title", value="Test Title"),
+            Field(key="journal", value="测试期刊"),
+        ],
+    )
+    library = Library([entry])
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".bib", delete=False) as f:
+        temp_path = f.name
+
+    try:
+        write_file(temp_path, library, encoding="gbk")
+        # Read back with GBK and verify
+        with open(temp_path, encoding="gbk") as f:
+            content = f.read()
+        assert "凯撒" in content
+        assert "测试期刊" in content
+    finally:
+        os.unlink(temp_path)
+
+
+def test_write_file_roundtrip_gbk():
+    """Test round-trip: parse GBK file, write with GBK, parse again."""
+    # Parse original GBK file
+    library = parse_file("tests/resources/gbk_test.bib", encoding="gbk")
+    original_author = library.entries[0]["author"]
+    original_journal = library.entries[0]["journal"]
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".bib", delete=False) as f:
+        temp_path = f.name
+
+    try:
+        # Write with GBK encoding
+        write_file(temp_path, library, encoding="gbk")
+        # Parse back
+        library2 = parse_file(temp_path, encoding="gbk")
+        assert library2.entries[0]["author"] == original_author
+        assert library2.entries[0]["journal"] == original_journal
+    finally:
+        os.unlink(temp_path)