From d6d89a7065c5cbe89ec442406ec2837b8a8058af Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:50:42 +0000 Subject: [PATCH 01/12] Move BaseWritingItem so that it is available in _Creator --- libzim/libzim.pyx | 124 +++++++++++++++++++++++----------------------- 1 file changed, 61 insertions(+), 63 deletions(-) diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index 4744829..d14a507 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -254,6 +254,67 @@ class Hint(enum.Enum): FRONT_ARTICLE = zim.HintKeys.FRONT_ARTICLE +class BaseWritingItem: + """ + Data to be added to the archive. + + This is a stub to override. Pass a subclass of it to `Creator.add_item()` + """ + __module__ = writer_module_name + + def __init__(self): + self._blob = None + get_indexdata = None + + def get_path(self) -> str: + """Full path of item. + + The path must be absolute and unique. + + Returns: + Path of the item. + """ + raise NotImplementedError("get_path must be implemented.") + + def get_title(self) -> str: + """Item title. Might be indexed and used in suggestions. + + Returns: + Title of the item. + """ + raise NotImplementedError("get_title must be implemented.") + + def get_mimetype(self) -> str: + """MIME-type of the item's content. + + Returns: + Mimetype of the item. + """ + raise NotImplementedError("get_mimetype must be implemented.") + + def get_contentprovider(self) -> ContentProvider: + """ContentProvider containing the complete content of the item. + + Returns: + The content provider of the item. + """ + raise NotImplementedError("get_contentprovider must be implemented.") + + def get_hints(self) -> Dict[Hint, pyint]: + """Get the Hints that help the Creator decide how to handle this item. + + Hints affects compression, presence in suggestion, random and search. + + Returns: + Hints to help the Creator decide how to handle this item. 
+ """ + raise NotImplementedError("get_hints must be implemented.") + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(path={self.get_path()}, " + f"title={self.get_title()})" + ) cdef class _Creator: """ZIM Creator. @@ -644,69 +705,6 @@ class IndexData: return None -class BaseWritingItem: - """ - Data to be added to the archive. - - This is a stub to override. Pass a subclass of it to `Creator.add_item()` - """ - __module__ = writer_module_name - - def __init__(self): - self._blob = None - get_indexdata = None - - def get_path(self) -> str: - """Full path of item. - - The path must be absolute and unique. - - Returns: - Path of the item. - """ - raise NotImplementedError("get_path must be implemented.") - - def get_title(self) -> str: - """Item title. Might be indexed and used in suggestions. - - Returns: - Title of the item. - """ - raise NotImplementedError("get_title must be implemented.") - - def get_mimetype(self) -> str: - """MIME-type of the item's content. - - Returns: - Mimetype of the item. - """ - raise NotImplementedError("get_mimetype must be implemented.") - - def get_contentprovider(self) -> ContentProvider: - """ContentProvider containing the complete content of the item. - - Returns: - The content provider of the item. - """ - raise NotImplementedError("get_contentprovider must be implemented.") - - def get_hints(self) -> Dict[Hint, pyint]: - """Get the Hints that help the Creator decide how to handle this item. - - Hints affects compression, presence in suggestion, random and search. - - Returns: - Hints to help the Creator decide how to handle this item. 
- """ - raise NotImplementedError("get_hints must be implemented.") - - def __repr__(self) -> str: - return ( - f"{self.__class__.__name__}(path={self.get_path()}, " - f"title={self.get_title()})" - ) - - class Creator(_Creator): """Creator to create ZIM files.""" __module__ = writer_module_name From a84261205b0de4e8c01759d649e9479b6e2ea104 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:51:06 +0000 Subject: [PATCH 02/12] Fix _Creator return types --- libzim/libzim.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index d14a507..598e0ba 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -344,7 +344,7 @@ cdef class _Creator: def __init__(self, filename: pathlib.Path): pass - def config_verbose(self, bool verbose: bool) -> Creator: + def config_verbose(self, bool verbose: bool) -> _Creator: """Set creator verbosity inside libzim (default: off). Args: @@ -358,7 +358,7 @@ cdef class _Creator: self.c_creator.configVerbose(verbose) return self - def config_compression(self, compression: Compression) -> Creator: + def config_compression(self, compression: Compression) -> _Creator: """Set compression algorithm to use. Check libzim for default setting. (Fall 2021 default: zstd). @@ -374,7 +374,7 @@ cdef class _Creator: self.c_creator.configCompression(zim.comp_from_int(compression.value)) return self - def config_clustersize(self, int size: pyint) -> Creator: + def config_clustersize(self, int size: pyint) -> _Creator: """Set size of created clusters. Check libzim for default setting. (Fall 2021 default: 2Mib). @@ -393,7 +393,7 @@ cdef class _Creator: self.c_creator.configClusterSize(size) return self - def config_indexing(self, bool indexing: bool, str language: str) -> Creator: + def config_indexing(self, bool indexing: bool, str language: str) -> _Creator: """Configures the full-text indexing feature. 
Args: @@ -408,7 +408,7 @@ cdef class _Creator: self.c_creator.configIndexing(indexing, language.encode('UTF-8')) return self - def config_nbworkers(self, int nbWorkers: pyint) -> Creator: + def config_nbworkers(self, int nbWorkers: pyint) -> _Creator: """Configures the number of threads to use for internal workers (default: 4). Args: @@ -422,7 +422,7 @@ cdef class _Creator: self.c_creator.configNbWorkers(nbWorkers) return self - def set_mainpath(self, str mainPath: str) -> Creator: + def set_mainpath(self, str mainPath: str) -> _Creator: """Set path of the main entry. Args: @@ -449,7 +449,7 @@ cdef class _Creator: cdef string _content = content self.c_creator.addIllustration(size, _content) -# def set_uuid(self, uuid) -> Creator: +# def set_uuid(self, uuid) -> _Creator: # self.c_creator.setUuid(uuid) def add_item(self, writer_item not None: BaseWritingItem): From 7c10d1b7a07c164877b706e8a76f019458eb0266 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:51:30 +0000 Subject: [PATCH 03/12] Fix nogil position, must be last --- libzim/zim.pxd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libzim/zim.pxd b/libzim/zim.pxd index 3ab094e..bbd25f4 100644 --- a/libzim/zim.pxd +++ b/libzim/zim.pxd @@ -66,14 +66,14 @@ cdef extern from "zim/writer/creator.h" namespace "zim::writer": void configClusterSize(int size) void configIndexing(bint indexing, string language) void configNbWorkers(int nbWorkers) - void startZimCreation(string filepath) nogil except +; - void addItem(shared_ptr[WriterItem] item) nogil except + - void addMetadata(string name, string content, string mimetype) nogil except + - void addRedirection(string path, string title, string targetpath, map[HintKeys, uint64_t] hints) nogil except + + void startZimCreation(string filepath) except + nogil + void addItem(shared_ptr[WriterItem] item) except + nogil + void addMetadata(string name, string content, string mimetype) except + nogil + void addRedirection(string path, 
string title, string targetpath, map[HintKeys, uint64_t] hints) except + nogil void addAlias(string path, string title, string targetpath, map[HintKeys, uint64_t] hints) except + nogil - void finishZimCreation() nogil except + + void finishZimCreation() except + nogil void setMainPath(string mainPath) - void addIllustration(unsigned int size, string content) nogil except + + void addIllustration(unsigned int size, string content) except + nogil cdef extern from "zim/search.h" namespace "zim": cdef cppclass Query: From 80f37629f749e88398aeafb3a511eeb16511c2ef Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:51:50 +0000 Subject: [PATCH 04/12] Use new format for license in pyproject.toml --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 65a9712..efb0d99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ authors = [ {name = "openZIM", email = "dev@kiwix.org"}, ] readme = "README.md" -license = {text = "GPL-3.0-or-later"} +license = "GPL-3.0-or-later" classifiers = [ "Development Status :: 5 - Production/Stable", "Topic :: Utilities", @@ -36,7 +36,6 @@ classifiers = [ "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Typing :: Stubs Only", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", "Operating System :: MacOS", "Operating System :: POSIX", ] From 9c2aaec18cfb74bbd0f1b5181d8336111a94ae58 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:54:35 +0000 Subject: [PATCH 05/12] Move ContentProvider --- libzim/libzim.pyx | 87 ++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index 598e0ba..e2ae38a 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -254,6 +254,50 @@ class Hint(enum.Enum): FRONT_ARTICLE = zim.HintKeys.FRONT_ARTICLE +class ContentProvider: + """ABC in charge of providing 
the content to add in the archive to the Creator.""" + __module__ = writer_module_name + def __init__(self): + self.generator = None + + def get_size(self) -> pyint: + """Size of `get_data`'s result in bytes. + + Returns: + int: The size of the data in bytes. + """ + raise NotImplementedError("get_size must be implemented.") + + def feed(self) -> WritingBlob: + """Blob(s) containing the complete content of the article. + + Must return an empty blob to tell writer no more content has to be written. + Sum(size(blobs)) must be equals to `self.get_size()` + + Returns: + WritingBlob: The content blob(s) of the article. + """ + if self.generator is None: + self.generator = self.gen_blob() + + try: + # We have to keep a ref to _blob to be sure gc do not del it while cpp is + # using it + self._blob = next(self.generator) + except StopIteration: + self._blob = WritingBlob("") + + return self._blob + + def gen_blob(self) -> Generator[WritingBlob, None, None]: + """Generator yielding blobs for the content of the article. + + Yields: + WritingBlob: A blob containing part of the article content. + """ + raise NotImplementedError("gen_blob (ro feed) must be implemented") + + class BaseWritingItem: """ Data to be added to the archive. @@ -564,49 +608,6 @@ cdef class _Creator: """ return self._filename -class ContentProvider: - """ABC in charge of providing the content to add in the archive to the Creator.""" - __module__ = writer_module_name - def __init__(self): - self.generator = None - - def get_size(self) -> pyint: - """Size of `get_data`'s result in bytes. - - Returns: - int: The size of the data in bytes. - """ - raise NotImplementedError("get_size must be implemented.") - - def feed(self) -> WritingBlob: - """Blob(s) containing the complete content of the article. - - Must return an empty blob to tell writer no more content has to be written. - Sum(size(blobs)) must be equals to `self.get_size()` - - Returns: - WritingBlob: The content blob(s) of the article. 
- """ - if self.generator is None: - self.generator = self.gen_blob() - - try: - # We have to keep a ref to _blob to be sure gc do not del it while cpp is - # using it - self._blob = next(self.generator) - except StopIteration: - self._blob = WritingBlob("") - - return self._blob - - def gen_blob(self) -> Generator[WritingBlob, None, None]: - """Generator yielding blobs for the content of the article. - - Yields: - WritingBlob: A blob containing part of the article content. - """ - raise NotImplementedError("gen_blob (ro feed) must be implemented") - class StringProvider(ContentProvider): """ContentProvider for a single encoded-or-not UTF-8 string.""" From dbfc2f9ed05bc7c9243f0d212091d5d04785e223 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 20:55:17 +0000 Subject: [PATCH 06/12] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb9c05c..d0bfc2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add support for free-threaded CPython (3.13 and 3.14) - Upgrade Github CI Actions - Run tests on minimum supported platforms + more recent stable ones +- Fix various Cython warning and deprecation notices (#239) ## [3.7.0] - 2025-04-18 From 1cc9e8780f824703bb1f7e1866193f005503a82f Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 07:39:22 +0000 Subject: [PATCH 07/12] Upgrade to libzim 9.4.0 --- .github/workflows/CI-wheels.yaml | 2 +- .github/workflows/Publish.yaml | 2 +- .github/workflows/QA.yaml | 2 +- .github/workflows/Tests.yaml | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/CI-wheels.yaml b/.github/workflows/CI-wheels.yaml index 79a8703..e6e5b4a 100644 --- a/.github/workflows/CI-wheels.yaml +++ b/.github/workflows/CI-wheels.yaml @@ -7,7 +7,7 @@ on: - main env: - LIBZIM_DL_VERSION: "9.3.0-1" + LIBZIM_DL_VERSION: "9.4.0" 
MACOSX_DEPLOYMENT_TARGET: "13.0" CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION" CIBW_BUILD_VERBOSITY: "3" diff --git a/.github/workflows/Publish.yaml b/.github/workflows/Publish.yaml index a692ab2..49a7a15 100644 --- a/.github/workflows/Publish.yaml +++ b/.github/workflows/Publish.yaml @@ -6,7 +6,7 @@ on: - published env: - LIBZIM_DL_VERSION: "9.3.0-1" + LIBZIM_DL_VERSION: "9.4.0" MACOSX_DEPLOYMENT_TARGET: "13.0" CIBW_ENVIRONMENT_PASS_LINUX: "LIBZIM_DL_VERSION" # APPLE_SIGNING_KEYCHAIN_PATH set in prepare keychain step diff --git a/.github/workflows/QA.yaml b/.github/workflows/QA.yaml index ea14c54..51e763c 100644 --- a/.github/workflows/QA.yaml +++ b/.github/workflows/QA.yaml @@ -2,7 +2,7 @@ name: QA on: [push] env: - LIBZIM_DL_VERSION: "9.3.0-1" + LIBZIM_DL_VERSION: "9.4.0" MACOSX_DEPLOYMENT_TARGET: "13.0" jobs: diff --git a/.github/workflows/Tests.yaml b/.github/workflows/Tests.yaml index f1a84f2..de1bf82 100644 --- a/.github/workflows/Tests.yaml +++ b/.github/workflows/Tests.yaml @@ -2,7 +2,7 @@ name: Tests on: [push] env: - LIBZIM_DL_VERSION: "9.3.0-1" + LIBZIM_DL_VERSION: "9.4.0" MACOSX_DEPLOYMENT_TARGET: "13.0" # we want cython traces for coverage PROFILE: "1" diff --git a/setup.py b/setup.py index 32f2ef3..276214d 100755 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ class Config: - libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.3.0-1") + libzim_dl_version: str = os.getenv("LIBZIM_DL_VERSION", "9.4.0") use_system_libzim: bool = bool(os.getenv("USE_SYSTEM_LIBZIM") or False) download_libzim: bool = not bool(os.getenv("DONT_DOWNLOAD_LIBZIM") or False) From 6a54e2f6531fab01ede4fdbfb2ddaaf8f0a218b1 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 07:41:31 +0000 Subject: [PATCH 08/12] Deprecate get_illustration_sizes() method --- libzim/libzim.pyx | 10 ++++++++++ tests/test_libzim_creator.py | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index e2ae38a..ead364a 100644 --- 
a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -39,6 +39,7 @@ import os import pathlib import sys import traceback +import warnings from collections import OrderedDict from types import ModuleType from typing import Dict, Generator, Iterator, List, Optional, Set, TextIO, Tuple, Union @@ -1304,9 +1305,18 @@ cdef class Archive: def get_illustration_sizes(self) -> Set[pyint]: """Sizes for which an illustration is available (@1 scale only). + .. deprecated:: 3.8.0 + Use :meth:`get_illustration_infos` instead for full illustration metadata + including width, height, and scale information. + Returns: The set of available sizes of the illustration. """ + warnings.warn( + "get_illustration_sizes() is deprecated, use get_illustration_infos() instead", + DeprecationWarning, + stacklevel=2 + ) return self.c_archive.getIllustrationSizes() def has_illustration(self, size: pyint = None) -> pybool: diff --git a/tests/test_libzim_creator.py b/tests/test_libzim_creator.py index 3bc627f..ef00932 100644 --- a/tests/test_libzim_creator.py +++ b/tests/test_libzim_creator.py @@ -369,7 +369,8 @@ def test_creator_illustration(fpath, favicon_data): assert zim.has_illustration(128) is False assert bytes(zim.get_illustration_item().content) == favicon_data assert bytes(zim.get_illustration_item(96).content) == favicon_data - assert zim.get_illustration_sizes() == {48, 96} + with pytest.warns(DeprecationWarning, match="get_illustration_sizes.*deprecated"): + assert zim.get_illustration_sizes() == {48, 96} def test_creator_additem(fpath, lipsum_item): From 75c4e87cb12711483a948db01f7f930371198e17 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 4 Nov 2025 08:22:26 +0000 Subject: [PATCH 09/12] Adopt libzim changes around cache management --- CHANGELOG.md | 5 +++ libzim/libwrapper.h | 5 --- libzim/libzim.pyx | 84 +++++++++++++------------------------ libzim/reader.pyi | 14 ++----- libzim/zim.pxd | 10 ++--- tests/test_libzim_reader.py | 58 +++++++++---------------- 6 files changed, 63 
insertions(+), 113 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0bfc2d..44f8d1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Upgrade Github CI Actions - Run tests on minimum supported platforms + more recent stable ones - Fix various Cython warning and deprecation notices (#239) +- libzim 9.4.0 Cache Control API + - remove `Archive.dirent_lookup_cache_max_size`, does not exist anymore in libzim + - move `Archive.cluster_cache_max_size` and `Archive.cluster_cache_current_size` to methods outside of `Archive` object: `get_cluster_cache_max_size`, `set_cluster_cache_max_size`, `get_cluster_cache_current_size` (size is now in bytes) + - deprecate usage of `get_illustration_sizes()` + ## [3.7.0] - 2025-04-18 diff --git a/libzim/libwrapper.h b/libzim/libwrapper.h index 7da1b47..c66eba6 100644 --- a/libzim/libwrapper.h +++ b/libzim/libwrapper.h @@ -172,14 +172,9 @@ class Archive : public Wrapper FORWARD(bool, hasTitleIndex) FORWARD(bool, hasChecksum) FORWARD(bool, check) - FORWARD(zim::size_type, getClusterCacheMaxSize) - FORWARD(zim::size_type, getClusterCacheCurrentSize) - FORWARD(void, setClusterCacheMaxSize) FORWARD(zim::size_type, getDirentCacheMaxSize) FORWARD(zim::size_type, getDirentCacheCurrentSize) FORWARD(void, setDirentCacheMaxSize) - FORWARD(zim::size_type, getDirentLookupCacheMaxSize) - FORWARD(void, setDirentLookupCacheMaxSize) }; class SearchResultSet : public Wrapper diff --git a/libzim/libzim.pyx b/libzim/libzim.pyx index ead364a..db63e4c 100644 --- a/libzim/libzim.pyx +++ b/libzim/libzim.pyx @@ -1342,37 +1342,6 @@ cdef class Archive: except RuntimeError as e: raise KeyError(str(e)) - @property - def cluster_cache_max_size(self) -> pyint: - """Maximum size of the cluster cache. - - Returns: - (int): maximum number of clusters stored in the cache. 
- """ - return self.c_archive.getClusterCacheMaxSize() - - @cluster_cache_max_size.setter - def cluster_cache_max_size(self, nb_clusters: pyint): - """Set the size of the cluster cache. - - If the new size is lower than the number of currently stored clusters - some clusters will be dropped from cache to respect the new size. - - Args: - nb_clusters (int): maximum number of clusters stored in the cache - """ - - self.c_archive.setClusterCacheMaxSize(nb_clusters) - - @property - def cluster_cache_current_size(self) -> pyint: - """Size of the cluster cache. - - Returns: - (int): number of clusters currently stored in the cache. - """ - return self.c_archive.getClusterCacheCurrentSize() - @property def dirent_cache_max_size(self) -> pyint: """Maximum size of the dirent cache. @@ -1403,36 +1372,38 @@ cdef class Archive: """ return self.c_archive.getDirentCacheCurrentSize() - @property - def dirent_lookup_cache_max_size(self) -> pyint: - """Size of the dirent lookup cache. + def __repr__(self) -> str: + return f"{self.__class__.__name__}(filename={self.filename})" - The returned size returns the default size or the last set size. - This may not correspond to the actual size of the dirent lookup cache. - See set_dirent_lookup_cache_max_size for more information. - Returns: - (int): maximum number of sub ranges created in the lookup cache. - """ - return self.c_archive.getDirentLookupCacheMaxSize() +def get_cluster_cache_max_size() -> pyint: + """Get the maximum size of the cluster cache. - @dirent_lookup_cache_max_size.setter - def dirent_lookup_cache_max_size(self, nb_ranges: pyint): - """Set the size of the dirent lookup cache. + Returns: + (int): the maximum memory size used by the cluster cache (in bytes). + """ + return zim.getClusterCacheMaxSize() - Contrary to other set__cache_max_size, this method is useless - once the lookup cache is created. - The lookup cache is created at first access to a entry in the archive. 
- So this method must be called before any access to content (including metadata). - It is best to call this method first, just after the archive creation. +def set_cluster_cache_max_size(size_in_bytes: pyint): + """Set the size of the cluster cache. - Args: - nb_ranges (int): maximum number of sub ranges created in the lookup cache. - """ - self.c_archive.setDirentLookupCacheMaxSize(nb_ranges) + If the new size is lower than the number of currently stored clusters + some clusters will be dropped from cache to respect the new size. + + Args: + size_in_bytes (int): the memory limit (in bytes) for the cluster cache. + """ + + zim.setClusterCacheMaxSize(size_in_bytes) + +def get_cluster_cache_current_size() -> pyint: + """Get the current size of the cluster cache. + + Returns: + (int): the current memory size (in bytes) used by the cluster cache. + """ + return zim.getClusterCacheCurrentSize() - def __repr__(self) -> str: - return f"{self.__class__.__name__}(filename={self.filename})" reader_module_doc = """libzim reader module @@ -1452,6 +1423,9 @@ reader_public_objects = [ Archive, Entry, Item, + get_cluster_cache_max_size, + set_cluster_cache_max_size, + get_cluster_cache_current_size, ] reader = create_module(reader_module_name, reader_module_doc, reader_public_objects) diff --git a/libzim/reader.pyi b/libzim/reader.pyi index e0f1cb5..143e099 100644 --- a/libzim/reader.pyi +++ b/libzim/reader.pyi @@ -78,19 +78,13 @@ class Archive: def has_illustration(self, size: int | None = None) -> bool: ... def get_illustration_item(self, size: int | None = None) -> Item: ... @property - def cluster_cache_max_size(self) -> int: ... - @cluster_cache_max_size.setter - def cluster_cache_max_size(self, nb_clusters: int): ... - @property - def cluster_cache_current_size(self) -> int: ... - @property def dirent_cache_max_size(self) -> int: ... @dirent_cache_max_size.setter def dirent_cache_max_size(self, nb_dirents: int): ... @property def dirent_cache_current_size(self) -> int: ... 
- @property - def dirent_lookup_cache_max_size(self) -> int: ... - @dirent_lookup_cache_max_size.setter - def dirent_lookup_cache_max_size(self, nb_ranges: int): ... def __repr__(self) -> str: ... + +def get_cluster_cache_max_size() -> int: ... +def set_cluster_cache_max_size(nb_clusters: int): ... +def get_cluster_cache_current_size() -> int: ... diff --git a/libzim/zim.pxd b/libzim/zim.pxd index bbd25f4..691f0e9 100644 --- a/libzim/zim.pxd +++ b/libzim/zim.pxd @@ -178,14 +178,9 @@ cdef extern from "libwrapper.h" namespace "wrapper": bool hasChecksum() except + bool check() except + - uint64_t getClusterCacheMaxSize() except + - uint64_t getClusterCacheCurrentSize() except + - void setClusterCacheMaxSize(uint64_t nbClusters) except + uint64_t getDirentCacheMaxSize() except + uint64_t getDirentCacheCurrentSize() except + void setDirentCacheMaxSize(uint64_t nbDirents) except + - uint64_t getDirentLookupCacheMaxSize() except + - void setDirentLookupCacheMaxSize(uint64_t nbRanges) except + cdef cppclass Searcher: Searcher() @@ -233,3 +228,8 @@ cdef extern from "libwrapper.h" namespace "wrapper": cdef extern from "zim/version.h" namespace "zim": cdef vector[pair[string, string]] getVersions() + +cdef extern from "zim/archive.h" namespace "zim": + cdef uint64_t getClusterCacheMaxSize() except + + cdef uint64_t getClusterCacheCurrentSize() except + + cdef void setClusterCacheMaxSize(uint64_t sizeInB) except + \ No newline at end of file diff --git a/tests/test_libzim_reader.py b/tests/test_libzim_reader.py index 2e0ca28..e8e0bd3 100644 --- a/tests/test_libzim_reader.py +++ b/tests/test_libzim_reader.py @@ -9,7 +9,13 @@ import pytest import libzim.writer # pyright: ignore [reportMissingModuleSource] -from libzim.reader import Archive, Entry # pyright: ignore [reportMissingModuleSource] +from libzim.reader import ( # pyright: ignore [reportMissingModuleSource] + Archive, + Entry, + get_cluster_cache_current_size, + get_cluster_cache_max_size, + set_cluster_cache_max_size, 
+) from libzim.search import Query, Searcher # pyright: ignore [reportMissingModuleSource] from libzim.suggestion import ( # pyright: ignore [reportMissingModuleSource] SuggestionSearcher, @@ -621,28 +627,25 @@ def test_reader_get_random_entry(all_zims): @pytest.mark.parametrize(*parametrize_for(["filename"])) def test_cluster_cache(all_zims, filename): zim = Archive(all_zims / filename) - default_value = 16 - new_value = 1 - empty_value = 0 + default_value = 536870912 # 512M + new_value = 1024 - assert zim.cluster_cache_max_size == default_value + assert get_cluster_cache_max_size() == default_value - zim.cluster_cache_max_size = new_value - assert zim.cluster_cache_max_size == new_value + # modify cluster cache max size + set_cluster_cache_max_size(new_value) + assert get_cluster_cache_max_size() == new_value # test index access for index in range(0, zim.entry_count - 1): bytes(zim._get_entry_by_id(index).get_item().content) - assert zim.cluster_cache_current_size <= new_value - - zim.cluster_cache_max_size = empty_value - assert zim.cluster_cache_max_size == empty_value - - for index in range(0, zim.entry_count - 1): - bytes(zim._get_entry_by_id(index).get_item().content) + # check current size is not too big (not really relevant since cache keeps at least + # one cluster in memory, so this value depends on maximum cluster size + assert get_cluster_cache_current_size() <= new_value - assert zim.cluster_cache_current_size == empty_value + # restore default value for next tests + set_cluster_cache_max_size(default_value) @skip_if_offline @@ -671,26 +674,5 @@ def test_dirent_cache(all_zims, filename): for index in range(0, zim.entry_count - 1): bytes(zim._get_entry_by_id(index).get_item().content) - assert zim.dirent_cache_current_size == empty_value - - -@skip_if_offline -@pytest.mark.parametrize(*parametrize_for(["filename"])) -def test_dirent_lookup_cache(all_zims, filename): - zim = Archive(all_zims / filename) - default_value = 1024 - new_value = 2 - 
empty_value = 0 - - assert zim.dirent_lookup_cache_max_size == default_value - - zim.dirent_lookup_cache_max_size = new_value - assert zim.dirent_lookup_cache_max_size == new_value - - # test index access - for index in range(0, zim.entry_count - 1): - bytes(zim._get_entry_by_id(index).get_item().content) - - # setting after reading records the value but it has no use - zim.dirent_lookup_cache_max_size = empty_value - assert zim.dirent_lookup_cache_max_size == empty_value + # always at least one entry is kept in cache unless ZIM is empty + assert zim.dirent_cache_current_size == (1 if zim.entry_count else 0) From 1efd0f9b66d1769d6851951516fcb5ac5db2691a Mon Sep 17 00:00:00 2001 From: rgaudin Date: Thu, 6 Nov 2025 11:26:51 +0000 Subject: [PATCH 10/12] [DEV] only build manylinux for arm64 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index efb0d99..e45a8a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -115,7 +115,7 @@ libzim = [ ] [tool.cibuildwheel] -build = "*" +build = "cp31*-manylinux_aarch64" # disabling PyPy due to 2 failing tests skip = "pp* *win32*" From dd9c45b2033dce839f080dba4f7cecc41f977899 Mon Sep 17 00:00:00 2001 From: rgaudin Date: Thu, 6 Nov 2025 11:33:02 +0000 Subject: [PATCH 11/12] [DEV] hardcoding test URL --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 276214d..ffc79a3 100755 --- a/setup.py +++ b/setup.py @@ -253,6 +253,8 @@ def _download_and_extract(self, filename: str) -> pathlib.Path: elif self.is_nightly: source_url = f"http://download.openzim.org/nightly/{self.libzim_dl_version}" url = f"{source_url}/{fpath.name}" + fpath = Path("libzim_linux-aarch64-manylinux-2025-11-05.tar.gz") + url = "https://tmp.kiwix.org/ci/dev_preview/manylinux/libzim_linux-aarch64-manylinux-2025-11-05.tar.gz" # download a local copy if none present if not fpath.exists(): From 290b3068b95af3a2610c8bbb2fb4b362345c97ed Mon Sep 17 00:00:00 2001 From: 
rgaudin Date: Thu, 6 Nov 2025 11:37:07 +0000 Subject: [PATCH 12/12] [DEV] can't skip something not selected --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e45a8a1..4cfb1f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,8 +116,7 @@ libzim = [ [tool.cibuildwheel] build = "cp31*-manylinux_aarch64" -# disabling PyPy due to 2 failing tests -skip = "pp* *win32*" + test-requires = ["pytest"] test-command = "py.test {project}/tests/"