From 09bd180c4c6862355db2e4517400a68aa9df0acc Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 31 Mar 2025 13:52:26 +0330 Subject: [PATCH 01/37] Update changelog --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f78b95f..51cb48a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v5.0.0 - Unreleased +### Targets: +- Update `hashbrown` dependency +- Make 2x faster by changing my *`isize` to `u64` strategy* in Rust. +- Rewrite cache classes API in Python; this help users to use classes as subclass and customize them. +- Make benchmarks better +- Make error handlings better +- Make customizable and extensible: make your own strategies (If I found a good way) + ## 4.5.0 - 2025-01-31 ### Updated - `cached` and `cachedmethod` improved: From 7f389ca19d2edd0d7b1ef142ab49037d7f5fb190 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 31 Mar 2025 15:03:04 +0330 Subject: [PATCH 02/37] Start rewriting --- Cargo.lock | 39 +- Cargo.toml | 36 +- Makefile | 40 +- cachebox/__init__.py | 45 - cachebox/_cachebox.pyi | 1300 ------------------------ cachebox/py.typed | 0 cachebox/utils.py | 520 ---------- pyproject.toml | 12 +- {tests => python/cachebox}/__init__.py | 0 src/bridge/baseimpl.rs | 41 - src/bridge/cache.rs | 484 --------- src/bridge/fifocache.rs | 499 --------- src/bridge/lfucache.rs | 514 ---------- src/bridge/lrucache.rs | 532 ---------- src/bridge/mod.rs | 10 - src/bridge/rrcache.rs | 492 --------- src/bridge/ttlcache.rs | 619 ----------- src/bridge/vttlcache.rs | 600 ----------- src/hashedkey.rs | 46 - src/internal/fifo.rs | 360 ------- src/internal/lfu.rs | 339 ------ src/internal/lru.rs | 284 ------ src/internal/mod.rs | 17 - src/internal/nopolicy.rs | 181 
---- src/internal/ttl.rs | 427 -------- src/internal/vttl.rs | 452 -------- src/lib.rs | 67 +- src/linked_list.rs | 193 ---- src/mutex.rs | 173 ---- src/sorted_heap.rs | 177 ---- src/util.rs | 253 ----- tests/mixin.py | 455 --------- tests/test_caches.py | 573 ----------- tests/test_concurrency.py | 108 -- tests/test_utils.py | 301 ------ 35 files changed, 76 insertions(+), 10113 deletions(-) delete mode 100644 cachebox/__init__.py delete mode 100644 cachebox/_cachebox.pyi delete mode 100644 cachebox/py.typed delete mode 100644 cachebox/utils.py rename {tests => python/cachebox}/__init__.py (100%) delete mode 100644 src/bridge/baseimpl.rs delete mode 100644 src/bridge/cache.rs delete mode 100644 src/bridge/fifocache.rs delete mode 100644 src/bridge/lfucache.rs delete mode 100644 src/bridge/lrucache.rs delete mode 100644 src/bridge/mod.rs delete mode 100644 src/bridge/rrcache.rs delete mode 100644 src/bridge/ttlcache.rs delete mode 100644 src/bridge/vttlcache.rs delete mode 100644 src/hashedkey.rs delete mode 100644 src/internal/fifo.rs delete mode 100644 src/internal/lfu.rs delete mode 100644 src/internal/lru.rs delete mode 100644 src/internal/mod.rs delete mode 100644 src/internal/nopolicy.rs delete mode 100644 src/internal/ttl.rs delete mode 100644 src/internal/vttl.rs delete mode 100644 src/linked_list.rs delete mode 100644 src/mutex.rs delete mode 100644 src/sorted_heap.rs delete mode 100644 src/util.rs delete mode 100644 tests/mixin.py delete mode 100644 tests/test_caches.py delete mode 100644 tests/test_concurrency.py delete mode 100644 tests/test_utils.py diff --git a/Cargo.lock b/Cargo.lock index 80778f7..f2632aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,22 +14,14 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "cachebox" -version = "4.5.3" +version = "5.0.0" dependencies = [ "cfg-if", "fastrand", - "fxhash", "hashbrown", - "lock_api", - "parking_lot_core", + "parking_lot", "pyo3", "pyo3-build-config", ] @@ -46,20 +38,11 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" [[package]] name = "heck" @@ -100,9 +83,19 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.1" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parking_lot" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] [[package]] name = "parking_lot_core" diff --git a/Cargo.toml b/Cargo.toml index 0abdd72..5d52ee4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cachebox" -version = "4.5.3" +version = "5.0.0" edition = "2021" description = "The fastest memoizing and caching Python library written in Rust" readme = "README.md" @@ -21,17 +21,29 @@ lto = "fat" panic = "abort" 
strip = "symbols" -[dependencies] -hashbrown = { version = "^0.14", default-features = false, features=["inline-more", "raw"]} -fastrand = "^2.3" -pyo3 = { version = "0.24.0", default-features = false, features=["macros", "extension-module"] } -cfg-if = "1.0" -parking_lot_core = { version = "^0.9", default-features = false } -lock_api = { version = "^0.4", default-features = false } -fxhash = {version = "^0.2"} - -[build-dependencies] -pyo3-build-config = { version = "0.24.0", features = ["resolve-config"] } +[dependencies.hashbrown] +version = "0.15.2" +default-features = false +features = ["inline-more"] + +[dependencies.fastrand] +version = "2.3.0" + +[dependencies.pyo3] +version = "0.24.0" +default-features = false +features = ["macros", "extension-module"] + +[dependencies.cfg-if] +version = "1.0.0" + +[dependencies.parking_lot] +version = "0.12.3" +default-features = false + +[build-dependencies.pyo3-build-config] +version = "0.24.0" +features = ["resolve-config"] [lints.clippy] dbg_macro = "warn" diff --git a/Makefile b/Makefile index 5d2e21b..e0412ac 100644 --- a/Makefile +++ b/Makefile @@ -1,29 +1,32 @@ -.DEFAULT_GOAL := all -export CARGO_TERM_COLOR=$(shell (test -t 0 && echo "always") || echo "auto") +help: + @echo "Commands:" + @echo -e "\tbuild-dev build source" + @echo -e "\tbuild-prod build source (release mode)" + @echo -e "\ttest-rs clippy and test rust code" + @echo -e "\ttest-py build and test python code" + @echo -e "\tformat format rust and python code" + @echo -e "\tclean clean all the unneeded files" .PHONY: build-dev build-dev: maturin develop - .PHONY: build-prod build-prod: maturin develop --release - -.PHONY: test-py -test-py: - maturin develop - RUST_BACKTRACE=1 pytest -vv - rm -rf .pytest_cache - ruff check . - ruff clean - - .PHONY: test-rs test-rs: cargo clippy + cargo test -- --nocapture +.PHONY: test-py +test-py: build-dev + coverage run -m pytest -s -vv + -rm -rf .pytest_cache + -ruff check . 
+ ruff clean + coverage html .PHONY: format format: @@ -31,13 +34,12 @@ format: ruff clean cargo fmt - .PHONY: clean clean: -rm -rf `find . -name __pycache__` - -rm -rf cachebox/*.so + -rm -rf python/cachebox/*.so -rm -rf target/release - - -.PHONY: all -all: format test-rs test-py clean + -rm -rf .pytest_cache + -rm -rf .coverage + -rm -rf htmlcov + -ruff clean diff --git a/cachebox/__init__.py b/cachebox/__init__.py deleted file mode 100644 index 488bf16..0000000 --- a/cachebox/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -The fastest caching library written in Rust. - -Example:: - - from cachebox import TTLCache - import time - - cache = TTLCache(1000, ttl=2) - cache[0] = 1 - time.sleep(2) - cache.get(0, None) # None -""" - -from ._cachebox import ( - BaseCacheImpl as BaseCacheImpl, - Cache as Cache, - FIFOCache as FIFOCache, - RRCache as RRCache, - TTLCache as TTLCache, - LRUCache as LRUCache, - LFUCache as LFUCache, - VTTLCache as VTTLCache, - cache_iterator as cache_iterator, - fifocache_iterator as fifocache_iterator, - ttlcache_iterator as ttlcache_iterator, - lrucache_iterator as lrucache_iterator, - lfucache_iterator as lfucache_iterator, - vttlcache_iterator as vttlcache_iterator, - __version__ as __version__, - __author__ as __author__, - version_info as version_info, -) - -from .utils import ( - Frozen as Frozen, - cached as cached, - cachedmethod as cachedmethod, - make_key as make_key, - make_hash_key as make_hash_key, - make_typed_key as make_typed_key, - EVENT_HIT as EVENT_HIT, - EVENT_MISS as EVENT_MISS, - is_cached as is_cached, -) diff --git a/cachebox/_cachebox.pyi b/cachebox/_cachebox.pyi deleted file mode 100644 index 80a9796..0000000 --- a/cachebox/_cachebox.pyi +++ /dev/null @@ -1,1300 +0,0 @@ -""" -cachebox core ( written in Rust ) -""" - -import typing - -__version__: str -__author__: str - -version_info: typing.Tuple[int, int, int, bool] -""" (major, minor, patch, is_beta) """ - -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") 
-DT = typing.TypeVar("DT") - -class BaseCacheImpl(typing.Generic[KT, VT]): - """ - This is the base class of all cache classes such as Cache, FIFOCache, ... - - Do not try to call its constructor, this is only for type-hint. - """ - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - ) -> None: ... - @staticmethod - def __class_getitem__(*args) -> None: ... - @property - def maxsize(self) -> int: ... - def _state(self) -> int: ... - def __len__(self) -> int: ... - def __sizeof__(self) -> int: ... - def __bool__(self) -> bool: ... - def __contains__(self, key: KT) -> bool: ... - def __setitem__(self, key: KT, value: VT) -> None: ... - def __getitem__(self, key: KT) -> VT: ... - def __delitem__(self, key: KT) -> VT: ... - def __str__(self) -> str: ... - def __iter__(self) -> typing.Iterator[KT]: ... - def __richcmp__(self, other, op: int) -> bool: ... - def __getstate__(self) -> object: ... - def __getnewargs__(self) -> tuple: ... - def __setstate__(self, state: object) -> None: ... - def capacity(self) -> int: ... - def is_full(self) -> bool: ... - def is_empty(self) -> bool: ... - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: ... - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: ... - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: ... - def setdefault( - self, key: KT, default: typing.Optional[DT] = None - ) -> typing.Optional[VT | DT]: ... - def popitem(self) -> typing.Tuple[KT, VT]: ... - def drain(self, n: int) -> int: ... - def clear(self, *, reuse: bool = False) -> None: ... - def shrink_to_fit(self) -> None: ... - def update( - self, iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] - ) -> None: ... - def keys(self) -> typing.Iterable[KT]: ... - def values(self) -> typing.Iterable[VT]: ... - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: ... 
- -class Cache(BaseCacheImpl[KT, VT]): - """ - A simple cache that has no algorithm; this is only a hashmap. - - `Cache` vs `dict`: - - it is thread-safe and unordered, while `dict` isn't thread-safe and ordered (Python 3.6+). - - it uses very lower memory than `dict`. - - it supports useful and new methods for managing memory, while `dict` does not. - - it does not support `popitem`, while `dict` does. - - You can limit the size of `Cache`, but you cannot for `dict`. - """ - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - ) -> None: - """ - A simple cache that has no algorithm; this is only a hashmap. - - :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - - :param iterable: you can create cache from a dict or an iterable. - - :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at - least enough capacity for inserting the given number of elements without reallocating. - """ - ... - - def __setitem__(self, key: KT, value: VT) -> None: - """ - Set self[key] to value. - - Note: raises `OverflowError` if the cache reached the maxsize limit, - because this class does not have any algorithm. - """ - ... - - def __getitem__(self, key: KT) -> VT: - """ - Returns self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def __delitem__(self, key: KT) -> VT: - """ - Deletes self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def capacity(self) -> int: - """ - Returns the number of elements the map can hold without reallocating. - """ - ... - - def is_full(self) -> bool: - """ - Equivalent directly to `len(self) == self.maxsize` - """ - ... - - def is_empty(self) -> bool: - """ - Equivalent directly to `len(self) == 0` - """ - ... 
- - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but returns a value: - - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; - - Note: raises `OverflowError` if the cache reached the maxsize limit, - because this class does not have any algorithm. - """ - ... - - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. - """ - ... - - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. - - If the key is not found, returns the `default`. - """ - ... - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Optional[VT | DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - ... - - def popitem(self) -> typing.NoReturn: ... # not implemented for this class - def drain(self, n: int) -> typing.NoReturn: ... # not implemented for this class - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - ... - - def shrink_to_fit(self) -> None: - """ - Shrinks the cache to fit len(self) elements. - """ - ... - - def update(self, iterable: typing.Iterable[typing.Tuple[KT, VT]] | typing.Dict[KT, VT]) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - - Note: raises `OverflowError` if the cache reached the maxsize limit. - """ - ... - - def keys(self) -> typing.Iterable[KT]: - """ - Returns an iterable object of the cache's keys. 
- - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are not ordered. - """ - ... - - def values(self) -> typing.Iterable[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are not ordered. - """ - ... - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are not ordered. - """ - ... - -class FIFOCache(BaseCacheImpl[KT, VT]): - """ - FIFO Cache implementation - First-In First-Out Policy (thread-safe). - - In simple terms, the FIFO cache will remove the element that has been in the cache the longest - """ - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - ) -> None: - """ - FIFO Cache implementation - First-In First-Out Policy (thread-safe). - - :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - - :param iterable: you can create cache from a dict or an iterable. - - :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at - least enough capacity for inserting the given number of elements without reallocating. - """ - ... - - def __setitem__(self, key: KT, value: VT) -> None: - """ - Set self[key] to value. - """ - ... - - def __getitem__(self, key: KT) -> VT: - """ - Returns self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def __delitem__(self, key: KT) -> VT: - """ - Deletes self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def capacity(self) -> int: - """ - Returns the number of elements the map can hold without reallocating. - """ - ... 
- - def is_full(self) -> bool: - """ - Equivalent directly to `len(self) == self.maxsize` - """ - ... - - def is_empty(self) -> bool: - """ - Equivalent directly to `len(self) == 0` - """ - ... - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but returns a value: - - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; - """ - ... - - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. - """ - ... - - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. - - If the key is not found, returns the `default`. - """ - ... - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Optional[VT | DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - ... - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes the element that has been in the cache the longest - """ - ... - - def drain(self, n: int) -> int: - """ - Does the `popitem()` `n` times and returns count of removed items. - """ - ... - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - ... - - def update(self, iterable: typing.Iterable[typing.Tuple[KT, VT]] | typing.Dict[KT, VT]) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - """ - ... - - def keys(self) -> typing.Iterable[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. 
- """ - ... - - def values(self) -> typing.Iterable[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - ... - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - ... - - def first(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - - By using `n` parameter, you can browse order index by index. - """ - ... - - def last(self) -> typing.Optional[KT]: - """ - Returns the last key in cache. - """ - ... - -class RRCache(BaseCacheImpl[KT, VT]): - """ - RRCache implementation - Random Replacement policy (thread-safe). - - In simple terms, the RR cache will choice randomly element to remove it to make space when necessary. - """ - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - ) -> None: - """ - RRCache implementation - Random Replacement policy (thread-safe). - - :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - - :param iterable: you can create cache from a dict or an iterable. - - :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at - least enough capacity for inserting the given number of elements without reallocating. - """ - ... - - def __setitem__(self, key: KT, value: VT) -> None: - """ - Set self[key] to value. - """ - ... - - def __getitem__(self, key: KT) -> VT: - """ - Returns self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def __delitem__(self, key: KT) -> VT: - """ - Deletes self[key]. - - Note: raises `KeyError` if key not found. 
- """ - ... - - def capacity(self) -> int: - """ - Returns the number of elements the map can hold without reallocating. - """ - ... - - def is_full(self) -> bool: - """ - Equivalent directly to `len(self) == self.maxsize` - """ - ... - - def is_empty(self) -> bool: - """ - Equivalent directly to `len(self) == 0` - """ - ... - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but returns a value: - - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; - """ - ... - - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. - """ - ... - - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. - - If the key is not found, returns the `default`. - """ - ... - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Optional[VT | DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - ... - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - ... - - def shrink_to_fit(self) -> None: - """ - Shrinks the cache to fit len(self) elements. - """ - ... - - def update(self, iterable: typing.Iterable[typing.Tuple[KT, VT]] | typing.Dict[KT, VT]) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - - Note: raises `OverflowError` if the cache reached the maxsize limit. - """ - ... - - def keys(self) -> typing.Iterable[KT]: - """ - Returns an iterable object of the cache's keys. 
- - Notes: - - You should not make any changes in cache while using this iterable object. - - Keys are not ordered. - """ - ... - - def values(self) -> typing.Iterable[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Values are not ordered. - """ - ... - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - - Items are not ordered. - """ - ... - -class TTLCache(BaseCacheImpl[KT, VT]): - """ - TTL Cache implementation - Time-To-Live Policy (thread-safe). - - In simple terms, the TTL cache will automatically remove the element in the cache that has expired. - """ - - def __init__( - self, - maxsize: int, - ttl: float, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - ) -> None: - """ - TTL Cache implementation - Time-To-Live Policy (thread-safe). - - :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - - :param ttl: specifies the time-to-live value for each element in cache (in seconds); cannot be zero or negative. - - :param iterable: you can create cache from a dict or an iterable. - - :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at - least enough capacity for inserting the given number of elements without reallocating. - """ - ... - - def __setitem__(self, key: KT, value: VT) -> None: - """ - Set self[key] to value. - """ - ... - - def __getitem__(self, key: KT) -> VT: - """ - Returns self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def __delitem__(self, key: KT) -> VT: - """ - Deletes self[key]. - - Note: raises `KeyError` if key not found. - """ - ... 
- - def capacity(self) -> int: - """ - Returns the number of elements the map can hold without reallocating. - """ - ... - - def is_full(self) -> bool: - """ - Equivalent directly to `len(self) == self.maxsize` - """ - ... - - def is_empty(self) -> bool: - """ - Equivalent directly to `len(self) == 0` - """ - ... - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but returns a value: - - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; - """ - ... - - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. - """ - ... - - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. - - If the key is not found, returns the `default`. - """ - ... - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Optional[VT | DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - ... - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes the element that has been in the cache the longest - """ - ... - - def drain(self, n: int) -> int: - """ - Does the `popitem()` `n` times and returns count of removed items. - """ - ... - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - ... - - def update(self, iterable: typing.Iterable[typing.Tuple[KT, VT]] | typing.Dict[KT, VT]) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - """ - ... 
- - def keys(self) -> typing.Iterable[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - """ - ... - - def values(self) -> typing.Iterable[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - """ - ... - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - """ - ... - - def first(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the oldest key in cache; this is the one which will be removed by `popitem()` (if n == 0). - - By using `n` parameter, you can browse order index by index. - """ - ... - - def last(self) -> typing.Optional[KT]: - """ - Returns the newest key in cache. - """ - ... - - def get_with_expire( - self, key: KT, default: DT = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Works like `.get()`, but also returns the remaining time-to-live. - """ - ... - - def pop_with_expire( - self, key: KT, default: DT = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Works like `.pop()`, but also returns the remaining time-to-live. - """ - ... - - def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: - """ - Works like `.popitem()`, but also returns the remaining time-to-live. - """ - ... 
- -class LRUCache(BaseCacheImpl[KT, VT]): - """ - LRU Cache implementation - Least recently used policy (thread-safe). - - In simple terms, the LRU cache will remove the element in the cache that has not been accessed in the longest time. - """ - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - ) -> None: - """ - LRU Cache implementation - Least recently used policy (thread-safe). - - :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - - :param iterable: you can create cache from a dict or an iterable. - - :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at - least enough capacity for inserting the given number of elements without reallocating. - """ - ... - - def __setitem__(self, key: KT, value: VT) -> None: - """ - Set self[key] to value. - """ - ... - - def __getitem__(self, key: KT) -> VT: - """ - Returns self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def __delitem__(self, key: KT) -> VT: - """ - Deletes self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def capacity(self) -> int: - """ - Returns the number of elements the map can hold without reallocating. - """ - ... - - def is_full(self) -> bool: - """ - Equivalent directly to `len(self) == self.maxsize` - """ - ... - - def is_empty(self) -> bool: - """ - Equivalent directly to `len(self) == 0` - """ - ... - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but returns a value: - - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; - """ - ... 
- - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. - """ - ... - - def peek(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Searches for a key-value in the cache and returns it (without moving the key to recently used). - """ - ... - - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. - - If the key is not found, returns the `default`. - """ - ... - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Optional[VT | DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - ... - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes the element that has been in the cache the longest - """ - ... - - def drain(self, n: int) -> int: - """ - Does the `popitem()` `n` times and returns count of removed items. - """ - ... - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - ... - - def update(self, iterable: typing.Iterable[typing.Tuple[KT, VT]] | typing.Dict[KT, VT]) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - """ - ... - - def keys(self) -> typing.Iterable[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - ... - - def values(self) -> typing.Iterable[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - ... 
- - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - ... - - def least_recently_used(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the key in the cache that has not been accessed in the longest time. - """ - ... - - def most_recently_used(self) -> typing.Optional[KT]: - """ - Returns the key in the cache that has been accessed in the shortest time. - """ - ... - -class LFUCache(BaseCacheImpl[KT, VT]): - """ - LFU Cache implementation - Least frequantly used policy (thread-safe). - - In simple terms, the LFU cache will remove the element in the cache that has been accessed the least, regardless of time - """ - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - *, - capacity: int = ..., - ) -> None: - """ - LFU Cache implementation - Least frequantly used policy (thread-safe). - - :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - - :param iterable: you can create cache from a dict or an iterable. - - :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at - least enough capacity for inserting the given number of elements without reallocating. - """ - ... - - def __setitem__(self, key: KT, value: VT) -> None: - """ - Set self[key] to value. - """ - ... - - def __getitem__(self, key: KT) -> VT: - """ - Returns self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def __delitem__(self, key: KT) -> VT: - """ - Deletes self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def capacity(self) -> int: - """ - Returns the number of elements the map can hold without reallocating. - """ - ... 
- - def is_full(self) -> bool: - """ - Equivalent directly to `len(self) == self.maxsize` - """ - ... - - def is_empty(self) -> bool: - """ - Equivalent directly to `len(self) == 0` - """ - ... - - def insert(self, key: KT, value: VT) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but returns a value: - - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; - """ - ... - - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. - """ - ... - - def peek(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Searches for a key-value in the cache and returns it (without increasing frequenctly counter). - """ - ... - - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. - - If the key is not found, returns the `default`. - """ - ... - - def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Optional[VT | DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - ... - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes the element that has been in the cache the longest - """ - ... - - def drain(self, n: int) -> int: - """ - Does the `popitem()` `n` times and returns count of removed items. - """ - ... - - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - ... - - def update(self, iterable: typing.Iterable[typing.Tuple[KT, VT]] | typing.Dict[KT, VT]) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - """ - ... 
- - def keys(self) -> typing.Iterable[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - ... - - def values(self) -> typing.Iterable[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - ... - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - """ - ... - - def least_frequently_used(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the key in the cache that has been accessed the least, regardless of time. - """ - ... - -class VTTLCache(BaseCacheImpl[KT, VT]): - """ - VTTL Cache implementation - Time-To-Live Per-Key Policy (thread-safe). - - In simple terms, the TTL cache will automatically remove the element in the cache that has expired when need. - """ - - def __init__( - self, - maxsize: int, - iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., - ttl: typing.Optional[float] = 0.0, - *, - capacity: int = ..., - ) -> None: - """ - VTTL Cache implementation - Time-To-Live Per-Key Policy (thread-safe). - - :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - - :param iterable: you can create cache from a dict or an iterable. - - :param ttl: specifies the time-to-live value for each element in cache (in seconds); cannot be zero or negative. - - :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at - least enough capacity for inserting the given number of elements without reallocating. - """ - ... - - def __setitem__(self, key: KT, value: VT) -> None: - """ - Set self[key] to value. - - Recommended to use `.insert()` method here. 
- """ - ... - - def __getitem__(self, key: KT) -> VT: - """ - Returns self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def __delitem__(self, key: KT) -> VT: - """ - Deletes self[key]. - - Note: raises `KeyError` if key not found. - """ - ... - - def capacity(self) -> int: - """ - Returns the number of elements the map can hold without reallocating. - """ - ... - - def is_full(self) -> bool: - """ - Equivalent directly to `len(self) == self.maxsize` - """ - ... - - def is_empty(self) -> bool: - """ - Equivalent directly to `len(self) == 0` - """ - ... - - def insert(self, key: KT, value: VT, ttl: typing.Optional[float] = None) -> typing.Optional[VT]: - """ - Equals to `self[key] = value`, but: - - Here you can set ttl for key-value ( with `self[key] = value` you can't ) - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; - """ - ... - - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. - """ - ... - - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - """ - Removes specified key and return the corresponding value. - - If the key is not found, returns the `default`. - """ - ... - - def setdefault( - self, key: KT, default: typing.Optional[DT] = None, ttl: typing.Optional[float] = None - ) -> typing.Optional[VT | DT]: - """ - Inserts key with a value of default if key is not in the cache. - - Return the value for key if key is in the cache, else default. - """ - ... - - def popitem(self) -> typing.Tuple[KT, VT]: - """ - Removes the element that has been in the cache the longest - """ - ... - - def drain(self, n: int) -> int: - """ - Does the `popitem()` `n` times and returns count of removed items. - """ - ... 
- - def clear(self, *, reuse: bool = False) -> None: - """ - Removes all items from cache. - - If reuse is True, will not free the memory for reusing in the future. - """ - ... - - def update( - self, - iterable: typing.Iterable[typing.Tuple[KT, VT]] | typing.Dict[KT, VT], - ttl: typing.Optional[float] = None, - ) -> None: - """ - Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - """ - ... - - def keys(self) -> typing.Iterable[KT]: - """ - Returns an iterable object of the cache's keys. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - """ - ... - - def values(self) -> typing.Iterable[VT]: - """ - Returns an iterable object of the cache's values. - - Notes: - - You should not make any changes in cache while using this iterable object. - - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - """ - ... - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - """ - Returns an iterable object of the cache's items (key-value pairs). - - Notes: - - You should not make any changes in cache while using this iterable object. - - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - """ - ... - - def first(self, n: int = 0) -> typing.Optional[KT]: - """ - Returns the oldest key in cache; this is the one which will be removed by `popitem()` (if n == 0). - - By using `n` parameter, you can browse order index by index. - """ - ... - - def last(self) -> typing.Optional[KT]: - """ - Returns the newest key in cache. - """ - ... - - def get_with_expire( - self, key: KT, default: DT = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Works like `.get()`, but also returns the remaining time-to-live. - """ - ... 
- - def pop_with_expire( - self, key: KT, default: DT = None - ) -> typing.Tuple[typing.Union[VT, DT], float]: - """ - Works like `.pop()`, but also returns the remaining time-to-live. - """ - ... - - def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: - """ - Works like `.popitem()`, but also returns the remaining time-to-live. - """ - ... - -class cache_iterator: - def __len__(self) -> int: ... - def __iter__(self) -> typing.Iterator: ... - def __next__(self) -> typing.Any: ... - -class fifocache_iterator: - def __len__(self) -> int: ... - def __iter__(self) -> typing.Iterator: ... - def __next__(self) -> typing.Any: ... - -class ttlcache_iterator: - def __len__(self) -> int: ... - def __iter__(self) -> typing.Iterator: ... - def __next__(self) -> typing.Any: ... - -class lrucache_iterator: - def __len__(self) -> int: ... - def __iter__(self) -> typing.Iterator: ... - def __next__(self) -> typing.Any: ... - -class lfucache_iterator: - def __len__(self) -> int: ... - def __iter__(self) -> typing.Iterator: ... - def __next__(self) -> typing.Any: ... - -class vttlcache_iterator: - def __len__(self) -> int: ... - def __iter__(self) -> typing.Iterator: ... - def __next__(self) -> typing.Any: ... 
diff --git a/cachebox/py.typed b/cachebox/py.typed deleted file mode 100644 index e69de29..0000000 diff --git a/cachebox/utils.py b/cachebox/utils.py deleted file mode 100644 index 43e9290..0000000 --- a/cachebox/utils.py +++ /dev/null @@ -1,520 +0,0 @@ -from ._cachebox import BaseCacheImpl, FIFOCache -from collections import namedtuple, defaultdict -import functools -import warnings -import asyncio -import _thread -import inspect -import typing - - -KT = typing.TypeVar("KT") -VT = typing.TypeVar("VT") -DT = typing.TypeVar("DT") - - -class Frozen(BaseCacheImpl, typing.Generic[KT, VT]): - __slots__ = ("__cache", "ignore") - - def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: - """ - **This is not a cache.** this class can freeze your caches and prevents changes. - - :param cls: your cache - - :param ignore: If False, will raise TypeError if anyone try to change cache. will do nothing otherwise. - """ - assert isinstance(cls, BaseCacheImpl) - assert type(cls) is not Frozen - - self.__cache = cls - self.ignore = ignore - - @property - def cache(self) -> BaseCacheImpl[KT, VT]: - return self.__cache - - @property - def maxsize(self) -> int: - return self.__cache.maxsize - - def __len__(self) -> int: - return len(self.__cache) - - def __sizeof__(self) -> int: - return self.__cache.__sizeof__() - - def __bool__(self) -> bool: - return bool(self.__cache) - - def __contains__(self, key: KT) -> bool: - return key in self.__cache - - def __setitem__(self, key: KT, value: VT) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def __getitem__(self, key: KT) -> VT: - return self.__cache[key] - - def __delitem__(self, key: KT) -> VT: - if self.ignore: - return # type: ignore - - raise TypeError("This cache is frozen.") - - def __repr__(self) -> str: - return f"" - - def __iter__(self) -> typing.Iterator[KT]: - return iter(self.__cache) - - def __richcmp__(self, other, op: int) -> bool: - return 
self.__cache.__richcmp__(other, op) - - def capacity(self) -> int: - return self.__cache.capacity() - - def is_full(self) -> bool: - return self.__cache.is_full() - - def is_empty(self) -> bool: - return self.__cache.is_empty() - - def insert(self, key: KT, value: VT, *args, **kwargs) -> typing.Optional[VT]: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - return self.__cache.get(key, default) - - def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: - if self.ignore: - return # type: ignore - - raise TypeError("This cache is frozen.") - - def setdefault( - self, key: KT, default: typing.Optional[DT] = None, *args, **kwargs - ) -> typing.Optional[typing.Union[VT, DT]]: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def popitem(self) -> typing.Tuple[KT, VT]: - if self.ignore: - return # type: ignore - - raise TypeError("This cache is frozen.") - - def drain(self, n: int) -> int: - if self.ignore: - return # type: ignore - - raise TypeError("This cache is frozen.") - - def clear(self, *, reuse: bool = False) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def shrink_to_fit(self) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def update( - self, iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] - ) -> None: - if self.ignore: - return - - raise TypeError("This cache is frozen.") - - def keys(self) -> typing.Iterable[KT]: - return self.__cache.keys() - - def values(self) -> typing.Iterable[VT]: - return self.__cache.values() - - def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: - return self.__cache.items() - - -class _LockWithCounter: - """ - A threading/asyncio lock which count the waiters - """ - - __slots__ = ("lock", "waiters") - - def __init__(self, is_async: bool = False): - self.lock = _thread.allocate_lock() 
if not is_async else asyncio.Lock() - self.waiters = 0 - - async def __aenter__(self) -> None: - self.waiters += 1 - await self.lock.acquire() - - async def __aexit__(self, *args, **kwds) -> None: - self.waiters -= 1 - self.lock.release() - - def __enter__(self) -> None: - self.waiters += 1 - self.lock.acquire() - - def __exit__(self, *args, **kwds) -> None: - self.waiters -= 1 - self.lock.release() - - -def _copy_if_need(obj, tocopy=(dict, list, set), level: int = 1): - from copy import copy - - if level == 0: - return obj - - if level == 2: - return copy(obj) - - return copy(obj) if (type(obj) in tocopy) else obj - - -def make_key(args: tuple, kwds: dict, fasttype=(int, str)): - key = args - if kwds: - key += (object,) - for item in kwds.items(): - key += item - - if fasttype and len(key) == 1 and type(key[0]) in fasttype: - return key[0] - - return key - - -def make_hash_key(args: tuple, kwds: dict): - return hash(make_key(args, kwds)) - - -def make_typed_key(args: tuple, kwds: dict): - key = make_key(args, kwds, fasttype=()) - - key += tuple(type(v) for v in args) # type: ignore - if kwds: - key += tuple(type(v) for v in kwds.values()) - - return key - - -CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "length", "cachememory"]) -EVENT_MISS = 1 -EVENT_HIT = 2 - - -def _cached_wrapper( - func, - cache: BaseCacheImpl, - key_maker: typing.Callable[[tuple, dict], typing.Hashable], - clear_reuse: bool, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]], - copy_level: int, - is_method: bool, -) -> None: - _key_maker = (lambda args, kwds: key_maker(args[1:], kwds)) if is_method else key_maker - - hits = 0 - misses = 0 - locks = defaultdict(_LockWithCounter) - exceptions = {} - - def _wrapped(*args, **kwds): - nonlocal hits, misses, locks, exceptions - - if kwds.pop("cachebox__ignore", False): - return func(*args, **kwds) - - key = _key_maker(args, kwds) - - # try to get result from cache - try: - result = 
cache[key] - except KeyError: - pass - else: - # A NOTE FOR ME: we don't want to catch KeyError exceptions from `callback` - # so don't wrap it with try except - hits += 1 - - if callback is not None: - callback(EVENT_HIT, key, result) - - return _copy_if_need(result, level=copy_level) - - with locks[key]: - if exceptions.get(key, None) is not None: - cached_error = exceptions[key] if locks[key].waiters > 1 else exceptions.pop(key) - raise cached_error - - try: - result = cache[key] - hits += 1 - event = EVENT_HIT - except KeyError: - try: - result = func(*args, **kwds) - except Exception as e: - if locks[key].waiters > 1: - exceptions[key] = e - - raise e - - else: - cache[key] = result - misses += 1 - event = EVENT_MISS - - if callback is not None: - callback(event, key, result) - - return _copy_if_need(result, level=copy_level) - - _wrapped.cache = cache - _wrapped.callback = callback - _wrapped.cache_info = lambda: CacheInfo( - hits, misses, cache.maxsize, len(cache), cache.capacity() - ) - - def cache_clear(): - nonlocal misses, hits, locks, exceptions - cache.clear(reuse=clear_reuse) - misses = 0 - hits = 0 - locks.clear() - exceptions.clear() - - _wrapped.cache_clear = cache_clear - - return _wrapped - - -def _async_cached_wrapper( - func, - cache: BaseCacheImpl, - key_maker: typing.Callable[[tuple, dict], typing.Hashable], - clear_reuse: bool, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]], - copy_level: int, - is_method: bool, -) -> None: - _key_maker = (lambda args, kwds: key_maker(args[1:], kwds)) if is_method else key_maker - - hits = 0 - misses = 0 - locks = defaultdict(lambda: _LockWithCounter(True)) - exceptions = {} - - async def _wrapped(*args, **kwds): - nonlocal hits, misses, locks, exceptions - - if kwds.pop("cachebox__ignore", False): - return await func(*args, **kwds) - - key = _key_maker(args, kwds) - - # try to get result from cache - try: - result = cache[key] - except KeyError: - pass - else: - # A 
NOTE FOR ME: we don't want to catch KeyError exceptions from `callback` - # so don't wrap it with try except - hits += 1 - - if callback is not None: - awaitable = callback(EVENT_HIT, key, result) - if inspect.isawaitable(awaitable): - await awaitable - - return _copy_if_need(result, level=copy_level) - - async with locks[key]: - if exceptions.get(key, None) is not None: - cached_error = exceptions[key] if locks[key].waiters > 1 else exceptions.pop(key) - raise cached_error - - try: - result = cache[key] - hits += 1 - event = EVENT_HIT - except KeyError: - try: - result = await func(*args, **kwds) - except Exception as e: - if locks[key].waiters > 1: - exceptions[key] = e - - raise e - - else: - cache[key] = result - misses += 1 - event = EVENT_MISS - - if callback is not None: - awaitable = callback(event, key, result) - if inspect.isawaitable(awaitable): - await awaitable - - return _copy_if_need(result, level=copy_level) - - _wrapped.cache = cache - _wrapped.callback = callback - _wrapped.cache_info = lambda: CacheInfo( - hits, misses, cache.maxsize, len(cache), cache.capacity() - ) - - def cache_clear(): - nonlocal misses, hits, locks, exceptions - cache.clear(reuse=clear_reuse) - misses = 0 - hits = 0 - locks.clear() - exceptions.clear() - - _wrapped.cache_clear = cache_clear - - return _wrapped - - -def cached( - cache: typing.Union[BaseCacheImpl, dict, None], - key_maker: typing.Callable[[tuple, dict], typing.Hashable] = make_key, - clear_reuse: bool = False, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, - copy_level: int = 1, - always_copy: typing.Optional[bool] = None, -): - """ - Decorator to wrap a function with a memoizing callable that saves results in a cache. - - :param cache: Specifies a cache that handles and stores the results. if `None` or `dict`, `FIFOCache` will be used. 
- - :param key_maker: Specifies a function that will be called with the same positional and keyword - arguments as the wrapped function itself, and which has to return a suitable - cache key (must be hashable). - - :param clear_reuse: The wrapped function has a function named `clear_cache` that uses `cache.clear` - method to clear the cache. This parameter will be passed to cache's `clear` method. - - :param callback: Every time the `cache` is used, callback is also called. - The callback arguments are: event number (see `EVENT_MISS` or `EVENT_HIT` variables), key, and then result. - - :param copy_level: The wrapped function always copies the result of your function and then returns it. - This parameter specifies that the wrapped function has to copy which type of results. - `0` means "never copy", `1` means "only copy `dict`, `list`, and `set` results" and - `2` means "always copy the results". - - Example:: - - @cachebox.cached(cachebox.LRUCache(128)) - def sum_as_string(a, b): - return str(a+b) - - assert sum_as_string(1, 2) == "3" - - assert len(sum_as_string.cache) == 1 - sum_as_string.cache_clear() - assert len(sum_as_string.cache) == 0 - - See more: [documentation](https://github.com/awolverp/cachebox#function-cached) - """ - if cache is None: - cache = FIFOCache(0) - - if type(cache) is dict: - cache = FIFOCache(0, cache) - - if not isinstance(cache, BaseCacheImpl): - raise TypeError("we expected cachebox caches, got %r" % (cache,)) - - if always_copy is not None: - warnings.warn( - "'always_copy' parameter is deprecated and will be removed in future; use 'copy_level' instead", - category=DeprecationWarning, - ) - if always_copy is True: - copy_level = 2 - - def decorator(func): - if inspect.iscoroutinefunction(func): - wrapper = _async_cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, False - ) - else: - wrapper = _cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, False - ) - - return 
functools.update_wrapper(wrapper, func) - - return decorator - - -def cachedmethod( - cache: typing.Union[BaseCacheImpl, dict, None], - key_maker: typing.Callable[[tuple, dict], typing.Hashable] = make_key, - clear_reuse: bool = False, - callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, - copy_level: int = 1, - always_copy: typing.Optional[bool] = None, -): - """ - this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making. - """ - if cache is None: - cache = FIFOCache(0) - - if type(cache) is dict: - cache = FIFOCache(0, cache) - - if not isinstance(cache, BaseCacheImpl): - raise TypeError("we expected cachebox caches, got %r" % (cache,)) - - if always_copy is not None: - warnings.warn( - "'always_copy' parameter is deprecated and will be removed in future; use 'copy_level' instead", - category=DeprecationWarning, - ) - if always_copy is True: - copy_level = 2 - - def decorator(func): - if inspect.iscoroutinefunction(func): - wrapper = _async_cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, True - ) - else: - wrapper = _cached_wrapper( - func, cache, key_maker, clear_reuse, callback, copy_level, True - ) - - return functools.update_wrapper(wrapper, func) - - return decorator - - -def is_cached(func: object) -> bool: - """ - Check if a function/method cached by cachebox or not - """ - return hasattr(func, "cache") and isinstance(func.cache, BaseCacheImpl) diff --git a/pyproject.toml b/pyproject.toml index 503566e..313e4f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=1.6,<2.0"] +requires = ["maturin>=1.8,<2.0"] build-backend = "maturin" [project] @@ -41,7 +41,13 @@ dynamic = [ [project.urls] Homepage = 'https://github.com/awolverp/cachebox' +[project.optional-dependencies] +tests = [ + "pytest", + "coverage", +] + [tool.maturin] +python-source = "python" features = ["pyo3/extension-module"] -bindings = 
'pyo3' -module-name = "cachebox._cachebox" +module-name = "cachebox._core" diff --git a/tests/__init__.py b/python/cachebox/__init__.py similarity index 100% rename from tests/__init__.py rename to python/cachebox/__init__.py diff --git a/src/bridge/baseimpl.rs b/src/bridge/baseimpl.rs deleted file mode 100644 index 134cb78..0000000 --- a/src/bridge/baseimpl.rs +++ /dev/null @@ -1,41 +0,0 @@ -//! implement [`BaseCacheImpl`], the base class of all classes. - -use pyo3::types::PyTypeMethods; - -/// This is the base class of all cache classes such as Cache, FIFOCache, ... -/// -/// Do not try to call its constructor, this is only for type-hint. -#[pyo3::pyclass(module = "cachebox._cachebox", subclass, frozen)] -pub struct BaseCacheImpl {} - -#[pyo3::pymethods] -impl BaseCacheImpl { - #[new] - #[pyo3(signature = (*args, **kwargs))] - #[classmethod] - #[allow(unused_variables)] - pub fn __new__( - cls: &pyo3::Bound<'_, pyo3::types::PyType>, - args: &pyo3::Bound<'_, pyo3::PyAny>, - kwargs: Option<&pyo3::Bound<'_, pyo3::PyAny>>, - ) -> pyo3::PyResult { - let size = unsafe { pyo3::ffi::PyTuple_Size(cls.mro().as_ptr()) }; - - // This means BaseCacheImpl is used as subclass - // So we shouldn't raise NotImplementedError - if size > 2 { - Ok(Self {}) - } else { - Err(err!(pyo3::exceptions::PyNotImplementedError, "do not call this constructor, you can subclass this implementation or use other classes.")) - } - } - - #[allow(unused_variables)] - #[classmethod] - pub fn __class_getitem__( - cls: &pyo3::Bound<'_, pyo3::types::PyType>, - args: pyo3::PyObject, - ) -> pyo3::PyObject { - cls.clone().into() - } -} diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs deleted file mode 100644 index cbe7390..0000000 --- a/src/bridge/cache.rs +++ /dev/null @@ -1,484 +0,0 @@ -//! implement Cache, our simple cache without any algorithms and policies - -use crate::hashedkey::HashedKey; -use crate::util::_KeepForIter; - -/// A simple cache that has no algorithm; this is only a hashmap. 
-/// -/// [`Cache`] vs `dict`: -/// - it is thread-safe and unordered, while `dict` isn't thread-safe and ordered (Python 3.6+). -/// - it uses very lower memory than `dict`. -/// - it supports useful and new methods for managing memory, while `dict` does not. -/// - it does not support `popitem`, while `dict` does. -/// - You can limit the size of [`Cache`], but you cannot for `dict`. -#[pyo3::pyclass(module="cachebox._cachebox", extends=crate::bridge::baseimpl::BaseCacheImpl, frozen)] -pub struct Cache { - raw: crate::mutex::Mutex, -} - -#[pyo3::pymethods] -impl Cache { - /// A simple cache that has no algorithm; this is only a hashmap. - /// - /// By maxsize param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - /// - /// By iterable param, you can create cache from a dict or an iterable. - /// - /// If capacity param is given, cache attempts to allocate a new hash table with at - /// least enough capacity for inserting the given number of elements without reallocating. 
- #[new] - #[pyo3(signature=(maxsize, iterable=None, *, capacity=0))] - pub fn __new__( - py: pyo3::Python<'_>, - maxsize: usize, - iterable: Option, - capacity: usize, - ) -> pyo3::PyResult<(Self, crate::bridge::baseimpl::BaseCacheImpl)> { - let mut raw = crate::internal::NoPolicy::new(maxsize, capacity)?; - if iterable.is_some() { - raw.update(py, unsafe { iterable.unwrap_unchecked() })?; - } - - let self_ = Self { - raw: crate::mutex::Mutex::new(raw), - }; - Ok((self_, crate::bridge::baseimpl::BaseCacheImpl {})) - } - - /// Returns the cache maxsize - #[getter] - pub fn maxsize(&self) -> usize { - let lock = self.raw.lock(); - lock.maxsize.get() - } - - pub fn _state(&self) -> usize { - let lock = self.raw.lock(); - lock.state.get() - } - - /// Returns the number of elements in the table - len(self) - pub fn __len__(&self) -> usize { - let lock = self.raw.lock(); - lock.table.len() - } - - /// Returns allocated memory size - sys.getsizeof(self) - pub fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - let cap = lock.table.capacity(); - - core::mem::size_of::() + cap * (crate::HASHEDKEY_SIZE + crate::PYOBJECT_SIZE) - } - - /// Returns true if cache not empty - bool(self) - pub fn __bool__(&self) -> bool { - let lock = self.raw.lock(); - !lock.table.is_empty() - } - - /// Returns true if the cache have the key present - key in self - pub fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - Ok(lock.contains_key(&hk)) - } - - /// Sets self\[key\] to value. - /// - /// Note: raises OverflowError if the cache reached the maxsize limit, - /// because this class does not have any algorithm. 
- pub fn __setitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - lock.insert(hk, value)?; - Ok(()) - } - - /// Returns self\[key\] - /// - /// Note: raises KeyError if key not found. - pub fn __getitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Deletes self[key]. - /// - /// Note: raises KeyError if key not found. - pub fn __delitem__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(_) => Ok(()), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Returns repr(self) - pub fn __repr__(&self) -> String { - let lock = self.raw.lock(); - - format!( - "Cache({} / {}, capacity={})", - lock.table.len(), - lock.maxsize.get(), - lock.table.capacity(), - ) - } - - /// Returns iter(self) - pub fn __iter__( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - let iter = unsafe { lock.table.iter() }; - - let result = cache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(iter), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Supports == and != - pub fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::PyRef<'_, Self>, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let (a1, a2) = (slf.raw.lock(), 
other.raw.lock()); - Ok(a1.eq(&a2)) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.ne(&a2)) - } - _ => Err(err!(pyo3::exceptions::PyNotImplementedError, ())), - } - } - - pub fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let lock = self.raw.lock(); - unsafe { - let state = lock.to_pickle(py)?; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - unsafe { lock.from_pickle(py, state.as_ptr()) } - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in unsafe { self.raw.lock().table.iter() } { - let (key, value) = unsafe { value.as_ref() }; - visit.call(&key.key)?; - visit.call(value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.table.clear() - } - - /// Returns the number of elements the map can hold without reallocating. - pub fn capacity(&self) -> usize { - let lock = self.raw.lock(); - lock.table.capacity() - } - - /// Equivalent directly to `len(self) == self.maxsize` - pub fn is_full(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == lock.maxsize.get() - } - - /// Equivalent directly to `len(self) == 0` - pub fn is_empty(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == 0 - } - - /// Equals to `self[key] = value`, but returns a value: - /// - /// - If the cache did not have this key present, None is returned. - /// - If the cache did have this key present, the value is updated, - /// and the old value is returned. The key is not updated, though; - /// - /// Note: raises `OverflowError` if the cache reached the maxsize limit, - /// because this class does not have any algorithm. 
- pub fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - let op = lock.insert(hk, value)?; - Ok(op.unwrap_or_else(|| py.None())) - } - - /// Equals to `self[key]`, but returns `default` if the cache don't have this key present. - #[pyo3(signature = (key, default=None))] - pub fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Removes specified key and return the corresponding value. - /// - /// If the key is not found, returns the default - #[pyo3(signature = (key, default=None))] - pub fn pop( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some((_, val)) => Ok(val), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Inserts key with a value of default if key is not in the cache. - /// - /// Return the value for key if key is in the cache, else default. 
- #[pyo3(signature=(key, default=None))] - pub fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - if let Some(x) = lock.get(&hk) { - return Ok(x.clone_ref(py)); - } - - let defval = default.unwrap_or_else(|| py.None()); - lock.insert(hk, defval.clone_ref(py))?; - Ok(defval) - } - - /// not implemented - pub fn popitem(&self) -> pyo3::PyResult<()> { - Err(err!(pyo3::exceptions::PyNotImplementedError, ())) - } - - /// not implemented - #[allow(unused_variables)] - pub fn drain(&self, n: usize) -> pyo3::PyResult<()> { - Err(err!(pyo3::exceptions::PyNotImplementedError, ())) - } - - /// Removes all items from cache. - /// - /// If reuse is True, will not free the memory for reusing in the future. - #[pyo3(signature=(*, reuse=false))] - pub fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.table.clear(); - - if !reuse { - lock.table.shrink_to(0, |x| x.0.hash); - } - } - - /// Shrinks the cache to fit len(self) elements. - pub fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.table.shrink_to(0, |x| x.0.hash); - lock.state.change(); - } - - /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - /// - /// Note: raises `OverflowError` if the cache reached the maxsize limit. - pub fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.update(py, iterable) - } - - /// Returns an iterable object of the cache's items (key-value pairs). - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Items are not ordered. 
- pub fn items( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - let iter = unsafe { lock.table.iter() }; - - let result = cache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(iter), - typ: 2, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's keys. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Keys are not ordered. - pub fn keys( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - let iter = unsafe { lock.table.iter() }; - - let result = cache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(iter), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's values. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Values are not ordered. 
- pub fn values( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - let iter = unsafe { lock.table.iter() }; - - let result = cache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(iter), - typ: 1, - }; - - pyo3::Py::new(py, result) - } -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._cachebox")] -pub struct cache_iterator { - pub ptr: _KeepForIter, - pub iter: crate::mutex::Mutex>, - pub typ: u8, -} - -#[pyo3::pymethods] -impl cache_iterator { - pub fn __len__(&self) -> usize { - self.ptr.len - } - - pub fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - pub fn __next__( - mut slf: pyo3::PyRefMut<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - slf.ptr.status(py)?; - - let mut l = slf.iter.lock(); - if let Some(x) = l.next() { - let (key, val) = unsafe { x.as_ref() }; - - match slf.typ { - 0 => return Ok(key.key.clone_ref(py).into_ptr()), - 1 => return Ok(val.clone_ref(py).into_ptr()), - 2 => { - return tuple!( - py, - 2, - 0 => key.key.clone_ref(py).into_ptr(), - 1 => val.clone_ref(py).into_ptr(), - ); - } - _ => { - #[cfg(not(debug_assertions))] - unsafe { - core::hint::unreachable_unchecked() - }; - #[cfg(debug_assertions)] - unreachable!(); - } - } - } - - Err(err!(pyo3::exceptions::PyStopIteration, ())) - } -} diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs deleted file mode 100644 index dbe892e..0000000 --- a/src/bridge/fifocache.rs +++ /dev/null @@ -1,499 +0,0 @@ -//! implement FIFOCache, our fifo implementation - -use crate::{hashedkey::HashedKey, util::_KeepForIter}; - -/// FIFO Cache implementation - First-In First-Out Policy (thread-safe). -/// -/// In simple terms, the FIFO cache will remove the element that has been in the cache the longest. 
-#[pyo3::pyclass(module="cachebox._cachebox", extends=crate::bridge::baseimpl::BaseCacheImpl, frozen)] -pub struct FIFOCache { - raw: crate::mutex::Mutex, -} - -#[pyo3::pymethods] -impl FIFOCache { - /// FIFO Cache implementation - First-In First-Out Policy (thread-safe). - /// - /// By maxsize param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - /// - /// By iterable param, you can create cache from a dict or an iterable. - /// - /// If capacity param is given, cache attempts to allocate a new hash table with at - /// least enough capacity for inserting the given number of elements without reallocating. - #[new] - #[pyo3(signature=(maxsize, iterable=None, *, capacity=0))] - pub fn __new__( - py: pyo3::Python<'_>, - maxsize: usize, - iterable: Option, - capacity: usize, - ) -> pyo3::PyResult<(Self, crate::bridge::baseimpl::BaseCacheImpl)> { - let mut raw = crate::internal::FIFOPolicy::new(maxsize, capacity)?; - if iterable.is_some() { - raw.update(py, unsafe { iterable.unwrap_unchecked() })?; - } - - let self_ = Self { - raw: crate::mutex::Mutex::new(raw), - }; - Ok((self_, crate::bridge::baseimpl::BaseCacheImpl {})) - } - - /// Returns the cache maxsize - #[getter] - pub fn maxsize(&self) -> usize { - let lock = self.raw.lock(); - lock.maxsize.get() - } - - pub fn _state(&self) -> usize { - let lock = self.raw.lock(); - lock.state.get() - } - - /// Returns the number of elements in the table - len(self) - pub fn __len__(&self) -> usize { - let lock = self.raw.lock(); - lock.table.len() - } - - /// Returns allocated memory size - sys.getsizeof(self) - pub fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - core::mem::size_of::() - + lock.table.capacity() * core::mem::size_of::() - + lock.entries.capacity() * (crate::HASHEDKEY_SIZE + crate::PYOBJECT_SIZE) - } - - /// Returns true if cache not empty - bool(self) - pub fn __bool__(&self) -> bool { - let lock = self.raw.lock(); - !lock.table.is_empty() - } - - 
/// Returns true if the cache have the key present - key in self - pub fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - Ok(lock.contains_key(&hk)) - } - - /// Sets self\[key\] to value. - pub fn __setitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - lock.insert(hk, value); - Ok(()) - } - - /// Returns self\[key\] - /// - /// Note: raises KeyError if key not found. - pub fn __getitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Deletes self[key]. - /// - /// Note: raises KeyError if key not found. 
- pub fn __delitem__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(_) => Ok(()), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Returns repr(self) - pub fn __repr__(&self) -> String { - let lock = self.raw.lock(); - - format!( - "FIFOCache({} / {}, capacity={})", - lock.table.len(), - lock.maxsize.get(), - lock.table.capacity(), - ) - } - - /// Returns iter(self) - pub fn __iter__( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.state.get(); - let len = lock.table.len(); - - let result = fifocache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Supports == and != - pub fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::PyRef<'_, Self>, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.eq(&a2)) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.ne(&a2)) - } - _ => Err(err!(pyo3::exceptions::PyNotImplementedError, ())), - } - } - - pub fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let lock = self.raw.lock(); - unsafe { - let state = lock.to_pickle(py)?; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - unsafe { lock.from_pickle(py, state.as_ptr()) } - } - - pub fn 
__traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for (key, val) in self.raw.lock().entries.iter() { - visit.call(&key.key)?; - visit.call(val)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.entries.clear(); - } - - /// Returns the number of elements the map can hold without reallocating. - pub fn capacity(&self) -> usize { - let lock = self.raw.lock(); - lock.table.capacity() - } - - /// Equivalent directly to `len(self) == self.maxsize` - pub fn is_full(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == lock.maxsize.get() - } - - /// Equivalent directly to `len(self) == 0` - pub fn is_empty(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == 0 - } - - /// Equals to `self[key] = value`, but returns a value: - /// - /// - If the cache did not have this key present, None is returned. - /// - If the cache did have this key present, the value is updated, - /// and the old value is returned. The key is not updated, though; - pub fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - let op = lock.insert(hk, value); - Ok(op.unwrap_or_else(|| py.None())) - } - - /// Equals to `self[key]`, but returns `default` if the cache don't have this key present. - #[pyo3(signature = (key, default=None))] - pub fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Removes specified key and return the corresponding value. 
- /// - /// If the key is not found, returns the default - #[pyo3(signature = (key, default=None))] - pub fn pop( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some((_, val)) => Ok(val), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Inserts key with a value of default if key is not in the cache. - /// - /// Return the value for key if key is in the cache, else default. - #[pyo3(signature=(key, default=None))] - pub fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - if let Some(x) = lock.get(&hk) { - return Ok(x.clone_ref(py)); - } - - let defval = default.unwrap_or_else(|| py.None()); - lock.insert(hk, defval.clone_ref(py)); - Ok(defval) - } - - /// Removes the element that has been in the cache the longest - pub fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { - let mut lock = self.raw.lock(); - match lock.popitem() { - Some((key, val)) => Ok((key.key, val)), - None => Err(err!(pyo3::exceptions::PyKeyError, ())), - } - } - - /// Does the `popitem()` `n` times and returns count of removed items. - pub fn drain(&self, n: usize) -> usize { - let mut lock = self.raw.lock(); - - for c in 0..n { - if lock.popitem().is_none() { - return c; - } - } - - 0 - } - - /// Removes all items from cache. - /// - /// If reuse is True, will not free the memory for reusing in the future. - #[pyo3(signature=(*, reuse=false))] - pub fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.entries.clear(); - lock.n_shifts = 0; - - if !reuse { - lock.shrink_to_fit(); - } - } - - /// Shrinks the cache to fit len(self) elements. 
- pub fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - pub fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.update(py, iterable) - } - - /// Returns an iterable object of the cache's items (key-value pairs). - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - pub fn items( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.state.get(); - let len = lock.table.len(); - - let result = fifocache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 2, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's keys. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - pub fn keys( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.state.get(); - let len = lock.table.len(); - - let result = fifocache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's values. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. 
- pub fn values( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let state = lock.state.get(); - let len = lock.table.len(); - - let result = fifocache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 1, - }; - - pyo3::Py::new(py, result) - } - - /// Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). - /// - /// By using `n` parameter, you can browse order index by index. - #[pyo3(signature=(n=0))] - pub fn first(&self, py: pyo3::Python<'_>, n: usize) -> Option { - let lock = self.raw.lock(); - if n == 0 { - lock.entries.front().map(|x| x.0.key.clone_ref(py)) - } else { - lock.entries.get(n).map(|x| x.0.key.clone_ref(py)) - } - } - - /// Returns the last key in cache. - pub fn last(&self, py: pyo3::Python<'_>) -> Option { - let lock = self.raw.lock(); - lock.entries.back().map(|x| x.0.key.clone_ref(py)) - } -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._cachebox")] -pub struct fifocache_iterator { - ptr: _KeepForIter, - iter: crate::mutex::Mutex, - typ: u8, -} - -#[pyo3::pymethods] -impl fifocache_iterator { - pub fn __len__(&self) -> usize { - self.ptr.len - } - - pub fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - pub fn __next__( - mut slf: pyo3::PyRefMut<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - slf.ptr.status(py)?; - - match slf.iter.lock().next() { - Some(ptr) => { - let (key, val) = unsafe { &*ptr }; - - match slf.typ { - 0 => Ok(key.key.clone_ref(py).into_ptr()), - 1 => Ok(val.clone_ref(py).into_ptr()), - 2 => { - tuple!( - py, - 2, - 0 => key.key.clone_ref(py).into_ptr(), - 1 => val.clone_ref(py).into_ptr(), - ) - } - _ => { - #[cfg(not(debug_assertions))] - unsafe { - core::hint::unreachable_unchecked() - }; - #[cfg(debug_assertions)] - unreachable!(); - } 
- } - } - None => Err(err!(pyo3::exceptions::PyStopIteration, ())), - } - } -} diff --git a/src/bridge/lfucache.rs b/src/bridge/lfucache.rs deleted file mode 100644 index 3028600..0000000 --- a/src/bridge/lfucache.rs +++ /dev/null @@ -1,514 +0,0 @@ -//! implement LFUCache, our lfu implementation - -use crate::{hashedkey::HashedKey, util::_KeepForIter}; - -/// LFU Cache implementation - Least frequantly used policy (thread-safe). -/// -/// In simple terms, the LFU cache will remove the element in the cache that has been accessed the least, regardless of time -#[pyo3::pyclass(module="cachebox._cachebox", extends=crate::bridge::baseimpl::BaseCacheImpl, frozen)] -pub struct LFUCache { - raw: crate::mutex::Mutex, -} - -#[pyo3::pymethods] -impl LFUCache { - /// LFU Cache implementation - Least frequantly used policy (thread-safe). - /// - /// By maxsize param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - /// - /// By iterable param, you can create cache from a dict or an iterable. - /// - /// If capacity param is given, cache attempts to allocate a new hash table with at - /// least enough capacity for inserting the given number of elements without reallocating. 
- #[new] - #[pyo3(signature=(maxsize, iterable=None, *, capacity=0))] - pub fn __new__( - py: pyo3::Python<'_>, - maxsize: usize, - iterable: Option, - capacity: usize, - ) -> pyo3::PyResult<(Self, crate::bridge::baseimpl::BaseCacheImpl)> { - let mut raw = crate::internal::LFUPolicy::new(maxsize, capacity)?; - if iterable.is_some() { - raw.update(py, unsafe { iterable.unwrap_unchecked() })?; - } - - let self_ = Self { - raw: crate::mutex::Mutex::new(raw), - }; - Ok((self_, crate::bridge::baseimpl::BaseCacheImpl {})) - } - - /// Returns the cache maxsize - #[getter] - pub fn maxsize(&self) -> usize { - let lock = self.raw.lock(); - lock.maxsize.get() - } - - pub fn _state(&self) -> usize { - let lock = self.raw.lock(); - lock.state.get() - } - - /// Returns the number of elements in the table - len(self) - pub fn __len__(&self) -> usize { - let lock = self.raw.lock(); - lock.table.len() - } - - /// Returns allocated memory size - sys.getsizeof(self) - pub fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - core::mem::size_of::() - + lock.table.capacity() - * core::mem::size_of::< - core::ptr::NonNull< - crate::sorted_heap::Entry<(HashedKey, pyo3::PyObject, usize)>, - >, - >() - + lock.heap.capacity() - * core::mem::size_of::>( - ) - } - - /// Returns true if cache not empty - bool(self) - pub fn __bool__(&self) -> bool { - let lock = self.raw.lock(); - !lock.table.is_empty() - } - - /// Returns true if the cache have the key present - key in self - pub fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - Ok(lock.contains_key(&hk)) - } - - /// Sets self\[key\] to value. 
- pub fn __setitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - lock.insert(hk, value); - Ok(()) - } - - /// Returns self\[key\] - /// - /// Note: raises KeyError if key not found. - pub fn __getitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Deletes self[key]. - /// - /// Note: raises KeyError if key not found. - pub fn __delitem__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(_) => Ok(()), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Returns repr(self) - pub fn __repr__(&self) -> String { - let lock = self.raw.lock(); - - format!( - "LFUCache({} / {}, capacity={})", - lock.table.len(), - lock.maxsize.get(), - lock.table.capacity(), - ) - } - - /// Returns iter(self) - pub fn __iter__( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = lfucache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Supports == and != - pub fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::PyRef<'_, Self>, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.eq(&a2)) - } 
- pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.ne(&a2)) - } - _ => Err(err!(pyo3::exceptions::PyNotImplementedError, ())), - } - } - - pub fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let mut lock = self.raw.lock(); - unsafe { - let state = lock.to_pickle(py)?; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - unsafe { lock.from_pickle(py, state.as_ptr()) } - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - unsafe { - for bucket in self.raw.lock().table.iter() { - let node = bucket.as_ref(); - - visit.call(&(*node.as_ptr()).as_ref().0.key)?; - visit.call(&(*node.as_ptr()).as_ref().1)?; - } - } - - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.heap.clear(); - } - - /// Returns the number of elements the map can hold without reallocating. - pub fn capacity(&self) -> usize { - let lock = self.raw.lock(); - lock.table.capacity() - } - - /// Equivalent directly to `len(self) == self.maxsize` - pub fn is_full(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == lock.maxsize.get() - } - - /// Equivalent directly to `len(self) == 0` - pub fn is_empty(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == 0 - } - - /// Equals to `self[key] = value`, but returns a value: - /// - /// - If the cache did not have this key present, None is returned. - /// - If the cache did have this key present, the value is updated, - /// and the old value is returned. 
The key is not updated, though; - pub fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - let op = lock.insert(hk, value); - Ok(op.unwrap_or_else(|| py.None())) - } - - /// Equals to `self[key]`, but returns `default` if the cache don't have this key present. - #[pyo3(signature = (key, default=None))] - pub fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Searches for a key-value in the cache and returns it (without moving the key to recently used). - #[pyo3(signature = (key, default=None))] - pub fn peek( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.peek(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Removes specified key and return the corresponding value. - /// - /// If the key is not found, returns the default - #[pyo3(signature = (key, default=None))] - pub fn pop( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some((_, val, _)) => Ok(val), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Inserts key with a value of default if key is not in the cache. - /// - /// Return the value for key if key is in the cache, else default. 
- #[pyo3(signature=(key, default=None))] - pub fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - if let Some(x) = lock.get(&hk) { - return Ok(x.clone_ref(py)); - } - - let defval = default.unwrap_or_else(|| py.None()); - lock.insert(hk, defval.clone_ref(py)); - Ok(defval) - } - - /// Removes the element that has been in the cache the longest - pub fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { - let mut lock = self.raw.lock(); - match lock.popitem() { - Some((key, val, _)) => Ok((key.key, val)), - None => Err(err!(pyo3::exceptions::PyKeyError, ())), - } - } - - /// Does the `popitem()` `n` times and returns count of removed items. - pub fn drain(&self, n: usize) -> usize { - let mut lock = self.raw.lock(); - - for c in 0..n { - if lock.popitem().is_none() { - return c; - } - } - - 0 - } - - /// Removes all items from cache. - /// - /// If reuse is True, will not free the memory for reusing in the future. - #[pyo3(signature=(*, reuse=false))] - pub fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.heap.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - /// Shrinks the cache to fit len(self) elements. - pub fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - pub fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.update(py, iterable) - } - - /// Returns an iterable object of the cache's items (key-value pairs). - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. 
- pub fn items( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = lfucache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 2, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's keys. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - pub fn keys( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = lfucache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's values. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - pub fn values( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = lfucache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 1, - }; - - pyo3::Py::new(py, result) - } - - /// Returns the key in the cache that has been accessed the least, regardless of time. 
- #[pyo3(signature=(n=0))] - pub fn least_frequently_used(&self, py: pyo3::Python<'_>, n: usize) -> Option { - let mut lock = self.raw.lock(); - lock.heap.sort(|a, b| a.2.cmp(&b.2)); - let node = lock.heap.get(n)?; - - Some(unsafe { (*node.as_ptr()).as_ref().0.key.clone_ref(py) }) - } -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._cachebox")] -pub struct lfucache_iterator { - ptr: _KeepForIter, - iter: crate::mutex::Mutex>, - typ: u8, -} - -#[pyo3::pymethods] -impl lfucache_iterator { - pub fn __len__(&self) -> usize { - self.ptr.len - } - - pub fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - pub fn __next__( - mut slf: pyo3::PyRefMut<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - slf.ptr.status(py)?; - - match slf.iter.lock().next() { - Some(ptr) => { - let node = unsafe { &*ptr.as_ptr() }; - - match slf.typ { - 0 => Ok(node.as_ref().0.key.clone_ref(py).into_ptr()), - 1 => Ok(node.as_ref().1.clone_ref(py).into_ptr()), - 2 => { - tuple!( - py, - 2, - 0 => node.as_ref().0.key.clone_ref(py).into_ptr(), - 1 => node.as_ref().1.clone_ref(py).into_ptr(), - ) - } - _ => { - #[cfg(not(debug_assertions))] - unsafe { - core::hint::unreachable_unchecked() - }; - #[cfg(debug_assertions)] - unreachable!(); - } - } - } - None => Err(err!(pyo3::exceptions::PyStopIteration, ())), - } - } -} diff --git a/src/bridge/lrucache.rs b/src/bridge/lrucache.rs deleted file mode 100644 index 5f7effd..0000000 --- a/src/bridge/lrucache.rs +++ /dev/null @@ -1,532 +0,0 @@ -//! implement LRUCache, our lru implementation - -use crate::{hashedkey::HashedKey, util::_KeepForIter}; - -/// LRU Cache implementation - Least recently used policy (thread-safe). -/// -/// In simple terms, the LRU cache will remove the element in the cache that has not been accessed in the longest time. 
-#[pyo3::pyclass(module="cachebox._cachebox", extends=crate::bridge::baseimpl::BaseCacheImpl, frozen)] -pub struct LRUCache { - raw: crate::mutex::Mutex, -} - -#[pyo3::pymethods] -impl LRUCache { - /// LRU Cache implementation - Least recently used policy (thread-safe). - /// - /// By maxsize param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - /// - /// By iterable param, you can create cache from a dict or an iterable. - /// - /// If capacity param is given, cache attempts to allocate a new hash table with at - /// least enough capacity for inserting the given number of elements without reallocating. - #[new] - #[pyo3(signature=(maxsize, iterable=None, *, capacity=0))] - pub fn __new__( - py: pyo3::Python<'_>, - maxsize: usize, - iterable: Option, - capacity: usize, - ) -> pyo3::PyResult<(Self, crate::bridge::baseimpl::BaseCacheImpl)> { - let mut raw = crate::internal::LRUPolicy::new(maxsize, capacity)?; - if iterable.is_some() { - raw.update(py, unsafe { iterable.unwrap_unchecked() })?; - } - - let self_ = Self { - raw: crate::mutex::Mutex::new(raw), - }; - Ok((self_, crate::bridge::baseimpl::BaseCacheImpl {})) - } - - /// Returns the cache maxsize - #[getter] - pub fn maxsize(&self) -> usize { - let lock = self.raw.lock(); - lock.maxsize.get() - } - - pub fn _state(&self) -> usize { - let lock = self.raw.lock(); - lock.state.get() - } - - /// Returns the number of elements in the table - len(self) - pub fn __len__(&self) -> usize { - let lock = self.raw.lock(); - lock.table.len() - } - - /// Returns allocated memory size - sys.getsizeof(self) - pub fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - core::mem::size_of::() - + lock.table.capacity() - * core::mem::size_of::>() - + lock.list.len() * core::mem::size_of::() - } - - /// Returns true if cache not empty - bool(self) - pub fn __bool__(&self) -> bool { - let lock = self.raw.lock(); - !lock.table.is_empty() - } - - /// Returns true if the cache 
have the key present - key in self - pub fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - Ok(lock.contains_key(&hk)) - } - - /// Sets self\[key\] to value. - pub fn __setitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - lock.insert(hk, value); - Ok(()) - } - - /// Returns self\[key\] - /// - /// Note: raises KeyError if key not found. - pub fn __getitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Deletes self[key]. - /// - /// Note: raises KeyError if key not found. - pub fn __delitem__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(_) => Ok(()), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Returns repr(self) - pub fn __repr__(&self) -> String { - let lock = self.raw.lock(); - - format!( - "LRUCache({} / {}, capacity={})", - lock.table.len(), - lock.maxsize.get(), - lock.table.capacity(), - ) - } - - /// Returns iter(self) - pub fn __iter__( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = lrucache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.list.iter()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Supports == and != - pub fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: 
pyo3::PyRef<'_, Self>, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.eq(&a2)) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.ne(&a2)) - } - _ => Err(err!(pyo3::exceptions::PyNotImplementedError, ())), - } - } - - pub fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let lock = self.raw.lock(); - unsafe { - let state = lock.to_pickle(py)?; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - unsafe { lock.from_pickle(py, state.as_ptr()) } - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - unsafe { - for bucket in self.raw.lock().table.iter() { - let node = bucket.as_ref(); - - visit.call(&(*node.as_ptr()).element.0.key)?; - visit.call(&(*node.as_ptr()).element.1)?; - } - } - - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.list.clear(); - } - - /// Returns the number of elements the map can hold without reallocating. - pub fn capacity(&self) -> usize { - let lock = self.raw.lock(); - lock.table.capacity() - } - - /// Equivalent directly to `len(self) == self.maxsize` - pub fn is_full(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == lock.maxsize.get() - } - - /// Equivalent directly to `len(self) == 0` - pub fn is_empty(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == 0 - } - - /// Equals to `self[key] = value`, but returns a value: - /// - /// - If the cache did not have this key present, None is returned. 
- /// - If the cache did have this key present, the value is updated, - /// and the old value is returned. The key is not updated, though; - pub fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - let op = lock.insert(hk, value); - Ok(op.unwrap_or_else(|| py.None())) - } - - /// Equals to `self[key]`, but returns `default` if the cache don't have this key present. - #[pyo3(signature = (key, default=None))] - pub fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Searches for a key-value in the cache and returns it (without moving the key to recently used). - #[pyo3(signature = (key, default=None))] - pub fn peek( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.peek(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Removes specified key and return the corresponding value. - /// - /// If the key is not found, returns the default - #[pyo3(signature = (key, default=None))] - pub fn pop( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some((_, val)) => Ok(val), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Inserts key with a value of default if key is not in the cache. - /// - /// Return the value for key if key is in the cache, else default. 
- #[pyo3(signature=(key, default=None))] - pub fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - if let Some(x) = lock.get(&hk) { - return Ok(x.clone_ref(py)); - } - - let defval = default.unwrap_or_else(|| py.None()); - lock.insert(hk, defval.clone_ref(py)); - Ok(defval) - } - - /// Removes the element that has been in the cache the longest - pub fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { - let mut lock = self.raw.lock(); - match lock.popitem() { - Some((key, val)) => Ok((key.key, val)), - None => Err(err!(pyo3::exceptions::PyKeyError, ())), - } - } - - /// Does the `popitem()` `n` times and returns count of removed items. - pub fn drain(&self, n: usize) -> usize { - let mut lock = self.raw.lock(); - - for c in 0..n { - if lock.popitem().is_none() { - return c; - } - } - - 0 - } - - /// Removes all items from cache. - /// - /// If reuse is True, will not free the memory for reusing in the future. - #[pyo3(signature=(*, reuse=false))] - pub fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.list.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - /// Shrinks the cache to fit len(self) elements. - pub fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - pub fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.update(py, iterable) - } - - /// Returns an iterable object of the cache's items (key-value pairs). - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. 
- pub fn items( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = lrucache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.list.iter()), - typ: 2, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's keys. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - pub fn keys( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = lrucache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.list.iter()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's values. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - pub fn values( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = lrucache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.list.iter()), - typ: 1, - }; - - pyo3::Py::new(py, result) - } - - /// Returns the key in the cache that has not been accessed in the longest time. 
- #[pyo3(signature=(n=0))] - pub fn least_recently_used( - &self, - py: pyo3::Python<'_>, - mut n: usize, - ) -> Option { - let lock = self.raw.lock(); - - if n >= lock.list.len() { - None - } else { - let mut node = lock.list.head?; - - unsafe { - while n > 0 { - debug_assert!((*node.as_ptr()).next.is_some()); // we checked length, so it have to available - node = (*node.as_ptr()).next.unwrap(); - n -= 1; - } - - Some((*node.as_ptr()).element.0.key.clone_ref(py)) - } - } - } - - /// Returns the key in the cache that has been accessed in the shortest time. - pub fn most_recently_used(&self, py: pyo3::Python<'_>) -> Option { - let lock = self.raw.lock(); - lock.list - .tail - .map(|node| unsafe { (*node.as_ptr()).element.0.key.clone_ref(py) }) - } -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._cachebox")] -pub struct lrucache_iterator { - ptr: _KeepForIter, - iter: crate::mutex::Mutex, - typ: u8, -} - -#[pyo3::pymethods] -impl lrucache_iterator { - pub fn __len__(&self) -> usize { - self.ptr.len - } - - pub fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - pub fn __next__( - mut slf: pyo3::PyRefMut<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - slf.ptr.status(py)?; - - match slf.iter.lock().next() { - Some(ptr) => { - let (key, val) = unsafe { &(*ptr.as_ptr()).element }; - - match slf.typ { - 0 => Ok(key.key.clone_ref(py).into_ptr()), - 1 => Ok(val.clone_ref(py).into_ptr()), - 2 => { - tuple!( - py, - 2, - 0 => key.key.clone_ref(py).into_ptr(), - 1 => val.clone_ref(py).into_ptr(), - ) - } - _ => { - #[cfg(not(debug_assertions))] - unsafe { - core::hint::unreachable_unchecked() - }; - #[cfg(debug_assertions)] - unreachable!(); - } - } - } - None => Err(err!(pyo3::exceptions::PyStopIteration, ())), - } - } -} diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs deleted file mode 100644 index a395999..0000000 --- a/src/bridge/mod.rs +++ /dev/null @@ 
-1,10 +0,0 @@ -//! bridge Rust cache implementations to Python. - -pub mod baseimpl; -pub mod cache; -pub mod fifocache; -pub mod lfucache; -pub mod lrucache; -pub mod rrcache; -pub mod ttlcache; -pub mod vttlcache; diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs deleted file mode 100644 index 32108bc..0000000 --- a/src/bridge/rrcache.rs +++ /dev/null @@ -1,492 +0,0 @@ -//! implement RRCache, random replacement policy - -use crate::bridge::cache::cache_iterator; -use crate::hashedkey::HashedKey; -use crate::util::_KeepForIter; - -macro_rules! popitem_rr { - ($lock:expr) => { - if $lock.table.is_empty() { - Err(err!(pyo3::exceptions::PyNotImplementedError, ())) - } else { - let nth = fastrand::usize(0..$lock.table.len()); - - #[cfg(debug_assertions)] - let bucket = unsafe { $lock.table.iter().nth(nth).unwrap() }; - - #[cfg(not(debug_assertions))] - let bucket = unsafe { $lock.table.iter().nth(nth).unwrap_unchecked() }; - - let (h, _) = unsafe { $lock.table.remove(bucket) }; - - Ok(h) - } - }; -} - -macro_rules! insert_rr { - ($lock:expr, $hk:expr, $value:expr) => {{ - if $lock.table.len() >= $lock.maxsize.get() - && $lock.table.find($hk.hash, |x| x.0 == $hk).is_none() - { - #[allow(unused_unsafe)] - unsafe { - popitem_rr!($lock).unwrap_unchecked() - }; - } - - unsafe { $lock.insert_unchecked($hk, $value) } - }}; -} - -/// RRCache implementation - Random Replacement policy (thread-safe). -/// -/// In simple terms, the RR cache will choice randomly element to remove it to make space when necessary. -#[pyo3::pyclass(module="cachebox._cachebox", extends=crate::bridge::baseimpl::BaseCacheImpl, frozen)] -pub struct RRCache { - raw: crate::mutex::Mutex, -} - -#[pyo3::pymethods] -impl RRCache { - /// By maxsize param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - /// - /// By iterable param, you can create cache from a dict or an iterable. 
- /// - /// If capacity param is given, cache attempts to allocate a new hash table with at - /// least enough capacity for inserting the given number of elements without reallocating. - #[new] - #[pyo3(signature=(maxsize, iterable=None, *, capacity=0))] - pub fn __new__( - py: pyo3::Python<'_>, - maxsize: usize, - iterable: Option, - capacity: usize, - ) -> pyo3::PyResult<(Self, crate::bridge::baseimpl::BaseCacheImpl)> { - let mut raw = crate::internal::NoPolicy::new(maxsize, capacity)?; - if iterable.is_some() { - raw.update(py, unsafe { iterable.unwrap_unchecked() })?; - } - - let self_ = Self { - raw: crate::mutex::Mutex::new(raw), - }; - Ok((self_, crate::bridge::baseimpl::BaseCacheImpl {})) - } - - /// Returns the cache maxsize - #[getter] - pub fn maxsize(&self) -> usize { - let lock = self.raw.lock(); - lock.maxsize.get() - } - - pub fn _state(&self) -> usize { - let lock = self.raw.lock(); - lock.state.get() - } - - /// Returns the number of elements in the table - len(self) - pub fn __len__(&self) -> usize { - let lock = self.raw.lock(); - lock.table.len() - } - - /// Returns allocated memory size - sys.getsizeof(self) - pub fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - let cap = lock.table.capacity(); - - core::mem::size_of::() + cap * (crate::HASHEDKEY_SIZE + crate::PYOBJECT_SIZE) - } - - /// Returns true if cache not empty - bool(self) - pub fn __bool__(&self) -> bool { - let lock = self.raw.lock(); - !lock.table.is_empty() - } - - /// Returns true if the cache have the key present - key in self - pub fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - Ok(lock.contains_key(&hk)) - } - - /// Sets self\[key\] to value. - /// - /// Note: raises OverflowError if the cache reached the maxsize limit, - /// because this class does not have any algorithm. 
- pub fn __setitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - insert_rr!(lock, hk, value); - Ok(()) - } - - /// Returns self\[key\] - /// - /// Note: raises KeyError if key not found. - pub fn __getitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Deletes self[key]. - /// - /// Note: raises KeyError if key not found. - pub fn __delitem__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(_) => Ok(()), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Returns repr(self) - pub fn __repr__(&self) -> String { - let lock = self.raw.lock(); - - format!( - "RRCache({} / {}, capacity={})", - lock.table.len(), - lock.maxsize.get(), - lock.table.capacity(), - ) - } - - /// Returns iter(self) - pub fn __iter__( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - let iter = unsafe { lock.table.iter() }; - - let result = cache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(iter), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Supports == and != - pub fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::PyRef<'_, Self>, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let (a1, a2) = (slf.raw.lock(), 
other.raw.lock()); - Ok(a1.eq(&a2)) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.ne(&a2)) - } - _ => Err(err!(pyo3::exceptions::PyNotImplementedError, ())), - } - } - - pub fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let lock = self.raw.lock(); - unsafe { - let state = lock.to_pickle(py)?; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - unsafe { lock.from_pickle(py, state.as_ptr()) } - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for value in unsafe { self.raw.lock().table.iter() } { - let (key, value) = unsafe { value.as_ref() }; - visit.call(&key.key)?; - visit.call(value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.table.clear() - } - - /// Returns the number of elements the map can hold without reallocating. - pub fn capacity(&self) -> usize { - let lock = self.raw.lock(); - lock.table.capacity() - } - - /// Equivalent directly to `len(self) == self.maxsize` - pub fn is_full(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == lock.maxsize.get() - } - - /// Equivalent directly to `len(self) == 0` - pub fn is_empty(&self) -> bool { - let lock = self.raw.lock(); - lock.table.len() == 0 - } - - /// Equals to `self[key] = value`, but returns a value: - /// - /// - If the cache did not have this key present, None is returned. - /// - If the cache did have this key present, the value is updated, - /// and the old value is returned. The key is not updated, though; - /// - /// Note: raises `OverflowError` if the cache reached the maxsize limit, - /// because this class does not have any algorithm. 
- pub fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - let op = insert_rr!(lock, hk, value); - Ok(op.unwrap_or_else(|| py.None())) - } - - /// Equals to `self[key]`, but returns `default` if the cache don't have this key present. - #[pyo3(signature = (key, default=None))] - pub fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Removes specified key and return the corresponding value. - /// - /// If the key is not found, returns the default - #[pyo3(signature = (key, default=None))] - pub fn pop( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some((_, val)) => Ok(val), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Inserts key with a value of default if key is not in the cache. - /// - /// Return the value for key if key is in the cache, else default. - #[pyo3(signature=(key, default=None))] - pub fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - if let Some(x) = lock.get(&hk) { - return Ok(x.clone_ref(py)); - } - - let defval = default.unwrap_or_else(|| py.None()); - insert_rr!(lock, hk, defval.clone_ref(py)); - Ok(defval) - } - - /// Removes and returns an element from cache, completely randomly. 
- pub fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { - let mut lock = self.raw.lock(); - let (hk, val) = popitem_rr!(lock)?; - Ok((hk.key, val)) - } - - /// Does the `popitem()` `n` times and returns count of removed items. - pub fn drain(&self, n: usize) -> usize { - let mut lock = self.raw.lock(); - - for c in 0..n { - if popitem_rr!(lock).is_err() { - return c; - } - } - - 0 - } - - /// Removes all items from cache. - /// - /// If reuse is True, will not free the memory for reusing in the future. - #[pyo3(signature=(*, reuse=false))] - pub fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.table.clear(); - - if !reuse { - lock.table.shrink_to(0, |x| x.0.hash); - } - } - - /// Shrinks the cache to fit len(self) elements. - pub fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.table.shrink_to(0, |x| x.0.hash); - lock.state.change(); - } - - /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - /// - /// Note: raises `OverflowError` if the cache reached the maxsize limit. - pub fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { - iterable - .downcast_bound::(py) - .unwrap_unchecked() - }; - - for (key, value) in dict.iter() { - let hk = unsafe { HashedKey::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - insert_rr!(lock, hk, value.unbind()); - } - - Ok(()) - } else { - for pair in iterable.bind(py).try_iter()? 
{ - let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - insert_rr!(lock, hk, value); - } - - Ok(()) - } - } - - /// Returns an iterable object of the cache's items (key-value pairs). - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Items are not ordered. - pub fn items( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - let iter = unsafe { lock.table.iter() }; - - let result = cache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(iter), - typ: 2, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's keys. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Keys are not ordered. - pub fn keys( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - let iter = unsafe { lock.table.iter() }; - - let result = cache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(iter), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's values. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Values are not ordered. 
- pub fn values( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let lock = slf.raw.lock(); - let (len, state) = (lock.table.len(), lock.state.get()); - let iter = unsafe { lock.table.iter() }; - - let result = cache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(iter), - typ: 1, - }; - - pyo3::Py::new(py, result) - } -} diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs deleted file mode 100644 index cf9d553..0000000 --- a/src/bridge/ttlcache.rs +++ /dev/null @@ -1,619 +0,0 @@ -//! implement TTLCache, our ttl implementation - -use crate::{hashedkey::HashedKey, internal::TTLElement, util::_KeepForIter}; - -/// TTL Cache implementation - Time-To-Live Policy (thread-safe). -/// -/// In simple terms, the TTL cache will automatically remove the element in the cache that has expired:: -#[pyo3::pyclass(module="cachebox._cachebox", extends=crate::bridge::baseimpl::BaseCacheImpl, frozen)] -pub struct TTLCache { - raw: crate::mutex::Mutex, -} - -#[pyo3::pymethods] -impl TTLCache { - /// TTL Cache implementation - First-In First-Out Policy (thread-safe). - /// - /// By maxsize param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - /// - /// The ttl param specifies the time-to-live value for each element in cache (in seconds); cannot be zero or negative. - /// - /// By iterable param, you can create cache from a dict or an iterable. - /// - /// If capacity param is given, cache attempts to allocate a new hash table with at - /// least enough capacity for inserting the given number of elements without reallocating. 
- #[new] - #[pyo3(signature=(maxsize, ttl, iterable=None, *, capacity=0))] - pub fn __new__( - py: pyo3::Python<'_>, - maxsize: usize, - ttl: f64, - iterable: Option, - capacity: usize, - ) -> pyo3::PyResult<(Self, crate::bridge::baseimpl::BaseCacheImpl)> { - if ttl <= 0.0 { - return Err(err!( - pyo3::exceptions::PyValueError, - "ttl cannot be zero or negative" - )); - } - - let mut raw = crate::internal::TTLPolicy::new(maxsize, capacity, ttl)?; - if iterable.is_some() { - raw.update(py, unsafe { iterable.unwrap_unchecked() })?; - } - - let self_ = Self { - raw: crate::mutex::Mutex::new(raw), - }; - Ok((self_, crate::bridge::baseimpl::BaseCacheImpl {})) - } - - /// Returns the cache maxsize - #[getter] - pub fn maxsize(&self) -> usize { - let lock = self.raw.lock(); - lock.maxsize.get() - } - - pub fn _state(&self) -> usize { - let lock = self.raw.lock(); - lock.state.get() - } - - /// Returns the cache ttl - #[getter] - pub fn ttl(&self) -> f64 { - let lock = self.raw.lock(); - lock.ttl.as_secs_f64() - } - - /// Returns the number of elements in the table - len(self) - pub fn __len__(&self) -> usize { - let mut lock = self.raw.lock(); - lock.expire(); - lock.table.len() - } - - /// Returns allocated memory size - sys.getsizeof(self) - pub fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - core::mem::size_of::() - + lock.table.capacity() * core::mem::size_of::() - + lock.entries.capacity() * (crate::HASHEDKEY_SIZE + crate::PYOBJECT_SIZE) - } - - /// Returns true if cache not empty - bool(self) - pub fn __bool__(&self) -> bool { - let mut lock = self.raw.lock(); - lock.expire(); - !lock.table.is_empty() - } - - /// Returns true if the cache have the key present - key in self - pub fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - Ok(lock.contains_key(&hk)) - } - - /// Sets self\[key\] to value. 
- pub fn __setitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - lock.insert(hk, value, true); - Ok(()) - } - - /// Returns self\[key\] - /// - /// Note: raises KeyError if key not found. - pub fn __getitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.value.clone_ref(py)), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Deletes self[key]. - /// - /// Note: raises KeyError if key not found. - pub fn __delitem__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(_) => Ok(()), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Returns repr(self) - pub fn __repr__(&self) -> String { - let mut lock = self.raw.lock(); - lock.expire(); - - format!( - "TTLCache({} / {}, ttl={}, capacity={})", - lock.table.len(), - lock.maxsize.get(), - lock.ttl.as_secs_f64(), - lock.table.capacity(), - ) - } - - /// Returns `iter(self)` - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. 
- pub fn __iter__( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - - lock.expire(); - - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = ttlcache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.as_ptr()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Supports == and != - pub fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::PyRef<'_, Self>, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.eq(&a2)) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.ne(&a2)) - } - _ => Err(err!(pyo3::exceptions::PyNotImplementedError, ())), - } - } - - pub fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let lock = self.raw.lock(); - - unsafe { - let state = lock.to_pickle(py)?; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __getnewargs__(&self) -> (usize, f64) { - (0, 1.0) - } - - pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - unsafe { lock.from_pickle(py, state.as_ptr()) } - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - for element in self.raw.lock().entries.iter() { - visit.call(&element.key.key)?; - visit.call(&element.value)?; - } - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.entries.clear(); - } - - /// Returns the number of elements the map can hold without reallocating. 
- pub fn capacity(&self) -> usize { - let lock = self.raw.lock(); - lock.table.capacity() - } - - /// Equivalent directly to `len(self) == self.maxsize` - pub fn is_full(&self) -> bool { - let mut lock = self.raw.lock(); - lock.expire(); - lock.table.len() == lock.maxsize.get() - } - - /// Equivalent directly to `len(self) == 0` - pub fn is_empty(&self) -> bool { - let mut lock = self.raw.lock(); - lock.expire(); - lock.table.len() == 0 - } - - /// Equals to `self[key] = value`, but returns a value: - /// - /// - If the cache did not have this key present, None is returned. - /// - If the cache did have this key present, the value is updated, - /// and the old value is returned. The key is not updated, though; - pub fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - let op = lock.insert(hk, value, true); - Ok(op.unwrap_or_else(|| py.None())) - } - - /// Equals to `self[key]`, but returns `default` if the cache don't have this key present. - #[pyo3(signature = (key, default=None))] - pub fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.value.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Removes specified key and return the corresponding value. 
- /// - /// If the key is not found, returns the default - #[pyo3(signature = (key, default=None))] - pub fn pop( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(element) => Ok(element.value), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Inserts key with a value of default if key is not in the cache. - /// - /// Return the value for key if key is in the cache, else default. - #[pyo3(signature=(key, default=None))] - pub fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - if let Some(x) = lock.get(&hk) { - return Ok(x.value.clone_ref(py)); - } - - let defval = default.unwrap_or_else(|| py.None()); - lock.insert(hk, defval.clone_ref(py), true); - Ok(defval) - } - - /// Removes the element that has been in the cache the longest - pub fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { - let mut lock = self.raw.lock(); - match lock.popitem() { - Some(element) => Ok((element.key.key, element.value)), - None => Err(err!(pyo3::exceptions::PyKeyError, ())), - } - } - - /// Does the `popitem()` `n` times and returns count of removed items. - pub fn drain(&self, n: usize) -> usize { - let mut lock = self.raw.lock(); - - for c in 0..n { - if lock.popitem().is_none() { - return c; - } - } - - 0 - } - - /// Removes all items from cache. - /// - /// If reuse is True, will not free the memory for reusing in the future. - #[pyo3(signature=(*, reuse=false))] - pub fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.entries.clear(); - lock.n_shifts = 0; - - if !reuse { - lock.shrink_to_fit(); - } - } - - /// Shrinks the cache to fit len(self) elements. 
- pub fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - pub fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.update(py, iterable) - } - - /// Returns an iterable object of the cache's items (key-value pairs). - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - pub fn items( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - - lock.expire(); - - let (len, state) = (lock.table.len(), lock.state.get()); - let result = ttlcache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.as_ptr()), - typ: 2, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's keys. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - pub fn keys( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - - lock.expire(); - - let (len, state) = (lock.table.len(), lock.state.get()); - let result = ttlcache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.as_ptr()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's values. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. 
- /// - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - pub fn values( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - - lock.expire(); - - let (len, state) = (lock.table.len(), lock.state.get()); - let result = ttlcache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.as_ptr()), - typ: 1, - }; - - pyo3::Py::new(py, result) - } - - /// Returns the oldest key in cache; this is the one which will be removed by `popitem()` (if n == 0). - /// - /// By using `n` parameter, you can browse order index by index. - #[pyo3(signature=(n=0))] - pub fn first(&self, py: pyo3::Python<'_>, n: usize) -> Option { - let lock = self.raw.lock(); - if n == 0 { - lock.entries.front().map(|x| x.key.key.clone_ref(py)) - } else { - lock.entries.get(n).map(|x| x.key.key.clone_ref(py)) - } - } - - /// Returns the newest key in cache. - pub fn last(&self, py: pyo3::Python<'_>) -> Option { - let lock = self.raw.lock(); - lock.entries.back().map(|x| x.key.key.clone_ref(py)) - } - - /// Works like `.get()`, but also returns the remaining time-to-live. - #[pyo3(signature = (key, default=None))] - pub fn get_with_expire( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult<(pyo3::PyObject, f64)> { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - #[rustfmt::skip] - Some(val) => Ok( - ( - val.value.clone_ref(py), - unsafe { - val.expire.duration_since(std::time::SystemTime::now()) - .unwrap_unchecked() - .as_secs_f64() - } - ) - ), - None => Ok((default.unwrap_or_else(|| py.None()), 0.0)), - } - } - - /// Works like `.pop()`, but also returns the remaining time-to-live. 
- #[pyo3(signature = (key, default=None))] - pub fn pop_with_expire( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult<(pyo3::PyObject, f64)> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - #[rustfmt::skip] - Some(element) => Ok( - ( - element.value, - unsafe { - element.expire.duration_since(std::time::SystemTime::now()) - .unwrap_unchecked() - .as_secs_f64() - } - ) - ), - None => Ok((default.unwrap_or_else(|| py.None()), 0.0)), - } - } - - /// Works like `.popitem()`, but also returns the remaining time-to-live. - pub fn popitem_with_expire(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject, f64)> { - let mut lock = self.raw.lock(); - match lock.popitem() { - #[rustfmt::skip] - Some(element) => Ok( - ( - element.key.key, - element.value, - unsafe { - element.expire.duration_since(std::time::SystemTime::now()) - .unwrap_unchecked() - .as_secs_f64() - } - ) - ), - None => Err(err!(pyo3::exceptions::PyKeyError, ())), - } - } -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._cachebox")] -pub struct ttlcache_iterator { - ptr: _KeepForIter, - iter: crate::mutex::Mutex, - typ: u8, -} - -#[pyo3::pymethods] -impl ttlcache_iterator { - pub fn __len__(&self) -> usize { - self.ptr.len - } - - pub fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - pub fn __next__( - mut slf: pyo3::PyRefMut<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - slf.ptr.status(py)?; - - let mut l = slf.iter.lock(); - - let mut element: &TTLElement; - loop { - element = unsafe { - if let Some(x) = l.next() { - &*x - } else { - return Err(err!(pyo3::exceptions::PyStopIteration, ())); - } - }; - - if element.expire > std::time::SystemTime::now() { - break; - } - } - - match slf.typ { - 0 => Ok(element.key.key.clone_ref(py).into_ptr()), - 1 => 
Ok(element.value.clone_ref(py).into_ptr()), - 2 => { - tuple!( - py, - 2, - 0 => element.key.key.clone_ref(py).into_ptr(), - 1 => element.value.clone_ref(py).into_ptr(), - ) - } - _ => { - #[cfg(not(debug_assertions))] - unsafe { - core::hint::unreachable_unchecked() - }; - #[cfg(debug_assertions)] - unreachable!(); - } - } - } -} diff --git a/src/bridge/vttlcache.rs b/src/bridge/vttlcache.rs deleted file mode 100644 index f144bcb..0000000 --- a/src/bridge/vttlcache.rs +++ /dev/null @@ -1,600 +0,0 @@ -//! implement VTTLCache, our vttl implementation - -use crate::{hashedkey::HashedKey, internal::VTTLElement, util::_KeepForIter}; - -/// VTTL Cache Implementation - Time-To-Live Per-Key Policy (thread-safe). -/// -/// In simple terms, the TTL cache will automatically remove the element in the cache that has expired when need. -#[pyo3::pyclass(module="cachebox._cachebox", extends=crate::bridge::baseimpl::BaseCacheImpl, frozen)] -pub struct VTTLCache { - raw: crate::mutex::Mutex, -} - -#[pyo3::pymethods] -impl VTTLCache { - /// VTTL Cache Implementation - Time-To-Live Per-Key Policy (thread-safe). - /// - /// By maxsize param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. - /// - /// By iterable param, you can create cache from a dict or an iterable. - /// - /// If capacity param is given, cache attempts to allocate a new hash table with at - /// least enough capacity for inserting the given number of elements without reallocating. 
- #[new] - #[pyo3(signature=(maxsize, iterable=None, ttl=None, *, capacity=0))] - pub fn __new__( - py: pyo3::Python<'_>, - maxsize: usize, - iterable: Option, - ttl: Option, - capacity: usize, - ) -> pyo3::PyResult<(Self, crate::bridge::baseimpl::BaseCacheImpl)> { - let mut raw = crate::internal::VTTLPolicy::new(maxsize, capacity)?; - if iterable.is_some() { - raw.update(py, unsafe { iterable.unwrap_unchecked() }, ttl)?; - } - - let self_ = Self { - raw: crate::mutex::Mutex::new(raw), - }; - Ok((self_, crate::bridge::baseimpl::BaseCacheImpl {})) - } - - /// Returns the cache maxsize - #[getter] - pub fn maxsize(&self) -> usize { - let lock = self.raw.lock(); - lock.maxsize.get() - } - pub fn _state(&self) -> usize { - let lock = self.raw.lock(); - lock.state.get() - } - - /// Returns the number of elements in the table - len(self) - pub fn __len__(&self) -> usize { - let mut lock = self.raw.lock(); - lock.expire(); - lock.table.len() - } - - /// Returns allocated memory size - sys.getsizeof(self) - pub fn __sizeof__(&self) -> usize { - let lock = self.raw.lock(); - - core::mem::size_of::() - + lock.table.capacity() - * core::mem::size_of::>>() - + lock.heap.capacity() * core::mem::size_of::>() - } - - /// Returns true if cache not empty - bool(self) - pub fn __bool__(&self) -> bool { - let mut lock = self.raw.lock(); - lock.expire(); - !lock.table.is_empty() - } - - /// Returns true if the cache have the key present - key in self - pub fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - Ok(lock.contains_key(&hk)) - } - - /// Sets self\[key\] to value. - /// - /// Recommended to use `.insert()` method here. 
- pub fn __setitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - lock.insert(hk, value, None, true); - Ok(()) - } - - /// Returns self\[key\] - /// - /// Note: raises KeyError if key not found. - pub fn __getitem__( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.value.clone_ref(py)), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Deletes self[key]. - /// - /// Note: raises KeyError if key not found. - pub fn __delitem__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(_) => Ok(()), - None => Err(err!(pyo3::exceptions::PyKeyError, hk.key)), - } - } - - /// Returns repr(self) - pub fn __repr__(&self) -> String { - let lock = self.raw.lock(); - - format!( - "VTTLCache({} / {}, capacity={})", - lock.table.len(), - lock.maxsize.get(), - lock.table.capacity(), - ) - } - - /// Returns `iter(cache)` - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. 
- pub fn __iter__( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - - lock.expire(); - - let (len, state) = (lock.table.len(), lock.state.get()); - let result = vttlcache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Supports == and != - pub fn __richcmp__( - slf: pyo3::PyRef<'_, Self>, - other: pyo3::PyRef<'_, Self>, - op: pyo3::class::basic::CompareOp, - ) -> pyo3::PyResult { - match op { - pyo3::class::basic::CompareOp::Eq => { - if slf.as_ptr() == other.as_ptr() { - return Ok(true); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.eq(&a2)) - } - pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() == other.as_ptr() { - return Ok(false); - } - - let (a1, a2) = (slf.raw.lock(), other.raw.lock()); - Ok(a1.ne(&a2)) - } - _ => Err(err!(pyo3::exceptions::PyNotImplementedError, ())), - } - } - - pub fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - let mut lock = self.raw.lock(); - unsafe { - let state = lock.to_pickle(py)?; - Ok(pyo3::Py::from_owned_ptr(py, state)) - } - } - - pub fn __getnewargs__(&self) -> (usize,) { - (0,) - } - - pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - let mut lock = self.raw.lock(); - unsafe { lock.from_pickle(py, state.as_ptr()) } - } - - pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - unsafe { - for bucket in self.raw.lock().table.iter() { - let node = bucket.as_ref(); - - visit.call(&(*node.as_ptr()).as_ref().key.key)?; - visit.call(&(*node.as_ptr()).as_ref().value)?; - } - } - - Ok(()) - } - - pub fn __clear__(&self) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.heap.clear(); - } - - /// Returns the number of elements the map can hold without reallocating. 
- pub fn capacity(&self) -> usize { - let lock = self.raw.lock(); - lock.table.capacity() - } - - /// Equivalent directly to `len(self) == self.maxsize` - pub fn is_full(&self) -> bool { - let mut lock = self.raw.lock(); - lock.expire(); - lock.table.len() == lock.maxsize.get() - } - - /// Equivalent directly to `len(self) == 0` - pub fn is_empty(&self) -> bool { - let mut lock = self.raw.lock(); - lock.expire(); - lock.table.len() == 0 - } - - /// Equals to `self[key] = value`, but: - /// - Here you can set ttl for key-value ( with `self[key] = value` you can't ) - /// - If the cache did not have this key present, None is returned. - /// - If the cache did have this key present, the value is updated, - /// and the old value is returned. The key is not updated, though; - #[pyo3(signature=(key, value, ttl=None))] - pub fn insert( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - value: pyo3::PyObject, - ttl: Option, - ) -> pyo3::PyResult { - if let Some(secs) = ttl { - if secs == 0.0 { - return Err(err!( - pyo3::exceptions::PyValueError, - "ttl cannot be zero, if you do not want to set ttl, use `None`" - )); - } else if secs < 0.0 { - return Err(err!( - pyo3::exceptions::PyValueError, - "ttl cannot be negative" - )); - } - } - - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - let op = lock.insert(hk, value, ttl, true); - Ok(op.unwrap_or_else(|| py.None())) - } - - /// Equals to `self[key]`, but returns `default` if the cache don't have this key present. - #[pyo3(signature = (key, default=None))] - pub fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok(val.value.clone_ref(py)), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Removes specified key and return the corresponding value. 
- /// - /// If the key is not found, returns the default - #[pyo3(signature = (key, default=None))] - pub fn pop( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - Some(element) => Ok(element.value), - None => Ok(default.unwrap_or_else(|| py.None())), - } - } - - /// Inserts key with a value of default if key is not in the cache. - /// - /// Return the value for key if key is in the cache, else default. - #[pyo3(signature=(key, default=None, ttl=None))] - pub fn setdefault( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ttl: Option, - ) -> pyo3::PyResult { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - if let Some(x) = lock.get(&hk) { - return Ok(x.value.clone_ref(py)); - } - - let defval = default.unwrap_or_else(|| py.None()); - lock.insert(hk, defval.clone_ref(py), ttl, true); - Ok(defval) - } - - /// Removes the element that has been in the cache the longest - pub fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { - let mut lock = self.raw.lock(); - - match lock.popitem() { - Some(element) => Ok((element.key.key, element.value)), - None => Err(err!(pyo3::exceptions::PyKeyError, ())), - } - } - - /// Does the `popitem()` `n` times and returns count of removed items. - pub fn drain(&self, n: usize) -> usize { - let mut lock = self.raw.lock(); - - for c in 0..n { - if lock.popitem().is_none() { - return c; - } - } - - 0 - } - - /// Removes all items from cache. - /// - /// If reuse is True, will not free the memory for reusing in the future. - #[pyo3(signature=(*, reuse=false))] - pub fn clear(&self, reuse: bool) { - let mut lock = self.raw.lock(); - lock.table.clear(); - lock.heap.clear(); - - if !reuse { - lock.shrink_to_fit(); - } - } - - /// Shrinks the cache to fit len(self) elements. 
- pub fn shrink_to_fit(&self) { - let mut lock = self.raw.lock(); - lock.shrink_to_fit(); - } - - /// Updates the cache with elements from a dictionary or an iterable object of key/value pairs. - #[pyo3(signature=(iterable, ttl=None))] - pub fn update( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - iterable: pyo3::PyObject, - ttl: Option, - ) -> pyo3::PyResult<()> { - if slf.as_ptr() == iterable.as_ptr() { - return Ok(()); - } - - let mut lock = slf.raw.lock(); - lock.update(py, iterable, ttl) - } - - /// Returns an iterable object of the cache's items (key-value pairs). - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - pub fn items( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - lock.expire(); - - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = vttlcache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 2, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's keys. - /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - pub fn keys( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - lock.expire(); - - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = vttlcache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 0, - }; - - pyo3::Py::new(py, result) - } - - /// Returns an iterable object of the cache's values. 
- /// - /// Notes: - /// - You should not make any changes in cache while using this iterable object. - /// - Don't call `len(cache)`, `bool(cache)`, `cache.is_full()` or `cache.is_empty()` while using this iterable object. - pub fn values( - slf: pyo3::PyRef<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult> { - let mut lock = slf.raw.lock(); - lock.expire(); - - let (len, state) = (lock.table.len(), lock.state.get()); - - let result = vttlcache_iterator { - ptr: _KeepForIter::new(slf.as_ptr(), state, len), - iter: crate::mutex::Mutex::new(lock.iter()), - typ: 1, - }; - - pyo3::Py::new(py, result) - } - - /// Works like `.get()`, but also returns the remaining time-to-live. - #[pyo3(signature = (key, default=None))] - pub fn get_with_expire( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult<(pyo3::PyObject, f64)> { - let hk = HashedKey::from_pyobject(py, key)?; - let lock = self.raw.lock(); - - match lock.get(&hk) { - Some(val) => Ok((val.value.clone_ref(py), unsafe { - val.expire_at.map_or(0.0, |x| { - x.duration_since(std::time::SystemTime::now()) - .unwrap_unchecked() - .as_secs_f64() - }) - })), - None => Ok((default.unwrap_or_else(|| py.None()), 0.0)), - } - } - - /// Works like `.pop()`, but also returns the remaining time-to-live. - #[pyo3(signature = (key, default=None))] - pub fn pop_with_expire( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: Option, - ) -> pyo3::PyResult<(pyo3::PyObject, f64)> { - let hk = HashedKey::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.remove(&hk) { - #[rustfmt::skip] - Some(element) => Ok( - ( - element.value, - unsafe { - element.expire_at.map_or( - 0.0, |x| { - x.duration_since(std::time::SystemTime::now()) - .unwrap_unchecked() - .as_secs_f64() - } - ) - } - ) - ), - None => Ok((default.unwrap_or_else(|| py.None()), 0.0)), - } - } - - /// Works like `.popitem()`, but also returns the remaining time-to-live. 
- pub fn popitem_with_expire(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject, f64)> { - let mut lock = self.raw.lock(); - match lock.popitem() { - #[rustfmt::skip] - Some(element) => Ok( - ( - element.key.key, - element.value, - unsafe { - element.expire_at.map_or( - 0.0, |x| { - x.duration_since(std::time::SystemTime::now()) - .unwrap_unchecked() - .as_secs_f64() - } - ) - } - ) - ), - None => Err(err!(pyo3::exceptions::PyKeyError, ())), - } - } -} - -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._cachebox")] -pub struct vttlcache_iterator { - ptr: _KeepForIter, - iter: crate::mutex::Mutex>, - typ: u8, -} - -#[pyo3::pymethods] -impl vttlcache_iterator { - pub fn __len__(&self) -> usize { - self.ptr.len - } - - pub fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { - slf - } - - #[allow(unused_mut)] - pub fn __next__( - mut slf: pyo3::PyRefMut<'_, Self>, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - slf.ptr.status(py)?; - - match slf.iter.lock().next() { - Some(ptr) => { - let node = unsafe { &*ptr.as_ptr() }; - - match slf.typ { - 0 => Ok(node.as_ref().key.key.clone_ref(py).into_ptr()), - 1 => Ok(node.as_ref().value.clone_ref(py).into_ptr()), - 2 => { - tuple!( - py, - 2, - 0 => node.as_ref().key.key.clone_ref(py).into_ptr(), - 1 => node.as_ref().value.clone_ref(py).into_ptr(), - ) - } - _ => { - #[cfg(not(debug_assertions))] - unsafe { - core::hint::unreachable_unchecked() - }; - #[cfg(debug_assertions)] - unreachable!(); - } - } - } - None => Err(err!(pyo3::exceptions::PyStopIteration, ())), - } - } -} diff --git a/src/hashedkey.rs b/src/hashedkey.rs deleted file mode 100644 index 2a3cb8f..0000000 --- a/src/hashedkey.rs +++ /dev/null @@ -1,46 +0,0 @@ -#[derive(Debug)] -pub struct HashedKey { - pub key: pyo3::PyObject, - - // The `key` hash in Rust. - // Why u64? because hash type in Rust is u64 and hashbrown only accepts u64 as hash, - // I didn't found any better way. 
- pub hash: u64, -} - -impl HashedKey { - #[inline] - pub fn from_key_and_hash(key: pyo3::PyObject, hash: u64) -> Self { - Self { key, hash } - } - - #[inline] - pub fn from_pyobject(py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { - unsafe { - let py_hash = pyo3::ffi::PyObject_Hash(key.as_ptr()); - - if py_hash == -1 { - // There's no need to check PyErr_Occurred, - // PyObject_Hash never returns -1 when success. - return Err(pyo3::PyErr::take(py).unwrap()); - } - - Ok(Self::from_key_and_hash(key, fxhash::hash64(&py_hash))) - } - } - - pub fn clone_ref(&self, py: pyo3::Python<'_>) -> Self { - Self { - key: self.key.clone_ref(py), - hash: self.hash, - } - } -} - -impl PartialEq for HashedKey { - fn eq(&self, other: &Self) -> bool { - pyobject_eq!(self.key, other.key) - } -} - -impl Eq for HashedKey {} diff --git a/src/internal/fifo.rs b/src/internal/fifo.rs deleted file mode 100644 index a4a927b..0000000 --- a/src/internal/fifo.rs +++ /dev/null @@ -1,360 +0,0 @@ -//! The FIFO policy, This is inspired by Rust's indexmap with some changes. - -use crate::hashedkey::HashedKey; -use hashbrown::raw::RawTable; -use std::collections::VecDeque; - -pub struct FIFOPolicy { - /// We set [Vec] objects indexes in hashtable to make search O(1). hashtable is unordered, - /// that is why we are using [Vec]. - pub table: RawTable, - - /// Keep objects in order. - pub entries: VecDeque<(HashedKey, pyo3::PyObject)>, - pub maxsize: core::num::NonZeroUsize, - - /// When we pop front an object from entries, two operations have to do: - /// 1. Shift all elements in vector. - /// 2. Decrement all indexes in hashtable. - /// - /// these are expensive operations in large elements; - /// - We removed first operation by using [`std::collections::VecDeque`] instead of [`Vec`] - /// - We removed second operation by using this variable: Instead of decrement indexes in hashtable, - /// we will increment this variable. 
- pub n_shifts: usize, - - /// This is for detecting changes; needed for iterators - pub state: crate::util::CacheState, -} - -impl FIFOPolicy { - #[inline] - pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - entries: VecDeque::new(), - maxsize, - n_shifts: 0, - state: crate::util::CacheState::new(), - }) - } - - #[inline] - fn decrement_indexes(&mut self, start: usize, end: usize) { - if start <= 1 && end == self.entries.len() && self.n_shifts < super::MAX_N_SHIFT { - self.n_shifts += 1; - return; - } - - if (end - start) > self.table.buckets() / 2 { - unsafe { - for bucket in self.table.iter() { - let i = bucket.as_mut(); - if start <= (*i) - self.n_shifts && (*i) - self.n_shifts < end { - *i -= 1; - } - } - } - } else { - let shifted = self.entries.range(start..end); - for (i, entry) in (start..end).zip(shifted) { - #[cfg(debug_assertions)] - let old = self - .table - .get_mut(entry.0.hash, |x| (*x) - self.n_shifts == i) - .expect("index not found"); - - #[cfg(not(debug_assertions))] - let old = unsafe { - self.table - .get_mut(entry.0.hash, |x| (*x) - self.n_shifts == i) - .unwrap_unchecked() - }; - - *old -= 1; - } - } - } - - /// # Safety - /// - /// This method is unsafe because does not checks the maxsize and this - /// may occurred errors and bad situations in future if you don't care about - /// maxsize. 
- #[inline] - unsafe fn insert_unchecked( - &mut self, - key: HashedKey, - value: pyo3::PyObject, - ) -> Option { - match self.table.find_or_find_insert_slot( - key.hash, - |index| key == self.entries[(*index) - self.n_shifts].0, - |index| self.entries[(*index) - self.n_shifts].0.hash, - ) { - Ok(bucket) => { - let index = unsafe { bucket.as_ref() }; - Some(core::mem::replace( - &mut self.entries[(*index) - self.n_shifts].1, - value, - )) - } - Err(slot) => { - self.state.change(); - - unsafe { - self.table - .insert_in_slot(key.hash, slot, self.entries.len() + self.n_shifts); - } - self.entries.push_back((key, value)); - None - } - } - } - - #[inline] - pub fn insert(&mut self, key: HashedKey, value: pyo3::PyObject) -> Option { - if self.table.len() >= self.maxsize.get() && !self.contains_key(&key) { - #[cfg(debug_assertions)] - self.popitem().unwrap(); - - #[cfg(not(debug_assertions))] - unsafe { - self.popitem().unwrap_unchecked(); - } - } - - unsafe { self.insert_unchecked(key, value) } - } - - #[inline] - pub fn popitem(&mut self) -> Option<(HashedKey, pyo3::PyObject)> { - let ret = self.entries.pop_front()?; - self.state.change(); - - #[cfg(debug_assertions)] - self.table - .remove_entry(ret.0.hash, |index| (*index) - self.n_shifts == 0) - .expect("popitem key not found."); - - #[cfg(not(debug_assertions))] - unsafe { - self.table - .remove_entry(ret.0.hash, |index| (*index) - self.n_shifts == 0) - .unwrap_unchecked(); - } - - self.decrement_indexes(1, self.entries.len()); - Some(ret) - } - - #[inline] - pub fn get(&self, key: &HashedKey) -> Option<&pyo3::PyObject> { - match self - .table - .find(key.hash, |x| &self.entries[(*x) - self.n_shifts].0 == key) - .map(|bucket| unsafe { bucket.as_ref() }) - { - Some(index) => Some(&self.entries[(*index) - self.n_shifts].1), - None => None, - } - } - - #[inline] - pub fn remove(&mut self, key: &HashedKey) -> Option<(HashedKey, pyo3::PyObject)> { - match self - .table - .remove_entry(key.hash, |x| key == 
&self.entries[(*x) - self.n_shifts].0) - .map(|x| x - self.n_shifts) - { - Some(index) => { - self.state.change(); - - self.decrement_indexes(index + 1, self.entries.len()); - - #[cfg(debug_assertions)] - let m = self.entries.remove(index).unwrap(); - - #[cfg(not(debug_assertions))] - let m = unsafe { self.entries.remove(index).unwrap_unchecked() }; - - Some(m) - } - None => None, - } - } - - #[inline] - pub fn contains_key(&self, key: &HashedKey) -> bool { - self.table - .find(key.hash, |x| &self.entries[(*x) - self.n_shifts].0 == key) - .is_some() - } - - #[inline] - pub fn update(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { - iterable - .downcast_bound::(py) - .unwrap_unchecked() - }; - - for (key, value) in dict.iter() { - let hk = unsafe { HashedKey::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - self.insert(hk, value.unbind()); - } - - Ok(()) - } else { - for pair in iterable.bind(py).try_iter()? 
{ - let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - self.insert(hk, value); - } - - Ok(()) - } - } - - #[inline(always)] - pub fn iter(&self) -> FIFOIterator { - let (a, b) = self.entries.as_slices(); - - FIFOIterator { - first: crate::util::NoLifetimeSliceIter { - slice: a.as_ptr(), - index: 0, - len: a.len(), - }, - second: crate::util::NoLifetimeSliceIter { - slice: b.as_ptr(), - index: 0, - len: b.len(), - }, - } - } - - #[inline(always)] - pub fn shrink_to_fit(&mut self) { - self.entries.shrink_to_fit(); - self.table - .shrink_to(0, |x| self.entries[(*x) - self.n_shifts].0.hash); - - self.state.change(); - } - - #[inline] - pub unsafe fn to_pickle( - &self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for (hk, val) in self.entries.iter() { - let tp = tuple!( - py, - 2, - 0 => hk.key.clone_ref(py).as_ptr(), - 1 => val.clone_ref(py).as_ptr(), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(self.maxsize.get()); - let capacity = pyo3::ffi::PyLong_FromSize_t(self.table.capacity()); - - tuple!( - py, - 3, - 0 => maxsize, - 1 => list, - 2 => capacity, - ) - } - - #[allow(clippy::wrong_self_convention)] - #[inline] - pub unsafe fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); - - let mut new = Self::new(maxsize, capacity)?; - new.update(py, iterable)?; - - *self = new; - Ok(()) - } -} - -impl PartialEq for FIFOPolicy { - fn eq(&self, other: &Self) -> bool { - if 
self.maxsize != other.maxsize { - return false; - } - - if self.entries.len() != other.entries.len() { - return false; - } - - for index in 0..self.entries.len() { - let (key1, val1) = &self.entries[index]; - let (key2, val2) = &other.entries[index]; - - if key1.hash != key2.hash - || !pyobject_eq!(key1.key, key2.key) - || !pyobject_eq!(val1, val2) - { - return false; - } - } - - true - } -} - -impl Eq for FIFOPolicy {} - -pub struct FIFOIterator { - pub first: crate::util::NoLifetimeSliceIter<(HashedKey, pyo3::PyObject)>, - pub second: crate::util::NoLifetimeSliceIter<(HashedKey, pyo3::PyObject)>, -} - -impl Iterator for FIFOIterator { - type Item = *const (HashedKey, pyo3::PyObject); - - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(val), - None => { - core::mem::swap(&mut self.first, &mut self.second); - self.first.next() - } - } - } -} - -unsafe impl Send for FIFOIterator {} -unsafe impl Sync for FIFOIterator {} diff --git a/src/internal/lfu.rs b/src/internal/lfu.rs deleted file mode 100644 index 8a972f9..0000000 --- a/src/internal/lfu.rs +++ /dev/null @@ -1,339 +0,0 @@ -//! The LFU Policy - -use std::ptr::NonNull; - -use crate::hashedkey::HashedKey; -use crate::sorted_heap::{Entry, Iter, SortedHeap}; -use hashbrown::raw::RawTable; - -macro_rules! 
compare_fn { - () => { - |a, b| a.2.cmp(&b.2) - }; -} - -pub struct LFUPolicy { - pub table: RawTable>>, - pub heap: SortedHeap<(HashedKey, pyo3::PyObject, usize)>, - pub maxsize: core::num::NonZeroUsize, - pub state: crate::util::CacheState, -} - -impl LFUPolicy { - #[inline] - pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - heap: SortedHeap::new(), - maxsize, - state: crate::util::CacheState::new(), - }) - } - - /// # Safety - /// - /// This method is unsafe because does not checks the maxsize and this - /// may occurred errors and bad situations in future if you don't care about - /// maxsize. - #[inline] - unsafe fn insert_unchecked( - &mut self, - key: HashedKey, - value: pyo3::PyObject, - default_frequency: usize, - ) -> Option { - match self.table.find_or_find_insert_slot( - key.hash, - |node| (*node.as_ptr()).as_ref().0 == key, - |node| (*node.as_ptr()).as_ref().0.hash, - ) { - Ok(bucket) => { - let node = bucket.as_mut(); - - (node.as_mut()).as_mut().2 += 1; - let oldval = core::mem::replace(&mut (node.as_mut()).as_mut().1, value); - - Some(oldval) - } - Err(slot) => { - self.state.change(); - - // copy key hash - let hash = key.hash; - - let node = self.heap.push((key, value, default_frequency)); - unsafe { - self.table.insert_in_slot(hash, slot, node); - } - - None - } - } - } - - #[inline] - pub fn insert(&mut self, key: HashedKey, value: pyo3::PyObject) -> Option { - if self.table.len() >= self.maxsize.get() && !self.contains_key(&key) { - self.popitem().unwrap(); - } - - unsafe { self.insert_unchecked(key, value, 1) } - } - - #[inline] - pub fn popitem(&mut self) -> Option<(HashedKey, pyo3::PyObject, usize)> { - self.heap.sort(compare_fn!()); - let first = self.heap.0.first()?; - self.state.change(); - - unsafe { - self.table - .remove_entry((*first.as_ptr()).as_ref().0.hash, |node| { 
- core::ptr::eq(node.as_ptr(), first.as_ptr()) - }) - .expect("popitem key not found."); - } - - Some(self.heap.pop_front(compare_fn!()).unwrap()) - } - - #[inline] - pub fn get(&mut self, key: &HashedKey) -> Option<&pyo3::PyObject> { - match unsafe { - self.table - .find(key.hash, |node| (*node.as_ptr()).as_ref().0 == *key) - } { - Some(bucket) => { - let node = unsafe { bucket.as_mut() }; - - unsafe { - (node.as_mut()).as_mut().2 += 1; - } - - self.heap.1 = false; - - Some(unsafe { &(*node.as_ptr()).as_ref().1 }) - } - None => None, - } - } - - #[inline] - pub fn peek(&self, key: &HashedKey) -> Option<&pyo3::PyObject> { - match unsafe { - self.table - .find(key.hash, |node| (*node.as_ptr()).as_ref().0 == *key) - } { - Some(bucket) => { - let node = unsafe { bucket.as_ref() }; - - Some(unsafe { &(*node.as_ptr()).as_ref().1 }) - } - None => None, - } - } - - #[inline] - pub fn remove(&mut self, key: &HashedKey) -> Option<(HashedKey, pyo3::PyObject, usize)> { - match unsafe { - self.table - .remove_entry(key.hash, |node| (*node.as_ptr()).as_ref().0 == *key) - } { - Some(node) => { - self.state.change(); - - Some(self.heap.remove(node, compare_fn!())) - } - None => None, - } - } - - #[inline] - pub fn contains_key(&self, key: &HashedKey) -> bool { - unsafe { - self.table - .find(key.hash, |node| (*node.as_ptr()).as_ref().0 == *key) - .is_some() - } - } - - #[inline] - pub fn update(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { - iterable - .downcast_bound::(py) - .unwrap_unchecked() - }; - - for (key, value) in dict.iter() { - let hk = unsafe { HashedKey::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - self.insert(hk, value.unbind()); - } - - Ok(()) - } else { - for pair in iterable.bind(py).try_iter()? 
{ - let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - self.insert(hk, value); - } - - Ok(()) - } - } - - #[inline(always)] - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(0, |node| unsafe { (*node.as_ptr()).as_ref().0.hash }); - self.heap.0.shrink_to_fit(); - self.state.change(); - } - - pub fn iter(&mut self) -> Iter<(HashedKey, pyo3::PyObject, usize)> { - self.heap.sort(compare_fn!()); - self.heap.iter() - } - - #[allow(clippy::wrong_self_convention)] - #[inline] - pub unsafe fn to_pickle( - &mut self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - self.heap.sort(compare_fn!()); - - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for ptr in self.heap.iter() { - let node = &(*ptr.as_ptr()); - - let frequency = pyo3::ffi::PyLong_FromSize_t(node.as_ref().2); - if frequency.is_null() { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - - let tp = tuple!( - py, - 3, - 0 => node.as_ref().0.key.clone_ref(py).as_ptr(), - 1 => node.as_ref().1.clone_ref(py).as_ptr(), - 2 => frequency, - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(self.maxsize.get()); - let capacity = pyo3::ffi::PyLong_FromSize_t(self.table.capacity()); - - tuple!( - py, - 3, - 0 => maxsize, - 1 => list, - 2 => capacity, - ) - } - - #[allow(clippy::wrong_self_convention)] - #[inline] - pub unsafe fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); - - // SAFETY: we check `iterable` type in 
`extract_pickle_tuple` macro - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(err!( - pyo3::exceptions::PyValueError, - "iterable object size is greater than maxsize" - )); - } - - let mut new = Self::new(maxsize, capacity)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value, fr) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject, usize)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - - // SAFETY: we don't need to check maxsize, we sure `len(iterable) <= maxsize` - new.insert_unchecked(hk, value, fr); - } - - *self = new; - Ok(()) - } -} - -impl PartialEq for LFUPolicy { - fn eq(&self, other: &Self) -> bool { - if self.maxsize != other.maxsize { - return false; - } - - if self.heap.len() != other.heap.len() { - return false; - } - - unsafe { - for bucket in self.table.iter() { - let node1 = bucket.as_ref(); - - let node2 = other.table.get((*node1.as_ptr()).as_ref().0.hash, |x| { - (*x.as_ptr()).as_ref().0 == (*node1.as_ptr()).as_ref().0 - }); - if node2.is_none() { - return false; - } - - let node2 = node2.unwrap_unchecked(); - - if (*node1.as_ptr()).as_ref().0.hash != (*node2.as_ptr()).as_ref().0.hash - || !pyobject_eq!( - (*node1.as_ptr()).as_ref().0.key, - (*node2.as_ptr()).as_ref().0.key - ) - || !pyobject_eq!((*node1.as_ptr()).as_ref().1, (*node2.as_ptr()).as_ref().1) - { - return false; - } - } - } - - true - } -} - -impl Eq for LFUPolicy {} - -// because we use it in Mutex -unsafe impl Sync for LFUPolicy {} - -// because we use it in Mutex -unsafe impl Send for LFUPolicy {} diff --git a/src/internal/lru.rs b/src/internal/lru.rs deleted file mode 100644 index 982b97d..0000000 --- a/src/internal/lru.rs +++ /dev/null @@ -1,284 +0,0 @@ -//! 
The LRU Policy - -use crate::hashedkey::HashedKey; -use crate::linked_list; -use hashbrown::raw::RawTable; - -pub struct LRUPolicy { - pub table: RawTable>, - pub list: linked_list::LinkedList, - pub maxsize: core::num::NonZeroUsize, - pub state: crate::util::CacheState, -} - -impl LRUPolicy { - #[inline] - pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - list: linked_list::LinkedList::new(), - maxsize, - state: crate::util::CacheState::new(), - }) - } - - /// # Safety - /// - /// This method is unsafe because does not checks the maxsize and this - /// may occurred errors and bad situations in future if you don't care about - /// maxsize. - #[inline] - unsafe fn insert_unchecked( - &mut self, - key: HashedKey, - value: pyo3::PyObject, - ) -> Option { - match self.table.find_or_find_insert_slot( - key.hash, - |node| (*node.as_ptr()).element.0 == key, - |node| (*node.as_ptr()).element.0.hash, - ) { - Ok(bucket) => { - let node = bucket.as_mut(); - - let oldval = core::mem::replace(&mut (node.as_mut()).element.1, value); - self.list.move_back(*node); - - Some(oldval) - } - Err(slot) => { - self.state.change(); - - // copy key hash - let hash = key.hash; - - let node = self.list.push_back(key, value); - unsafe { - self.table.insert_in_slot(hash, slot, node); - } - None - } - } - } - - #[inline] - pub fn insert(&mut self, key: HashedKey, value: pyo3::PyObject) -> Option { - if self.table.len() >= self.maxsize.get() && !self.contains_key(&key) { - #[cfg(debug_assertions)] - self.popitem().unwrap(); - - #[cfg(not(debug_assertions))] - unsafe { - self.popitem().unwrap_unchecked(); - } - } - - unsafe { self.insert_unchecked(key, value) } - } - - #[inline] - pub fn popitem(&mut self) -> Option<(HashedKey, pyo3::PyObject)> { - let ret = self.list.head?; - self.state.change(); - - unsafe { - self.table - 
.remove_entry((*ret.as_ptr()).element.0.hash, |node| { - core::ptr::eq(node.as_ptr(), ret.as_ptr()) - }) - .expect("popitem key not found."); - } - - Some(self.list.pop_front().unwrap()) - } - - #[inline] - pub fn get(&mut self, key: &HashedKey) -> Option<&pyo3::PyObject> { - match unsafe { - self.table - .find(key.hash, |node| (*node.as_ptr()).element.0 == *key) - } { - Some(bucket) => { - let node = unsafe { bucket.as_mut() }; - - unsafe { - self.list.move_back(*node); - } - - Some(unsafe { &(*node.as_ptr()).element.1 }) - } - None => None, - } - } - - #[inline] - pub fn peek(&self, key: &HashedKey) -> Option<&pyo3::PyObject> { - match unsafe { - self.table - .find(key.hash, |node| (*node.as_ptr()).element.0 == *key) - } { - Some(bucket) => { - let node = unsafe { bucket.as_ref() }; - - Some(unsafe { &(*node.as_ptr()).element.1 }) - } - None => None, - } - } - - #[inline] - pub fn remove(&mut self, key: &HashedKey) -> Option<(HashedKey, pyo3::PyObject)> { - match unsafe { - self.table - .remove_entry(key.hash, |node| (*node.as_ptr()).element.0 == *key) - } { - Some(node) => { - self.state.change(); - Some(unsafe { self.list.remove(node) }) - } - None => None, - } - } - - #[inline] - pub fn contains_key(&self, key: &HashedKey) -> bool { - unsafe { - self.table - .find(key.hash, |node| (*node.as_ptr()).element.0 == *key) - .is_some() - } - } - - #[inline] - pub fn update(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { - iterable - .downcast_bound::(py) - .unwrap_unchecked() - }; - - for (key, value) in dict.iter() { - let hk = unsafe { HashedKey::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - self.insert(hk, value.unbind()); - } - - Ok(()) - } else { - for pair in iterable.bind(py).try_iter()? 
{ - let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - self.insert(hk, value); - } - - Ok(()) - } - } - - #[inline(always)] - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(0, |node| unsafe { (*node.as_ptr()).element.0.hash }); - self.state.change(); - } - - #[inline] - pub unsafe fn to_pickle( - &self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for node in self.list.iter() { - let (hk, val) = &(*node.as_ptr()).element; - - let tp = tuple!( - py, - 2, - 0 => hk.key.clone_ref(py).as_ptr(), - 1 => val.clone_ref(py).as_ptr(), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(self.maxsize.get()); - let capacity = pyo3::ffi::PyLong_FromSize_t(self.table.capacity()); - - tuple!( - py, - 3, - 0 => maxsize, - 1 => list, - 2 => capacity, - ) - } - - #[allow(clippy::wrong_self_convention)] - #[inline] - pub unsafe fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); - - let mut new = Self::new(maxsize, capacity)?; - new.update(py, iterable)?; - - *self = new; - Ok(()) - } -} - -impl PartialEq for LRUPolicy { - fn eq(&self, other: &Self) -> bool { - if self.maxsize != other.maxsize { - return false; - } - - if self.list.len() != other.list.len() { - return false; - } - - for (node1, node2) in self.list.iter().zip(other.list.iter()) { - let (key1, val1) = unsafe { &(*node1.as_ptr()).element }; - let (key2, val2) = unsafe { &(*node2.as_ptr()).element }; - - if key1.hash != 
key2.hash - || !pyobject_eq!(key1.key, key2.key) - || !pyobject_eq!(val1, val2) - { - return false; - } - } - - true - } -} - -impl Eq for LRUPolicy {} - -// because we use it in Mutex -unsafe impl Sync for LRUPolicy {} - -// because we use it in Mutex -unsafe impl Send for LRUPolicy {} diff --git a/src/internal/mod.rs b/src/internal/mod.rs deleted file mode 100644 index 34a7ae6..0000000 --- a/src/internal/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! Rust cache implemetations, these will be bridged to python in `bridge/` path. - -pub(super) const MAX_N_SHIFT: usize = usize::MAX - (isize::MAX as usize); - -mod fifo; -mod lfu; -mod lru; -mod nopolicy; -mod ttl; -mod vttl; - -pub use fifo::{FIFOIterator, FIFOPolicy}; -pub use lfu::LFUPolicy; -pub use lru::LRUPolicy; -pub use nopolicy::NoPolicy; -pub use ttl::{TTLElement, TTLIterator, TTLPolicy}; -pub use vttl::{VTTLElement, VTTLPolicy}; diff --git a/src/internal/nopolicy.rs b/src/internal/nopolicy.rs deleted file mode 100644 index d51c25f..0000000 --- a/src/internal/nopolicy.rs +++ /dev/null @@ -1,181 +0,0 @@ -//! The bounded cache, away from any algorithms ... - -use crate::hashedkey::HashedKey; -use hashbrown::raw::RawTable; - -pub struct NoPolicy { - pub table: RawTable<(HashedKey, pyo3::PyObject)>, - pub maxsize: core::num::NonZeroUsize, - pub state: crate::util::CacheState, -} - -impl NoPolicy { - #[inline] - pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - maxsize, - state: crate::util::CacheState::new(), - }) - } - - /// # Safety - /// - /// This method is unsafe because does not checks the maxsize and this - /// may occurred errors and bad situations in future if you don't care about - /// maxsize. 
- #[inline] - pub unsafe fn insert_unchecked( - &mut self, - key: HashedKey, - value: pyo3::PyObject, - ) -> Option { - match self - .table - .find_or_find_insert_slot(key.hash, |x| x.0 == key, |x| x.0.hash) - { - Ok(bucket) => Some(core::mem::replace(&mut (bucket.as_mut().1), value)), - Err(slot) => { - self.state.change(); - self.table.insert_in_slot(key.hash, slot, (key, value)); - None - } - } - } - - #[inline] - pub fn insert( - &mut self, - key: HashedKey, - value: pyo3::PyObject, - ) -> pyo3::PyResult> { - if self.table.len() >= self.maxsize.get() - && self.table.find(key.hash, |x| x.0 == key).is_none() - { - // There's no algorithm for removing a key-value pair, so we raise PyOverflowError. - return Err(err!( - pyo3::exceptions::PyOverflowError, - "The cache has reached the bound" - )); - } - - Ok(unsafe { self.insert_unchecked(key, value) }) - } - - #[inline] - pub fn get(&self, key: &HashedKey) -> Option<&pyo3::PyObject> { - self.table - .find(key.hash, |x| x.0 == *key) - .map(|bucket| unsafe { &bucket.as_ref().1 }) - } - - #[inline] - pub fn remove(&mut self, key: &HashedKey) -> Option<(HashedKey, pyo3::PyObject)> { - self.state.change(); - self.table.remove_entry(key.hash, |x| x.0 == *key) - } - - #[inline] - pub fn contains_key(&self, key: &HashedKey) -> bool { - self.table.find(key.hash, |x| x.0 == *key).is_some() - } - - #[inline] - pub fn update(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { - iterable - .downcast_bound::(py) - .unwrap_unchecked() - }; - - for (key, value) in dict.iter() { - let hk = unsafe { HashedKey::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - self.insert(hk, value.unbind())?; - } - - Ok(()) - } else { - for pair in iterable.bind(py).try_iter()? 
{ - let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - self.insert(hk, value)?; - } - - Ok(()) - } - } - - pub unsafe fn to_pickle( - &self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let mp = pyo3::ffi::PyDict_New(); - - if mp.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for bucket in self.table.iter() { - let (key, val) = bucket.as_ref(); - // SAFETY: we don't need to check error because we sure about key that is hashable. - pyo3::ffi::PyDict_SetItem(mp, key.key.as_ptr(), val.as_ptr()); - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(self.maxsize.get()); - let capacity = pyo3::ffi::PyLong_FromSize_t(self.table.capacity()); - - tuple!( - py, - 3, - 0 => maxsize, - 1 => mp, - 2 => capacity, - ) - } - - #[allow(clippy::wrong_self_convention)] - pub unsafe fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); - - let mut new = Self::new(maxsize, capacity)?; - new.update(py, iterable)?; - - *self = new; - Ok(()) - } -} - -impl PartialEq for NoPolicy { - fn eq(&self, other: &Self) -> bool { - if self.maxsize != other.maxsize { - return false; - } - - if self.table.len() != other.table.len() { - return false; - } - - #[allow(unused_unsafe)] - unsafe { - self.table.iter().all(|bucket| { - let (key, value) = bucket.as_ref(); - - other.get(key).map_or(false, |x| pyobject_eq!(value, x)) - }) - } - } -} - -impl Eq for NoPolicy {} diff --git a/src/internal/ttl.rs b/src/internal/ttl.rs deleted file mode 100644 index defd232..0000000 --- a/src/internal/ttl.rs +++ /dev/null @@ -1,427 +0,0 @@ -//! 
The TTL Policy - -use crate::hashedkey::HashedKey; -use hashbrown::raw::RawTable; -use std::{collections::VecDeque, time}; - -pub struct TTLElement { - pub key: HashedKey, - pub value: pyo3::PyObject, - pub expire: time::SystemTime, -} - -/// see [`FIFOPolicy`](struct@crate::internal::FIFOPolicy) to find out fields -pub struct TTLPolicy { - pub table: RawTable, - pub entries: VecDeque, - pub maxsize: core::num::NonZeroUsize, - pub ttl: time::Duration, - pub n_shifts: usize, - pub state: crate::util::CacheState, -} - -impl TTLPolicy { - #[inline] - pub fn new(maxsize: usize, mut capacity: usize, ttl: f64) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - entries: VecDeque::new(), - maxsize, - n_shifts: 0, - ttl: time::Duration::from_secs_f64(ttl), - state: crate::util::CacheState::new(), - }) - } - - #[inline] - fn decrement_indexes(&mut self, start: usize, end: usize) { - if start <= 1 && end == self.entries.len() && self.n_shifts < super::MAX_N_SHIFT { - self.n_shifts += 1; - return; - } - - if (end - start) > self.table.buckets() / 2 { - unsafe { - for bucket in self.table.iter() { - let i = bucket.as_mut(); - if start <= (*i) - self.n_shifts && (*i) - self.n_shifts < end { - *i -= 1; - } - } - } - } else { - let shifted = self.entries.range(start..end); - for (i, entry) in (start..end).zip(shifted) { - #[cfg(debug_assertions)] - let old = self - .table - .get_mut(entry.key.hash, |x| (*x) - self.n_shifts == i) - .expect("index not found"); - - #[cfg(not(debug_assertions))] - let old = unsafe { - self.table - .get_mut(entry.key.hash, |x| (*x) - self.n_shifts == i) - .unwrap_unchecked() - }; - - *old -= 1; - } - } - } - - /// # Safety - /// - /// This method is unsafe because does not checks the maxsize and this - /// may occurred errors and bad situations in future if you don't care about - /// maxsize. 
- #[inline] - unsafe fn insert_unchecked(&mut self, element: TTLElement) -> Option { - match self.table.find_or_find_insert_slot( - element.key.hash, - |index| element.key == self.entries[(*index) - self.n_shifts].key, - |index| self.entries[(*index) - self.n_shifts].key.hash, - ) { - Ok(bucket) => { - let index = unsafe { bucket.as_ref() }; - let m = &mut self.entries[(*index) - self.n_shifts]; - - m.expire = element.expire; - Some(core::mem::replace(&mut m.value, element.value)) - } - Err(slot) => { - self.state.change(); - - unsafe { - self.table.insert_in_slot( - element.key.hash, - slot, - self.entries.len() + self.n_shifts, - ); - } - - self.entries.push_back(element); - None - } - } - } - - #[inline] - pub fn insert( - &mut self, - key: HashedKey, - value: pyo3::PyObject, - expire: bool, - ) -> Option { - if expire { - self.expire(); - } - - if self.table.len() >= self.maxsize.get() && !self.contains_key(&key) { - self.popitem().unwrap(); - } - - unsafe { - self.insert_unchecked(TTLElement { - key, - value, - expire: time::SystemTime::now() + self.ttl, - }) - } - } - - #[inline] - pub fn expire(&mut self) { - while !self.entries.is_empty() { - if self.entries[0].expire > time::SystemTime::now() { - break; - } - - unsafe { - self.popitem().unwrap_unchecked(); - } - } - } - - #[inline] - pub fn popitem(&mut self) -> Option { - let ret = self.entries.pop_front()?; - self.state.change(); - - #[cfg(debug_assertions)] - self.table - .remove_entry(ret.key.hash, |index| (*index) - self.n_shifts == 0) - .expect("popitem key not found."); - - #[cfg(not(debug_assertions))] - unsafe { - self.table - .remove_entry(ret.key.hash, |index| (*index) - self.n_shifts == 0) - .unwrap_unchecked(); - } - - self.decrement_indexes(1, self.entries.len()); - Some(ret) - } - - #[inline] - pub fn contains_key(&self, key: &HashedKey) -> bool { - match self - .table - .find(key.hash, |x| &self.entries[(*x) - self.n_shifts].key == key) - .map(|x| unsafe { x.as_ref() }) - { - Some(index) => 
self.entries[(*index) - self.n_shifts].expire > time::SystemTime::now(), - None => false, - } - } - - #[inline] - pub fn get(&self, key: &HashedKey) -> Option<&TTLElement> { - match self - .table - .find(key.hash, |x| &self.entries[(*x) - self.n_shifts].key == key) - .map(|bucket| unsafe { bucket.as_ref() }) - { - Some(index) => { - let m = &self.entries[(*index) - self.n_shifts]; - if m.expire > time::SystemTime::now() { - Some(m) - } else { - None - } - } - None => None, - } - } - - #[inline] - pub fn remove(&mut self, key: &HashedKey) -> Option { - match self - .table - .remove_entry(key.hash, |x| key == &self.entries[(*x) - self.n_shifts].key) - .map(|x| x - self.n_shifts) - { - Some(index) => { - self.decrement_indexes(index + 1, self.entries.len()); - self.state.change(); - - let m = self.entries.remove(index).unwrap(); - - if m.expire > time::SystemTime::now() { - Some(m) - } else { - None - } - } - None => None, - } - } - - #[inline] - pub fn update(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods, PyIterator}; - - self.expire(); - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { - iterable - .downcast_bound::(py) - .unwrap_unchecked() - }; - - for (key, value) in dict.iter() { - let hk = unsafe { HashedKey::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - self.insert(hk, value.unbind(), false); - } - - Ok(()) - } else { - let iterator = PyIterator::from_object(iterable.bind(py))?; - - for pair in iterator { - let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - self.insert(hk, value, false); - } - - Ok(()) - } - } - - #[inline(always)] - pub fn as_ptr(&self) -> TTLIterator { - let (a, b) = self.entries.as_slices(); - - TTLIterator { - first: crate::util::NoLifetimeSliceIter { - slice: a.as_ptr(), - index: 0, - len: a.len(), - }, - second: 
crate::util::NoLifetimeSliceIter { - slice: b.as_ptr(), - index: 0, - len: b.len(), - }, - } - } - - #[inline(always)] - pub fn shrink_to_fit(&mut self) { - self.expire(); - - self.entries.shrink_to_fit(); - self.table - .shrink_to(0, |x| self.entries[(*x) - self.n_shifts].key.hash); - self.state.change(); - } - - #[inline] - pub unsafe fn to_pickle( - &self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for element in self.entries.iter() { - let tp = tuple!( - py, - 3, - 0 => element.key.key.clone_ref(py).as_ptr(), - 1 => element.value.clone_ref(py).as_ptr(), - 2 => pyo3::ffi::PyFloat_FromDouble( - element.expire.duration_since(time::UNIX_EPOCH).unwrap_unchecked().as_secs_f64() - ), - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(self.maxsize.get()); - let capacity = pyo3::ffi::PyLong_FromSize_t(self.table.capacity()); - let ttl = pyo3::ffi::PyFloat_FromDouble(self.ttl.as_secs_f64()); - - tuple!( - py, - 4, - 0 => maxsize, - 1 => list, - 2 => capacity, - 3 => ttl, - ) - } - - #[allow(clippy::wrong_self_convention)] - #[inline] - pub unsafe fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - tuple!(check state, size=4)?; - let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); - - // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(err!( - pyo3::exceptions::PyValueError, - "the iterable object size is more than maxsize!" 
- )); - } - - let ttl = { - let obj = pyo3::ffi::PyTuple_GetItem(state, 3); - pyo3::ffi::PyFloat_AsDouble(obj) - }; - - let mut new = Self::new(maxsize, capacity, ttl)?; - - for pair in iterable.bind(py).try_iter()? { - let (key, value, timestamp) = - pair?.extract::<(pyo3::PyObject, pyo3::PyObject, f64)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - - // SAFETY: we don't need to check maxsize, we sure `len(iterable) <= maxsize` - new.insert_unchecked(TTLElement { - key: hk, - value, - expire: time::UNIX_EPOCH + time::Duration::from_secs_f64(timestamp), - }); - } - - new.shrink_to_fit(); - - *self = new; - Ok(()) - } -} - -impl PartialEq for TTLPolicy { - fn eq(&self, other: &Self) -> bool { - if self.maxsize != other.maxsize || self.ttl != other.ttl { - return false; - } - - if self.entries.len() != other.entries.len() { - return false; - } - - for index in 0..self.entries.len() { - let element1 = &self.entries[index]; - let element2 = &other.entries[index]; - - if element1.key.hash != element2.key.hash - || !pyobject_eq!(element1.key.key, element2.key.key) - || !pyobject_eq!(element1.value, element2.value) - { - return false; - } - } - - true - } -} - -impl Eq for TTLPolicy {} - -pub struct TTLIterator { - pub first: crate::util::NoLifetimeSliceIter, - pub second: crate::util::NoLifetimeSliceIter, -} - -impl Iterator for TTLIterator { - type Item = *const TTLElement; - - fn next(&mut self) -> Option { - match self.first.next() { - Some(val) => Some(val), - None => { - core::mem::swap(&mut self.first, &mut self.second); - self.first.next() - } - } - } -} - -unsafe impl Send for TTLIterator {} -unsafe impl Sync for TTLIterator {} diff --git a/src/internal/vttl.rs b/src/internal/vttl.rs deleted file mode 100644 index 7310a69..0000000 --- a/src/internal/vttl.rs +++ /dev/null @@ -1,452 +0,0 @@ -//! 
The VTTL Policy - -use crate::hashedkey::HashedKey; -use crate::sorted_heap; -use hashbrown::raw::RawTable; -use std::ptr::NonNull; -use std::time; - -pub struct VTTLElement { - pub key: HashedKey, - pub value: pyo3::PyObject, - pub expire_at: Option, -} - -impl VTTLElement { - #[inline] - pub fn new(key: HashedKey, value: pyo3::PyObject, ttl: Option) -> Self { - Self { - key, - value, - expire_at: ttl - .map(|secs| time::SystemTime::now() + time::Duration::from_secs_f64(secs)), - } - } - - #[inline] - pub fn reset(&mut self, value: pyo3::PyObject, ttl: Option) -> pyo3::PyObject { - self.expire_at = - ttl.map(|secs| time::SystemTime::now() + time::Duration::from_secs_f64(secs)); - core::mem::replace(&mut self.value, value) - } - - #[inline] - pub fn expired(&self) -> bool { - self.expire_at - .filter(|x| std::time::SystemTime::now() >= *x) - .is_some() - } - - #[inline] - pub fn or_none(self) -> Option { - if self.expired() { - None - } else { - Some(self) - } - } - - #[inline] - pub fn or_none_ref(&self) -> Option<&Self> { - if self.expired() { - None - } else { - Some(self) - } - } -} - -pub struct VTTLPolicy { - pub table: RawTable>>, - pub heap: sorted_heap::SortedHeap, - pub maxsize: core::num::NonZeroUsize, - pub state: crate::util::CacheState, -} - -macro_rules! 
compare_fn { - () => { - |a, b| { - if a.expire_at.is_none() && b.expire_at.is_none() { - return std::cmp::Ordering::Equal; - } - if b.expire_at.is_none() { - return std::cmp::Ordering::Less; - } - if b.expire_at.is_none() { - return std::cmp::Ordering::Greater; - } - a.expire_at.cmp(&b.expire_at) - } - }; -} - -impl VTTLPolicy { - #[inline] - pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { - let maxsize = non_zero_or!(maxsize, isize::MAX as usize); - capacity = capacity.min(maxsize.get()); - - Ok(Self { - table: new_table!(capacity)?, - heap: sorted_heap::SortedHeap::new(), - maxsize, - state: crate::util::CacheState::new(), - }) - } - - #[inline(always)] - pub fn expire(&mut self) { - self.heap.sort(compare_fn!()); - - while let Some(x) = self.heap.0.first() { - unsafe { - if !(*x.as_ptr()).as_ref().expired() { - break; - } - - self.table - .remove_entry((*x.as_ptr()).as_ref().key.hash, |node| node == x) - .unwrap(); - - self.heap.pop_front(compare_fn!()); - self.state.change(); - } - } - } - - /// # Safety - /// - /// This method is unsafe because does not checks the maxsize and this - /// may occurred errors and bad situations in future if you don't care about - /// maxsize. 
- #[inline] - unsafe fn insert_unchecked( - &mut self, - key: HashedKey, - value: pyo3::PyObject, - ttl: Option, - ) -> Option { - match self.table.find_or_find_insert_slot( - key.hash, - |node| (*node.as_ptr()).as_ref().key == key, - |node| (*node.as_ptr()).as_ref().key.hash, - ) { - Ok(bucket) => { - let node = bucket.as_mut(); - - let oldval = (*node.as_ptr()).as_mut().reset(value, ttl); - self.heap.1 = false; - - Some(oldval) - } - Err(slot) => { - self.state.change(); - - // copy key hash - let hash = key.hash; - - let node = self.heap.push(VTTLElement::new(key, value, ttl)); - unsafe { - self.table.insert_in_slot(hash, slot, node); - } - - self.heap.1 = false; - - None - } - } - } - - #[inline] - pub fn insert( - &mut self, - key: HashedKey, - value: pyo3::PyObject, - ttl: Option, - expire: bool, - ) -> Option { - if expire { - self.expire(); - } - - if self.table.len() >= self.maxsize.get() - && self - .table - .find(key.hash, |node| unsafe { - (*node.as_ptr()).as_ref().key == key - }) - .is_none() - { - self.popitem().unwrap(); - } - - unsafe { self.insert_unchecked(key, value, ttl) } - } - - #[inline] - pub fn popitem(&mut self) -> Option { - self.expire(); - - let first = self.heap.0.first()?; - self.state.change(); - - unsafe { - self.table - .remove_entry((*first.as_ptr()).as_ref().key.hash, |node| { - core::ptr::eq(node.as_ptr(), first.as_ptr()) - }) - .expect("popitem key not found."); - } - - Some(self.heap.pop_front(compare_fn!()).unwrap()) - } - - #[inline] - pub fn get(&self, key: &HashedKey) -> Option<&VTTLElement> { - match unsafe { - self.table - .find(key.hash, |node| (*node.as_ptr()).as_ref().key == *key) - } { - Some(bucket) => unsafe { - let node = bucket.as_ref(); - - let element = (*node.as_ptr()).as_ref(); - element.or_none_ref() - }, - None => None, - } - } - - #[inline] - pub fn remove(&mut self, key: &HashedKey) -> Option { - match unsafe { - self.table - .remove_entry(key.hash, |node| (*node.as_ptr()).as_ref().key == *key) - } { - 
Some(node) => { - self.state.change(); - let element = self.heap.remove(node, compare_fn!()); - element.or_none() - } - None => None, - } - } - - #[inline] - pub fn contains_key(&self, key: &HashedKey) -> bool { - unsafe { - self.table - .get(key.hash, |node| (*node.as_ptr()).as_ref().key == *key) - .filter(|node| !(*node.as_ptr()).as_ref().expired()) - .is_some() - } - } - - #[inline] - pub fn update( - &mut self, - py: pyo3::Python<'_>, - iterable: pyo3::PyObject, - ttl: Option, - ) -> pyo3::PyResult<()> { - use pyo3::types::{PyAnyMethods, PyDictMethods}; - - self.expire(); - - if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { - let dict = unsafe { - iterable - .downcast_bound::(py) - .unwrap_unchecked() - }; - - for (key, value) in dict.iter() { - let hk = unsafe { HashedKey::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - self.insert(hk, value.unbind(), ttl, false); - } - - Ok(()) - } else { - for pair in iterable.bind(py).try_iter()? { - let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - self.insert(hk, value, ttl, false); - } - - Ok(()) - } - } - - #[inline(always)] - pub fn shrink_to_fit(&mut self) { - self.table - .shrink_to(0, |node| unsafe { (*node.as_ptr()).as_ref().key.hash }); - self.heap.0.shrink_to_fit(); - self.state.change(); - } - - pub fn iter(&mut self) -> sorted_heap::Iter { - self.heap.sort(compare_fn!()); - self.heap.iter() - } - - #[allow(clippy::wrong_self_convention)] - #[inline] - pub unsafe fn to_pickle( - &mut self, - py: pyo3::Python<'_>, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - self.expire(); - - let list = pyo3::ffi::PyList_New(0); - if list.is_null() { - return Err(pyo3::PyErr::fetch(py)); - } - - for ptr in self.heap.iter() { - let node = &(*ptr.as_ptr()); - - let ttlobject = - pyo3::ffi::PyLong_FromDouble(node.as_ref().expire_at.map_or(0.0, |x| { - x.duration_since(time::UNIX_EPOCH).unwrap().as_secs_f64() - })); - if 
ttlobject.is_null() { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - - let tp = tuple!( - py, - 3, - 0 => node.as_ref().key.key.clone_ref(py).as_ptr(), - 1 => node.as_ref().value.clone_ref(py).as_ptr(), - 2 => ttlobject, - ); - - if let Err(x) = tp { - pyo3::ffi::Py_DECREF(list); - return Err(x); - } - - if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - pyo3::ffi::Py_DECREF(list); - return Err(pyo3::PyErr::fetch(py)); - } - } - - let maxsize = pyo3::ffi::PyLong_FromSize_t(self.maxsize.get()); - let capacity = pyo3::ffi::PyLong_FromSize_t(self.table.capacity()); - - tuple!( - py, - 3, - 0 => maxsize, - 1 => list, - 2 => capacity, - ) - } - - #[allow(clippy::wrong_self_convention)] - #[inline] - pub unsafe fn from_pickle( - &mut self, - py: pyo3::Python<'_>, - state: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<()> { - use pyo3::types::PyAnyMethods; - - tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); - - // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro - if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { - return Err(err!( - pyo3::exceptions::PyValueError, - "iterable object size is greater than maxsize" - )); - } - - let mut new = Self::new(maxsize, capacity)?; - - for pair in iterable.bind(py).try_iter()? 
{ - let (key, value, timestamp) = - pair?.extract::<(pyo3::PyObject, pyo3::PyObject, f64)>()?; - - let hk = HashedKey::from_pyobject(py, key)?; - - let ttl = { - if timestamp == 0.0 { - None - } else { - let now = time::SystemTime::now(); - let as_system_time = - time::UNIX_EPOCH + time::Duration::from_secs_f64(timestamp); - - if now >= as_system_time { - // key is expired - continue; - } - - Some(as_system_time.duration_since(now).unwrap().as_secs_f64()) - } - }; - - // SAFETY: we don't need to check maxsize, we sure `len(iterable) <= maxsize` - new.insert_unchecked(hk, value, ttl); - } - - *self = new; - Ok(()) - } -} - -impl PartialEq for VTTLPolicy { - fn eq(&self, other: &Self) -> bool { - if self.maxsize != other.maxsize { - return false; - } - - if self.heap.len() != other.heap.len() { - return false; - } - - unsafe { - for bucket in self.table.iter() { - let node1 = bucket.as_ref(); - - let node2 = other.table.get((*node1.as_ptr()).as_ref().key.hash, |x| { - (*x.as_ptr()).as_ref().key == (*node1.as_ptr()).as_ref().key - }); - if node2.is_none() { - return false; - } - - let node2 = node2.unwrap_unchecked(); - - if (*node1.as_ptr()).as_ref().key.hash != (*node2.as_ptr()).as_ref().key.hash - || !pyobject_eq!( - (*node1.as_ptr()).as_ref().key.key, - (*node2.as_ptr()).as_ref().key.key - ) - || !pyobject_eq!( - (*node1.as_ptr()).as_ref().value, - (*node2.as_ptr()).as_ref().value - ) - { - return false; - } - } - } - - true - } -} - -impl Eq for VTTLPolicy {} - -// because we use it in Mutex -unsafe impl Sync for VTTLPolicy {} - -// because we use it in Mutex -unsafe impl Send for VTTLPolicy {} diff --git a/src/lib.rs b/src/lib.rs index 91dfa37..f61e5b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,66 +1,11 @@ use pyo3::prelude::*; -#[macro_use] -mod util; -mod bridge; -mod hashedkey; -mod internal; -mod linked_list; -mod mutex; -mod sorted_heap; - -const PYOBJECT_SIZE: usize = core::mem::size_of::(); -const HASHEDKEY_SIZE: usize = core::mem::size_of::(); - 
-const CACHEBOX_VERSION: &str = env!("CARGO_PKG_VERSION"); - -pub fn version_info() -> (u8, u8, u8, bool) { - let mut t: (u8, u8, u8, bool) = (0, 0, 0, false); - - for (index, mut sub) in CACHEBOX_VERSION.splitn(3, '.').enumerate() { - if index == 2 { - // -alpha, -beta, ... - if let Some(x) = sub.find('-') { - t.3 = true; - sub = &sub[..x]; - } - } - - match index { - 0 => t.0 = sub.parse().unwrap(), - 1 => t.1 = sub.parse().unwrap(), - 2 => t.2 = sub.parse().unwrap(), - _ => unsafe { std::hint::unreachable_unchecked() }, - } - } - - t -} - /// cachebox core ( written in Rust ) -#[pymodule] -#[pyo3(name = "_cachebox")] -fn _cachebox(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.gil_used(false)?; - - m.add("__version__", CACHEBOX_VERSION)?; - m.add("version_info", version_info())?; - m.add("__author__", "awolverp")?; - - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - +#[pymodule(gil_used = false)] +#[cold] +fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("__author__", env!("CARGO_PKG_AUTHORS"))?; + m.add("__version__", env!("CARGO_PKG_VERSION"))?; + Ok(()) } diff --git a/src/linked_list.rs b/src/linked_list.rs deleted file mode 100644 index 1cf893e..0000000 --- a/src/linked_list.rs +++ /dev/null @@ -1,193 +0,0 @@ -use crate::hashedkey::HashedKey; -use std::ptr::NonNull; - -pub struct LinkedList { - pub head: Option>, // front - pub tail: Option>, // back - len: usize, -} - -pub struct Node { - pub prev: Option>, - pub next: Option>, - pub element: (HashedKey, pyo3::PyObject), -} - -impl LinkedList { - #[inline] - pub fn new() -> Self { - Self { - head: None, - tail: None, - len: 0, - } - } - - #[inline] - pub fn len(&self) -> usize { - self.len - } - - pub fn push_back(&mut self, key: HashedKey, val: pyo3::PyObject) -> 
NonNull { - unsafe { - let node = NonNull::new_unchecked(Box::into_raw(Box::new(Node { - prev: None, - next: None, - element: (key, val), - }))); - - if let Some(old) = self.tail { - (*old.as_ptr()).next = Some(node); - (*node.as_ptr()).prev = Some(old); - } else { - // means list is empty, so this node is also can be the front of list - debug_assert!(self.head.is_none(), "head is not None"); - self.head = Some(node); - } - - self.tail = Some(node); - self.len += 1; - node - } - } - - pub fn pop_front(&mut self) -> Option<(HashedKey, pyo3::PyObject)> { - unsafe { - self.head.map(|node| { - let boxed_node = Box::from_raw(node.as_ptr()); - debug_assert!(boxed_node.prev.is_none(), "head.prev is not None"); - - self.head = boxed_node.next; - - match self.head { - None => self.tail = None, - // Not creating new mutable (unique!) references overlapping `element`. - Some(head) => (*head.as_ptr()).prev = None, - } - - debug_assert!(self.len > 0, "self.len is zero"); - self.len -= 1; - boxed_node.element - }) - } - } - - #[inline] - pub fn clear(&mut self) { - while self.pop_front().is_some() {} - } - - pub unsafe fn remove(&mut self, node: NonNull) -> (HashedKey, pyo3::PyObject) { - let node = Box::from_raw(node.as_ptr()); - let result = node.element; - - match node.next { - Some(next) => (*next.as_ptr()).prev = node.prev, - None => { - // Means this node is our self.tail - self.tail = node.prev; - } - } - - match node.prev { - Some(prev) => (*prev.as_ptr()).next = node.next, - None => { - // Means this node is our self.head - self.head = node.next; - } - } - - self.len -= 1; - result - } - - pub unsafe fn move_back(&mut self, node: NonNull) { - if (*node.as_ptr()).next.is_none() { - // Means this node is our self.tail - return; - } - - // unlink - match (*node.as_ptr()).next { - Some(next) => (*next.as_ptr()).prev = (*node.as_ptr()).prev, - None => std::hint::unreachable_unchecked(), - } - - match (*node.as_ptr()).prev { - Some(prev) => (*prev.as_ptr()).next = 
(*node.as_ptr()).next, - None => { - // Means this node is our self.head - self.head = (*node.as_ptr()).next; - } - } - - (*node.as_ptr()).next = None; - (*node.as_ptr()).prev = None; - - // push_back again - if let Some(old) = self.tail { - (*old.as_ptr()).next = Some(node); - (*node.as_ptr()).prev = Some(old); - } else { - // means list is empty, so this node is also can be the front of list - debug_assert!(self.head.is_none(), "head is not None"); - self.head = Some(node); - } - - self.tail = Some(node); - } - - #[inline] - pub fn iter(&self) -> Iter { - Iter { - head: self.head, - len: self.len, - } - } -} - -pub struct Iter { - head: Option>, - len: usize, -} - -impl Iterator for Iter { - type Item = NonNull; - - #[inline] - fn next(&mut self) -> Option { - if self.len == 0 { - None - } else { - self.head.inspect(|node| unsafe { - self.len -= 1; - self.head = (*node.as_ptr()).next; - }) - } - } -} - -impl Drop for LinkedList { - fn drop(&mut self) { - struct DropGuard<'a>(&'a mut LinkedList); - - impl<'a> Drop for DropGuard<'a> { - fn drop(&mut self) { - // Continue the same loop we do below. This only runs when a destructor has - // panicked. If another one panics this will abort. - while self.0.pop_front().is_some() {} - } - } - - // Wrap self so that if a destructor panics, we can try to keep looping - let guard = DropGuard(self); - while guard.0.pop_front().is_some() {} - core::mem::forget(guard); - } -} - -// because we use it in Mutex -unsafe impl Sync for Iter {} - -// because we use it in Mutex -unsafe impl Send for Iter {} diff --git a/src/mutex.rs b/src/mutex.rs deleted file mode 100644 index e218522..0000000 --- a/src/mutex.rs +++ /dev/null @@ -1,173 +0,0 @@ -//! Mutex lock -//! -//! Thanks to `Amanieu d'Antras` for this beautiful implementation. 
- -use core::sync::atomic::{AtomicU8, Ordering}; -use parking_lot_core::deadlock; -use parking_lot_core::{self, ParkResult, SpinWait, UnparkResult, UnparkToken, DEFAULT_PARK_TOKEN}; -use std::time::Instant; - -const TOKEN_NORMAL: UnparkToken = UnparkToken(0); -const TOKEN_HANDOFF: UnparkToken = UnparkToken(1); - -const LOCKED_BIT: u8 = 0b01; -const PARKED_BIT: u8 = 0b10; - -pub struct RawMutex { - state: AtomicU8, -} - -unsafe impl lock_api::RawMutex for RawMutex { - #[allow(clippy::declare_interior_mutable_const)] - const INIT: RawMutex = RawMutex { - state: AtomicU8::new(0), - }; - - type GuardMarker = lock_api::GuardSend; - - #[inline] - fn lock(&self) { - if self - .state - .compare_exchange_weak(0, LOCKED_BIT, Ordering::Acquire, Ordering::Relaxed) - .is_err() - { - self.lock_slow(None); - } - unsafe { deadlock::acquire_resource(self as *const _ as usize) }; - } - - #[inline] - fn try_lock(&self) -> bool { - let mut state = self.state.load(Ordering::Relaxed); - loop { - if state & LOCKED_BIT != 0 { - return false; - } - match self.state.compare_exchange_weak( - state, - state | LOCKED_BIT, - Ordering::Acquire, - Ordering::Relaxed, - ) { - Ok(_) => { - unsafe { deadlock::acquire_resource(self as *const _ as usize) }; - return true; - } - Err(x) => state = x, - } - } - } - - #[inline] - unsafe fn unlock(&self) { - deadlock::release_resource(self as *const _ as usize); - if self - .state - .compare_exchange(LOCKED_BIT, 0, Ordering::Release, Ordering::Relaxed) - .is_ok() - { - return; - } - self.unlock_slow(false); - } - - #[inline] - fn is_locked(&self) -> bool { - let state = self.state.load(Ordering::Relaxed); - state & LOCKED_BIT != 0 - } -} - -impl RawMutex { - #[cold] - fn lock_slow(&self, timeout: Option) -> bool { - let mut spinwait = SpinWait::new(); - let mut state = self.state.load(Ordering::Relaxed); - loop { - if state & LOCKED_BIT == 0 { - match self.state.compare_exchange_weak( - state, - state | LOCKED_BIT, - Ordering::Acquire, - Ordering::Relaxed, 
- ) { - Ok(_) => return true, - Err(x) => state = x, - } - continue; - } - - if state & PARKED_BIT == 0 && spinwait.spin() { - state = self.state.load(Ordering::Relaxed); - continue; - } - - if state & PARKED_BIT == 0 { - if let Err(x) = self.state.compare_exchange_weak( - state, - state | PARKED_BIT, - Ordering::Relaxed, - Ordering::Relaxed, - ) { - state = x; - continue; - } - } - - let addr = self as *const _ as usize; - let validate = || self.state.load(Ordering::Relaxed) == LOCKED_BIT | PARKED_BIT; - let before_sleep = || {}; - let timed_out = |_, was_last_thread| { - if was_last_thread { - self.state.fetch_and(!PARKED_BIT, Ordering::Relaxed); - } - }; - - match unsafe { - parking_lot_core::park( - addr, - validate, - before_sleep, - timed_out, - DEFAULT_PARK_TOKEN, - timeout, - ) - } { - ParkResult::Unparked(TOKEN_HANDOFF) => return true, - ParkResult::Unparked(_) => (), - ParkResult::Invalid => (), - ParkResult::TimedOut => return false, - } - - spinwait.reset(); - state = self.state.load(Ordering::Relaxed); - } - } - - #[cold] - fn unlock_slow(&self, force_fair: bool) { - let addr = self as *const _ as usize; - let callback = |result: UnparkResult| { - if result.unparked_threads != 0 && (force_fair || result.be_fair) { - if !result.have_more_threads { - self.state.store(LOCKED_BIT, Ordering::Relaxed); - } - return TOKEN_HANDOFF; - } - - if result.have_more_threads { - self.state.store(PARKED_BIT, Ordering::Release); - } else { - self.state.store(0, Ordering::Release); - } - TOKEN_NORMAL - }; - - unsafe { - parking_lot_core::unpark_one(addr, callback); - } - } -} - -pub type Mutex = lock_api::Mutex; diff --git a/src/sorted_heap.rs b/src/sorted_heap.rs deleted file mode 100644 index 4c943f0..0000000 --- a/src/sorted_heap.rs +++ /dev/null @@ -1,177 +0,0 @@ -use std::ptr::NonNull; - -pub struct SortedHeap(pub Vec>>, pub bool); - -pub struct Entry(T); - -pub struct Iter { - slice: *const NonNull>, - index: usize, - len: usize, -} - -impl SortedHeap { - #[inline] 
- pub fn new() -> Self { - Self(Vec::new(), true) - } - - #[inline] - pub fn len(&self) -> usize { - self.0.len() - } - - #[inline] - pub fn capacity(&self) -> usize { - self.0.capacity() - } - - pub fn push(&mut self, value: T) -> NonNull> { - unsafe { - let node = NonNull::new_unchecked(Box::into_raw(Box::new(Entry(value)))); - - self.0.push(node); - self.1 = false; - - node - } - } - - #[inline] - pub fn sort(&mut self, mut compare: F) - where - F: FnMut(&T, &T) -> std::cmp::Ordering, - { - if !self.1 { - if self.0.len() > 1 { - unsafe { - self.0 - .sort_by(|a, b| compare(&(*a.as_ptr()).0, &(*b.as_ptr()).0)); - } - } - - self.1 = true; - } - } - - #[inline] - fn unlink_first(&mut self) -> Option { - if self.0.is_empty() { - return None; - } - - let node = self.0.remove(0); - let boxed_node = unsafe { Box::from_raw(node.as_ptr()) }; - Some(boxed_node.0) - } - - pub fn pop_front(&mut self, compare: F) -> Option - where - F: FnMut(&T, &T) -> std::cmp::Ordering, - { - self.sort(compare); - self.unlink_first() - } - - #[inline] - fn unlink_last(&mut self) -> Option { - let node = self.0.pop()?; - let boxed_node = unsafe { Box::from_raw(node.as_ptr()) }; - Some(boxed_node.0) - } - - pub fn pop_back(&mut self, compare: F) -> Option - where - F: FnMut(&T, &T) -> std::cmp::Ordering, - { - self.sort(compare); - self.unlink_last() - } - - #[inline] - pub fn get(&self, index: usize) -> Option<&NonNull>> { - self.0.get(index) - } - - pub fn remove(&mut self, node: NonNull>, compare: F) -> T - where - F: FnMut(&T, &T) -> std::cmp::Ordering, - { - debug_assert!(!self.0.is_empty()); - - if self.0.len() == 1 { - return self.pop_back(compare).unwrap(); - } - - self.sort(compare); - - let index = self.0.iter().position(|x| node == *x).unwrap(); - - let node = self.0.remove(index); - let boxed_node = unsafe { Box::from_raw(node.as_ptr()) }; - boxed_node.0 - } - - pub fn iter(&self) -> Iter { - Iter { - slice: self.0.as_ptr(), - index: 0, - len: self.0.len(), - } - } - - pub fn 
clear(&mut self) { - while self.unlink_last().is_some() {} - } -} - -impl Drop for SortedHeap { - fn drop(&mut self) { - struct DropGuard<'a, T>(&'a mut SortedHeap); - - impl<'a, T> Drop for DropGuard<'a, T> { - fn drop(&mut self) { - // Continue the same loop we do below. This only runs when a destructor has - // panicked. If another one panics this will abort. - while self.0.unlink_last().is_some() {} - } - } - - // Wrap self so that if a destructor panics, we can try to keep looping - let guard = DropGuard(self); - while guard.0.unlink_last().is_some() {} - core::mem::forget(guard); - } -} - -impl AsRef for Entry { - fn as_ref(&self) -> &T { - &self.0 - } -} - -impl AsMut for Entry { - fn as_mut(&mut self) -> &mut T { - &mut self.0 - } -} - -impl Iterator for Iter { - type Item = NonNull>; - - fn next(&mut self) -> Option { - if self.index == self.len { - None - } else { - let value = unsafe { self.slice.add(self.index) }; - self.index += 1; - Some(unsafe { *value }) - } - } -} - -// because we use it in Mutex -unsafe impl Sync for Iter {} - -// because we use it in Mutex -unsafe impl Send for Iter {} diff --git a/src/util.rs b/src/util.rs deleted file mode 100644 index daf99ec..0000000 --- a/src/util.rs +++ /dev/null @@ -1,253 +0,0 @@ -#[allow(unused_imports)] -use pyo3::IntoPyObject; - -macro_rules! err { - ($type:ty, $val:expr) => { - ::pyo3::PyErr::new::<$type, _>($val) - }; -} - -#[rustfmt::skip] -macro_rules! non_zero_or { - ($num:expr, $_else:expr) => { - unsafe { - core::num::NonZeroUsize::new_unchecked( - if $num == 0 { $_else } else { $num } - ) - } - }; -} - -macro_rules! new_table { - ($capacity:expr) => {{ - if $capacity > 0 { - hashbrown::raw::RawTable::try_with_capacity($capacity) - .map_err(|_| err!(pyo3::exceptions::PyMemoryError, ())) - } else { - Ok(hashbrown::raw::RawTable::new()) - } - }}; -} - -macro_rules! 
tuple { - ( - $py:expr, - $len:expr, - $($index:expr => $value:expr,)+ - ) => {{ - let tuple = unsafe { pyo3::ffi::PyTuple_New($len) }; - if tuple.is_null() { - Err(pyo3::PyErr::fetch($py)) - } else { - unsafe { - $( - pyo3::ffi::PyTuple_SetItem(tuple, $index, $value); - )+ - } - - Ok(tuple) - } - }}; - - (check $tuple:expr, size=$size:expr) => {{ - if unsafe { pyo3::ffi::PyTuple_CheckExact($tuple) } == 0 { - Err(err!(pyo3::exceptions::PyTypeError, "expected tuple, but got another type")) - } else if unsafe {pyo3::ffi::PyTuple_Size($tuple)} != $size { - Err(err!(pyo3::exceptions::PyTypeError, "tuple size is invalid")) - } else { - Ok(()) - } - }} -} - -macro_rules! extract_pickle_tuple { - ($py:expr, $state:expr) => {{ - let maxsize = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 0); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - let iterable = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 1); - - if pyo3::ffi::PyDict_CheckExact(obj) != 1 && pyo3::ffi::PyList_CheckExact(obj) != 1 { - return Err(err!( - pyo3::exceptions::PyTypeError, - "the iterable object is not an dict or list" - )); - } - - // Tuple returns borrowed references - pyo3::PyObject::from_borrowed_ptr($py, obj) - }; - - let capacity = { - let obj = pyo3::ffi::PyTuple_GetItem($state, 2); - pyo3::ffi::PyLong_AsSize_t(obj) - }; - - if let Some(e) = pyo3::PyErr::take($py) { - return Err(e); - } - - (maxsize, iterable, capacity) - }}; -} - -macro_rules! 
pyobject_eq { - ($arg1:expr, $arg2:expr) => { - if $arg1.as_ptr() == $arg2.as_ptr() { - true - } else { - #[allow(unused_unsafe)] - unsafe { - let cmp = pyo3::ffi::PyObject_RichCompare( - $arg1.as_ptr(), - $arg2.as_ptr(), - pyo3::ffi::Py_EQ, - ); - - if cmp.is_null() { - pyo3::ffi::PyErr_Clear(); - false - } else { - let boolean = pyo3::ffi::PyObject_IsTrue(cmp); - pyo3::ffi::Py_DECREF(cmp); - - if boolean == -1 { - pyo3::ffi::PyErr_Clear(); - false - } else { - boolean == 1 - } - } - } - } - }; -} - -pub struct CacheState(usize); - -impl CacheState { - pub fn new() -> Self { - Self(0) - } - pub fn change(&mut self) { - self.0 = self.0.wrapping_add(1); - } - pub fn get(&self) -> usize { - self.0 - } -} - -unsafe fn _get_state(py: pyo3::Python<'_>, ptr: *mut pyo3::ffi::PyObject) -> pyo3::PyResult { - unsafe fn inner( - py: pyo3::Python<'_>, - ptr: *mut pyo3::ffi::PyObject, - ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { - cfg_if::cfg_if! { - if #[cfg(all(Py_3_9, not(any(Py_LIMITED_API, PyPy, GraalPy))))] { - let m_name: pyo3::Bound<'_, pyo3::types::PyString> = "_state".into_pyobject(py)?; - Ok(pyo3::ffi::PyObject_CallMethodNoArgs(ptr, m_name.as_ptr())) - } else { - let state_fn = - pyo3::ffi::PyObject_GetAttrString(ptr, pyo3::ffi::c_str!("_state").as_ptr()); - - if state_fn.is_null() { - return Err(pyo3::PyErr::take(py).unwrap_unchecked()); - } - - let empty_args = pyo3::ffi::PyTuple_New(0); - let result = pyo3::ffi::PyObject_Call(state_fn, empty_args, std::ptr::null_mut()); - pyo3::ffi::Py_XDECREF(empty_args); - pyo3::ffi::Py_XDECREF(state_fn); - - Ok(result) - } - } - } - - let result = inner(py, ptr)?; - - if result.is_null() { - return Err(pyo3::PyErr::take(py).unwrap_unchecked()); - } - - let c = pyo3::ffi::PyLong_AsSize_t(result); - pyo3::ffi::Py_XDECREF(result); - - Ok(c) -} - -pub struct _KeepForIter { - pub ptr: core::ptr::NonNull, - pub state: usize, - pub len: usize, - - phantom: core::marker::PhantomData, -} - -impl _KeepForIter { - pub fn new(ptr: 
*mut pyo3::ffi::PyObject, state: usize, len: usize) -> Self { - unsafe { - pyo3::ffi::Py_INCREF(ptr); - } - - Self { - #[cfg(debug_assertions)] - ptr: core::ptr::NonNull::new(ptr).unwrap(), - #[cfg(not(debug_assertions))] - ptr: unsafe { core::ptr::NonNull::new(ptr).unwrap_unchecked() }, - state, - len, - phantom: core::marker::PhantomData, - } - } - - pub fn status(&self, py: pyo3::Python<'_>) -> pyo3::PyResult<()> { - let state = unsafe { _get_state(py, self.ptr.as_ptr())? }; - if state != self.state { - return Err(err!( - pyo3::exceptions::PyRuntimeError, - "cache changed size during iteration" - )); - } - - Ok(()) - } -} - -impl Drop for _KeepForIter { - fn drop(&mut self) { - unsafe { - pyo3::ffi::Py_DECREF(self.ptr.as_ptr()); - } - } -} - -unsafe impl Send for _KeepForIter {} -unsafe impl Sync for _KeepForIter {} - -pub struct NoLifetimeSliceIter { - pub slice: *const T, - pub index: usize, - pub len: usize, -} - -impl Iterator for NoLifetimeSliceIter { - type Item = *const T; - - fn next(&mut self) -> Option { - if self.index == self.len { - None - } else { - let value = unsafe { self.slice.add(self.index) }; - self.index += 1; - Some(value) - } - } -} diff --git a/tests/mixin.py b/tests/mixin.py deleted file mode 100644 index 3e5a80f..0000000 --- a/tests/mixin.py +++ /dev/null @@ -1,455 +0,0 @@ -from cachebox import BaseCacheImpl, LRUCache, LFUCache -import dataclasses -import pytest -import typing -import sys - - -@dataclasses.dataclass -class EQ: - def __init__(self, val: int) -> None: - self.val = val - - def __eq__(self, other: "EQ") -> bool: - return self.val == other.val - - def __hash__(self) -> int: - return self.val - - -@dataclasses.dataclass -class NoEQ: - def __init__(self, val: int) -> None: - self.val = val - - def __hash__(self) -> int: - return self.val - - -def getsizeof(obj, use_sys=True): - try: - if use_sys: - return sys.getsizeof(obj) - else: - return obj.__sizeof__() - except TypeError: # PyPy doesn't implement getsizeof or __sizeof__ 
- return len(obj) - - -class _TestMixin: - CACHE: typing.Type[BaseCacheImpl] - - KWARGS: dict = {} - NO_POLICY: bool = False - ITERATOR_CLASS: typing.Optional[type] = None - - def test__new__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=8) - assert cache.maxsize == 10 - assert 20 > cache.capacity() >= 8, "capacity: {}".format(cache.capacity()) - - cache = self.CACHE(20, **self.KWARGS, capacity=0) - assert cache.maxsize == 20 - assert 2 >= cache.capacity() >= 0 # This is depends on platform - - cache = self.CACHE(20, **self.KWARGS, capacity=100) - assert cache.maxsize == 20 - assert 30 > cache.capacity() >= 20 - - cache = self.CACHE(0, **self.KWARGS, capacity=8) - assert cache.maxsize == sys.maxsize - assert 20 > cache.capacity() >= 8 - - cache = self.CACHE(0, **self.KWARGS, capacity=0) - assert cache.maxsize == sys.maxsize - assert 2 >= cache.capacity() >= 0 # This is depends on platform - - def test_overflow(self): - if not self.NO_POLICY: - return - - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - for i in range(10): - cache[i] = i - - with pytest.raises(OverflowError): - cache["new-key"] = "new-value" - - def test___len__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - assert len(cache) == 0 - assert cache.is_empty() - - cache[0] = 0 - assert len(cache) == 1 - - cache[1] = 1 - cache[2] = 2 - cache[3] = 3 - assert len(cache) == 4 - - cache[0] = 10 - cache[1] = 5 - assert len(cache) == 4 - - for i in range(1000, 1000 + (10 - len(cache))): - cache[i] = i - - assert len(cache) == 10 - assert cache.is_full() - - def test___sizeof__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - # all classes have to implement __sizeof__ - # __sizeof__ returns exactly allocated memory size by cache - # but sys.getsizeof add also garbage collector overhead to that, so sometimes - # sys.getsizeof is greater than __sizeof__ - getsizeof(cache, False) - - def test___bool__(self): - cache = self.CACHE(1, **self.KWARGS, capacity=1) - - if 
cache: - pytest.fail("bool(cache) returns invalid response") - - cache[1] = 1 - if not cache: - pytest.fail("not bool(cache) returns invalid response") - - def test___contains__(self): - cache = self.CACHE(1, **self.KWARGS, capacity=1) - - assert 1 not in cache - cache[1] = 1 - assert 1 in cache - - def test___setitem__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - with pytest.raises(KeyError): - cache[1] - - cache[1] = 1 - cache[1] - cache[0] = 0 - cache[0] - cache[2] = 2 - cache[3] = 3 - - with pytest.raises(KeyError): - cache[4] - - del cache[1] - del cache[2] - del cache[3] - - cache[0] - - with pytest.raises(KeyError): - cache[2] - - def test___repr__(self): - cache = self.CACHE(2, **self.KWARGS, capacity=2) - assert str(cache) == repr(cache) - assert repr(cache).startswith(self.CACHE.__name__) - - def test_insert(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - assert cache.insert(1, 1) is None - assert cache.insert(1, 1) == 1 - assert cache.insert(1, 10) == 1 - assert cache.insert(1, 2) == 10 - - cache[5] = 5 - - assert cache.insert(5, "value") == 5 - assert cache.insert(5, 5) == "value" - - del cache[5] - - assert cache.insert(5, 5) is None - - def test_get(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - cache[i] = i - - assert cache.get(0, None) == 0 - assert cache.get(1, None) == 1 - assert cache.get("no-exists") is None - assert cache.get("no-exists", None) is None - assert cache.get("no-exists", 111) == 111 - - def test_pop(self): - cache = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - cache[i] = i * 2 - - assert cache.pop(1, None) == 2 - assert cache.get(1, None) is None - assert cache.pop(2, None) == 4 - assert cache.get(2, None) is None - - assert cache.pop(10, None) is None - assert cache.pop(10, 2) == 2 - - def test_setdefault(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj.setdefault("name", "nick") - obj["age"] = 18 - assert 18 == 
obj.setdefault("age", 1000) - assert 18 == obj["age"] - assert "nick" == obj["name"] - - if self.NO_POLICY: - with pytest.raises(OverflowError): - obj.setdefault("newkey", 0) - - def test_clear(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj[1] = 1 - obj[2] = 2 - assert 2 == len(obj) - - cap = getsizeof(obj, False) - obj.clear(reuse=True) - assert 0 == len(obj) - try: - assert getsizeof(obj, False) >= cap - except AssertionError as e: - if not isinstance(obj, (LRUCache, LFUCache)): - raise e - - obj[1] = 1 - obj[2] = 2 - assert 2 == len(obj) - - cap = getsizeof(obj, False) - obj.clear(reuse=False) - assert 0 == len(obj) - # this is not stable and - # may increases the capacity! - try: - assert cap != getsizeof(obj, False) - except AssertionError as e: - if not isinstance(obj, (LRUCache, LFUCache)): - raise e - - def test_update(self): - obj = self.CACHE(2, **self.KWARGS, capacity=2) - - obj.update({1: 1, 2: 2}) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - obj.update({1: 1, 2: 2}) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - obj.update([(1, "a"), (2, "b")]) - assert 2 == len(obj) - assert "a" == obj[1] - assert "b" == obj[2] - - if self.NO_POLICY: - with pytest.raises(OverflowError): - obj.update([(3, "a"), (4, "b")]) - else: - obj.update([(3, "a"), (4, "b")]) - - kw = self.KWARGS.copy() - kw["iterable"] = {1: 1, 2: 2} - obj = self.CACHE(2, **kw, capacity=2) - assert 2 == len(obj) - assert 1 == obj[1] - assert 2 == obj[2] - - kw["iterable"] = [(1, "a"), (2, "b")] - obj = self.CACHE(2, **kw, capacity=2) - assert 2 == len(obj) - assert "a" == obj[1] - assert "b" == obj[2] - - def test_eq_implemetation(self): - # see https://github.com/awolverp/cachebox/issues/5 - - size = 1000 - cache = self.CACHE(size, **self.KWARGS, capacity=size) - - for i in range(size): - cache.insert(NoEQ(val=i), i) - cache.get(NoEQ(val=i)) - - cache = self.CACHE(size, **self.KWARGS, capacity=size) - - for i in range(size): - 
cache.insert(EQ(val=i), i) - cache.get(EQ(val=i)) - - def test_iterators(self): - obj = self.CACHE(100, **self.KWARGS, capacity=100) - - if self.ITERATOR_CLASS: - assert isinstance(iter(obj), self.ITERATOR_CLASS) - - for i in range(6): - obj[i] = i * 2 - - k = list(range(6)) - v = list(i * 2 for i in range(6)) - assert k == sorted(obj.keys()) - assert v == sorted(obj.values()) - assert list(zip(k, v)) == sorted(obj.items()) - - with pytest.raises(RuntimeError): - for i in obj: - del obj[i] - - for i in range(100): - obj[i] = i * 2 - - for i in range(50): - del obj[i] - - p = iter(obj) - next(p) - - obj.shrink_to_fit() - - with pytest.raises(RuntimeError): - next(p) - - obj = self.CACHE(0, **self.KWARGS) - obj.update({i: i for i in range(20)}) - - for key, value in obj.items(): - assert obj[key] == value - - for key, value in obj.items(): - obj[key] = value * 2 - - with pytest.raises(RuntimeError): - for key, value in obj.items(): - obj[str(key)] = value - - def test___eq__(self): - cache = self.CACHE(100, **self.KWARGS, capacity=100) - - with pytest.raises(NotImplementedError): - cache > cache - - with pytest.raises(NotImplementedError): - cache < cache - - with pytest.raises(NotImplementedError): - cache >= cache - - with pytest.raises(NotImplementedError): - cache <= cache - - assert cache == cache - assert not cache != cache - - for i in range(90): - cache[i] = i - - assert cache == cache - assert not cache != cache - - c2 = self.CACHE(100, **self.KWARGS, capacity=100) - for i in range(90): - c2[i] = i - - assert cache == c2 - assert not c2 != cache - - c2 = self.CACHE(1000, **self.KWARGS, capacity=100) - for i in range(90): - c2[i] = i - - assert not cache == c2 - assert c2 != cache - - def test_generic(self): - obj: self.CACHE[int, int] = self.CACHE(maxsize=0, **self.KWARGS) - _ = obj - - def _test_pickle(self, check_order: typing.Callable): - import pickle - import tempfile - - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c2 = pickle.loads(pickle.dumps(c1)) - 
assert c1 == c2 - assert c1.capacity() == c2.capacity() - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for _ in range(10): - c1[0] - for _ in range(9): - c1[1] - for _ in range(8): - c1[2] - for _ in range(7): - c1[3] - for _ in range(6): - c1[4] - for _ in range(5): - c1[5] - for _ in range(4): - c1[6] - for _ in range(3): - c1[7] - for _ in range(2): - c1[8] - for _ in range(1): - c1[9] - - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - check_order(c1, c2) - - with tempfile.TemporaryFile("w+b") as fd: - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for _ in range(10): - c1[1] - for _ in range(9): - c1[2] - for _ in range(8): - c1[0] - for _ in range(7): - c1[3] - for _ in range(6): - c1[5] - for _ in range(5): - c1[4] - for _ in range(4): - c1[6] - for _ in range(3): - c1[7] - for _ in range(2): - c1[9] - for _ in range(1): - c1[8] - - pickle.dump(c1, fd) - fd.seek(0) - c2 = pickle.load(fd) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - check_order(c1, c2) diff --git a/tests/test_caches.py b/tests/test_caches.py deleted file mode 100644 index f374c8a..0000000 --- a/tests/test_caches.py +++ /dev/null @@ -1,573 +0,0 @@ -from cachebox import ( - BaseCacheImpl, - Cache, - FIFOCache, - RRCache, - TTLCache, - LRUCache, - LFUCache, - VTTLCache, - cache_iterator, - fifocache_iterator, - ttlcache_iterator, - lrucache_iterator, - lfucache_iterator, -) - -import pytest -import time - -from .mixin import _TestMixin - - -def test___new__(): - with pytest.raises(NotImplementedError): - BaseCacheImpl() - - -def test_subclass(): - class _TestSubclass(BaseCacheImpl): - def __init__(self) -> None: - self.a = 1 - - def inc(self, x: int): - self.a += x - - t = _TestSubclass() - t.inc(10) - assert t.a == 11 - - -class TestCache(_TestMixin): - CACHE = Cache - NO_POLICY = True - ITERATOR_CLASS = cache_iterator - - def test_pickle(self): - 
self._test_pickle(lambda c1, c2: None) - - -class TestFIFOCache(_TestMixin): - CACHE = FIFOCache - ITERATOR_CLASS = fifocache_iterator - - def test_policy(self): - cache = FIFOCache(5) - - cache[0] = 0 - cache[1] = 1 - cache[2] = 2 - - assert cache[0] == 0 - assert cache[1] == 1 - - assert cache.popitem() == (0, 0) - - cache[3] = 3 - - assert cache.popitem() == (1, 1) - assert cache.popitem() == (2, 2) - assert cache.popitem() == (3, 3) - - with pytest.raises(KeyError): - cache.popitem() - - for i in range(5): - cache[i] = i - - for i in range(5): - assert i in cache - - cache[10] = 10 - - assert 0 not in cache - assert 10 in cache - - assert cache.popitem() == (1, 1) - - del cache[2] - del cache[3] - del cache[4] - - assert cache.popitem() == (10, 10) - - def test_ordered_iterators(self): - obj = self.CACHE(100, **self.KWARGS, capacity=100) - - for i in range(6): - obj[i] = i * 2 - - k = list(range(6)) - v = list(i * 2 for i in range(6)) - assert k == list(obj.keys()) - assert v == list(obj.values()) - assert list(zip(k, v)) == list(obj.items()) - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - def test_first_last(self): - obj = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - obj[i] = i * 2 - - assert obj.first() == 0 - assert obj.last() == 4 - - obj[10] = 20 - - assert obj.first() == 1 - assert obj.last() == 10 - - -class TestRRCache(_TestMixin): - CACHE = RRCache - ITERATOR_CLASS = cache_iterator - - def test_pickle(self): - self._test_pickle(lambda c1, c2: None) - - -class TestTTLCache(_TestMixin): - CACHE = TTLCache - KWARGS = {"ttl": 10} - ITERATOR_CLASS = ttlcache_iterator - - def test_policy(self): - obj = self.CACHE(2, 0.5) - assert obj.ttl == 0.5 - - obj.insert(0, 1) - time.sleep(0.8) - - with pytest.raises(KeyError): - obj[0] - - obj = self.CACHE(2, 20) - - obj.insert(0, 0) - obj.insert(1, 1) - obj.insert(2, 2) - - assert 0 not in obj - assert (1, 1) == 
obj.popitem() - - def test_update_with_ttl(self): - obj = self.CACHE(2, 0.5) - - # obj.update({1: 1, 2: 2, 3: 3}) - obj.update((i + 1, i + 1) for i in range(3)) - - with pytest.raises(KeyError): - obj[1] - - time.sleep(0.8) - - with pytest.raises(KeyError): - obj[2] - - with pytest.raises(KeyError): - obj[3] - - def test_policy_ttl_no_care(self): - cache = TTLCache(5, 10) - - cache[0] = 0 - cache[1] = 1 - cache[2] = 2 - - assert cache[0] == 0 - assert cache[1] == 1 - - assert cache.popitem() == (0, 0) - - cache[3] = 3 - - assert cache.popitem() == (1, 1) - assert cache.popitem() == (2, 2) - assert cache.popitem() == (3, 3) - - with pytest.raises(KeyError): - cache.popitem() - - for i in range(5): - cache[i] = i - - for i in range(5): - assert i in cache - - cache[10] = 10 - - assert 0 not in cache - assert 10 in cache - - assert cache.popitem() == (1, 1) - - del cache[2] - del cache[3] - del cache[4] - - assert cache.popitem() == (10, 10) - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - def test_first_last(self): - obj = self.CACHE(5, **self.KWARGS, capacity=5) - - for i in range(5): - obj[i] = i * 2 - - assert obj.first() == 0 - assert obj.last() == 4 - - obj[10] = 20 - - assert obj.first() == 1 - assert obj.last() == 10 - - def test_get_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - time.sleep(0.1) - value, dur = obj.get_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.get_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.get_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_pop_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - time.sleep(0.1) - value, dur = obj.pop_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.pop_with_expire("no-exists") 
- assert value is None - assert 0 == dur - - value, dur = obj.pop_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_popitem_with_expire(self): - obj = TTLCache(2, 10) - - obj.insert(1, 1) - obj.insert(2, 2) - time.sleep(0.1) - key, value, dur = obj.popitem_with_expire() - assert (1, 1) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - key, value, dur = obj.popitem_with_expire() - assert (2, 2) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - with pytest.raises(KeyError): - obj.popitem_with_expire() - - -class TestLRUCache(_TestMixin): - CACHE = LRUCache - ITERATOR_CLASS = lrucache_iterator - - def test_policy(self): - obj = self.CACHE(3) - - obj[1] = 1 - obj[2] = 2 - obj[3] = 3 - - assert (1, 1) == obj.popitem() - - obj[1] = 1 - obj[2] - - assert (3, 3) == obj.popitem() - - obj[4] = 4 - assert 1 == obj.get(1) - - obj[5] = 5 - assert 2 not in obj - - def test_ordered_iterators(self): - obj = self.CACHE(20, **self.KWARGS, capacity=20) - - for i in range(6): - obj[i] = i * 2 - - obj[1] - obj[5] - obj[3] = 7 - - k = [0, 2, 4, 1, 5, 3] - v = [0, 4, 8, 2, 10, 7] - assert k == list(obj.keys()) - assert v == list(obj.values()) - assert list(zip(k, v)) == list(obj.items()) - - def test_recently_used_funcs(self): - obj = LRUCache(10) - - for i in range(6): - obj[i] = i * 2 - - obj[1] - obj[5] - obj[3] = 7 - obj.peek(4) - - assert obj.most_recently_used() == 3 - assert obj.least_recently_used() == 0 - assert obj.least_recently_used(1) == 2 - assert obj.least_recently_used(5) == 3 - assert obj.least_recently_used(6) is None - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - -class TestLFUCache(_TestMixin): - CACHE = LFUCache - ITERATOR_CLASS = lfucache_iterator - - def test_policy(self): - obj = self.CACHE(5, {i: i for i in range(5)}) - - for i in range(5): - obj[i] = i - - for i in range(10): - 
assert 0 == obj[0] - for i in range(7): - assert 1 == obj[1] - for i in range(3): - assert 2 == obj[2] - for i in range(4): - assert 3 == obj[3] - for i in range(6): - assert 4 == obj[4] - - assert (2, 2) == obj.popitem() - assert (3, 3) == obj.popitem() - - for i in range(10): - assert 4 == obj.get(4) - - assert (1, 1) == obj.popitem() - - assert 2 == len(obj) - obj.clear() - - for i in range(5): - obj[i] = i - - assert [0, 1, 2, 3, 4] == list(obj.keys()) - - for i in range(10): - obj[0] += 1 - for i in range(7): - obj[1] += 1 - for i in range(3): - obj[2] += 1 - for i in range(4): - obj[3] += 1 - for i in range(6): - obj[4] += 1 - - obj[5] = 4 - assert [5, 3, 4, 1, 0] == list(obj.keys()) - - def test_least_frequently_used(self): - obj = LFUCache(10) - - for i in range(5): - obj[i] = i * 2 - - for i in range(10): - obj[0] += 1 - for i in range(7): - obj[1] += 1 - for i in range(3): - obj[2] += 1 - for i in range(4): - obj[3] += 1 - for i in range(6): - obj[4] += 1 - - assert obj.least_frequently_used() == 2 - assert obj.least_frequently_used(1) == 3 - assert obj.least_frequently_used(4) == 0 - assert obj.least_frequently_used(5) is None - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - self._test_pickle(inner) - - -class TestVTTLCache(_TestMixin): - CACHE = VTTLCache - - def test_policy(self): - obj = VTTLCache(2) - - obj.insert(0, 1, 0.5) - time.sleep(0.501) - - with pytest.raises(KeyError): - obj[0] - - obj.insert("name", "nick", 0.3) - obj.insert("age", 18, None) - time.sleep(0.301) - - with pytest.raises(KeyError): - obj["name"] - - del obj["age"] - - obj.insert(0, 0, 70) - obj.insert(1, 1, 60) - obj.insert(2, 2, 90) - - assert 1 not in obj - assert (0, 0) == obj.popitem() - - def test_update_with_ttl(self): - obj = VTTLCache(3) - - obj.update({1: 1, 2: 2, 3: 3}, 0.5) - time.sleep(0.501) - - with pytest.raises(KeyError): - obj[1] - - with pytest.raises(KeyError): - obj[2] - - with pytest.raises(KeyError): - 
obj[3] - - def test_get_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - time.sleep(0.1) - value, dur = obj.get_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.get_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.get_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_pop_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - time.sleep(0.1) - value, dur = obj.pop_with_expire(1) - assert 1 == value - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - value, dur = obj.pop_with_expire("no-exists") - assert value is None - assert 0 == dur - - value, dur = obj.pop_with_expire("no-exists", "value") - assert "value" == value - assert 0 == dur - - def test_popitem_with_expire(self): - obj = VTTLCache(2) - - obj.insert(1, 1, 10) - obj.insert(2, 2, 6) - time.sleep(0.1) - key, value, dur = obj.popitem_with_expire() - assert (2, 2) == (key, value) - assert 6 > dur > 5, "6 > dur > 5 failed [dur: %f]" % dur - - key, value, dur = obj.popitem_with_expire() - assert (1, 1) == (key, value) - assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur - - with pytest.raises(KeyError): - obj.popitem_with_expire() - - def test_pickle(self): - def inner(c1, c2): - assert list(c1.items()) == list(c2.items()) - - import pickle - import tempfile - - c1 = self.CACHE(maxsize=0, **self.KWARGS) - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - - for i in range(10): - c1.insert(i, i * 2, i + 2) - - c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - inner(c1, c2) - - with tempfile.TemporaryFile("w+b") as fd: - c1 = self.CACHE(maxsize=100, **self.KWARGS) - c1.update({i: i for i in range(10)}) - - for i in range(10): - c1.insert(i, i * 2, i + 2) - - pickle.dump(c1, fd) - 
fd.seek(0) - c2 = pickle.load(fd) - assert c1 == c2 - assert c1.capacity() == c2.capacity() - inner(c1, c2) - - c1 = self.CACHE(maxsize=100, **self.KWARGS) - - for i in range(10): - c1.insert(i, i * 2, i + 0.5) - - time.sleep(0.51) - - c2 = pickle.loads(pickle.dumps(c1)) - - assert len(c2) == len(c1) - assert c1.capacity() == c2.capacity() - inner(c1, c2) diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py deleted file mode 100644 index 2935ee1..0000000 --- a/tests/test_concurrency.py +++ /dev/null @@ -1,108 +0,0 @@ -from cachebox import cached, LRUCache -from concurrent import futures -import asyncio -import pytest -import time - - -def test_threading_return(): - calls = 0 - - @cached(LRUCache(0)) - def func(): - nonlocal calls - time.sleep(1) - calls += 1 - return "Hello" - - with futures.ThreadPoolExecutor(max_workers=10) as executor: - future_list = [executor.submit(func) for _ in range(10)] - for future in futures.as_completed(future_list): - assert future.result() == "Hello" - - assert calls == 1 - - -def test_threading_exc(): - calls = 0 - - @cached(LRUCache(0)) - def func(): - nonlocal calls - time.sleep(1) - calls += 1 - raise RuntimeError - - with futures.ThreadPoolExecutor(max_workers=5) as executor: - future_list = [executor.submit(func) for _ in range(5)] - for future in futures.as_completed(future_list): - assert isinstance(future.exception(), RuntimeError) - - assert calls == 1 - - with futures.ThreadPoolExecutor(max_workers=5) as executor: - future_list = [executor.submit(func) for _ in range(5)] - for future in futures.as_completed(future_list): - assert isinstance(future.exception(), RuntimeError) - - assert calls == 2 - - -@pytest.mark.asyncio -async def test_asyncio_return(): - calls = 0 - - @cached(LRUCache(0)) - async def func(): - nonlocal calls - await asyncio.sleep(1) - calls += 1 - return "Hello" - - await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - ) - - assert calls == 1 - - -@pytest.mark.asyncio 
-async def test_asyncio_exc(): - calls = 0 - - @cached(LRUCache(0)) - async def func(): - nonlocal calls - await asyncio.sleep(1) - calls += 1 - raise RuntimeError - - tasks = await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - return_exceptions=True, - ) - for future in tasks: - assert isinstance(future, RuntimeError) - - assert calls == 1 - - tasks = await asyncio.gather( - func(), - func(), - func(), - func(), - func(), - return_exceptions=True, - ) - for future in tasks: - assert isinstance(future, RuntimeError) - - assert calls == 2 diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index a6ba7aa..0000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,301 +0,0 @@ -from cachebox import ( - Frozen, - LRUCache, - cached, - make_typed_key, - make_key, - cachedmethod, - EVENT_HIT, - EVENT_MISS, - is_cached, -) -import asyncio -import pytest -import time - - -def test_frozen(): - cache = LRUCache(10, {i: i for i in range(8)}) - f = Frozen(cache) - - assert f.maxsize == cache.maxsize - - with pytest.raises(TypeError): - f[0] = 0 - - with pytest.raises(TypeError): - f.pop(0) - - with pytest.raises(TypeError): - f.popitem() - - assert len(f) == 8 - assert len(f) == len(cache) - cache.insert(9, 9) - assert len(f) == 9 - assert len(f) == len(cache) - - -def test_cached(): - obj = LRUCache(3) # type: LRUCache[int, int] - - @cached(obj) - def factorial(n): - fact = 1 - for num in range(2, n + 1): - fact *= num - - time.sleep(0.1) # need for testing - return fact - - perf_1 = time.perf_counter() - factorial(15) - perf_1 = time.perf_counter() - perf_1 - - assert factorial.cache_info().length == 1 - assert factorial.cache_info().misses == 1 - - perf_2 = time.perf_counter() - factorial(15) - perf_2 = time.perf_counter() - perf_2 - - assert perf_1 > perf_2 - assert factorial.cache_info().hits == 1 - - factorial.cache_clear() - assert factorial.cache_info().hits == 0 - assert factorial.cache_info().misses == 0 - - perf_3 = 
time.perf_counter() - factorial(15) - perf_3 = time.perf_counter() - perf_3 - assert perf_3 > perf_2 - - # test cachebox__ignore - factorial.cache_clear() - assert len(factorial.cache) == 0 - factorial(15, cachebox__ignore=True) - assert len(factorial.cache) == 0 - - -def test_key_makers(): - @cached(LRUCache(125), key_maker=make_key) - def func(a, b, c): - return a, b, c - - func(1, 2, 3) - func(1.0, 2, 3.0) - func(3, 2, 1) - - assert len(func.cache) == 2 - - @cached(LRUCache(125), key_maker=make_typed_key) - def func(a, b, c): - return a, b, c - - func(1, 2, 3) - func(1.0, 2, 3.0) - func(3, 2, 1) - - assert len(func.cache) == 3 - - -@pytest.mark.asyncio -async def test_async_cached(): - obj = LRUCache(3) # type: LRUCache[int, int] - - @cached(obj) - async def factorial(n: int, _: str): - fact = 1 - for num in range(2, n + 1): - fact *= num - - await asyncio.sleep(0.1) # need for testing - return fact - - perf_1 = time.perf_counter() - await factorial(15, "cachebox") - perf_1 = time.perf_counter() - perf_1 - - assert factorial.cache_info().length == 1 - assert factorial.cache_info().misses == 1 - - perf_2 = time.perf_counter() - await factorial(15, "cachebox") - perf_2 = time.perf_counter() - perf_2 - - assert perf_1 > perf_2 - assert factorial.cache_info().hits == 1 - - factorial.cache_clear() - assert factorial.cache_info().hits == 0 - assert factorial.cache_info().misses == 0 - - perf_3 = time.perf_counter() - await factorial(15, "cachebox") - perf_3 = time.perf_counter() - perf_3 - assert perf_3 > perf_2 - - # test cachebox__ignore - factorial.cache_clear() - assert len(factorial.cache) == 0 - await factorial(15, "me", cachebox__ignore=True) - assert len(factorial.cache) == 0 - - -def test_cachedmethod(): - class TestCachedMethod: - def __init__(self, num) -> None: - self.num = num - - @cachedmethod(None) - def method(self, char: str): - assert type(self) is TestCachedMethod - return char * self.num - - cls = TestCachedMethod(10) - assert cls.method("a") == 
("a" * 10) - - -@pytest.mark.asyncio -async def test_async_cachedmethod(): - class TestCachedMethod: - def __init__(self, num) -> None: - self.num = num - - @cachedmethod(LRUCache(0)) - async def method(self, char: str): - assert type(self) is TestCachedMethod - return char * self.num - - cls = TestCachedMethod(10) - assert (await cls.method("a")) == ("a" * 10) - - -def test_callback(): - obj = LRUCache(3) - - called = list() - - @cached( - obj, - key_maker=lambda args, _: args[0], - callback=lambda event, key, value: called.append((event, key, value)), - ) - def factorial(n: int, /): - fact = 1 - for num in range(2, n + 1): - fact *= num - - return fact - - assert factorial(5) == 120 - assert len(called) == 1 - assert called[0] == (EVENT_MISS, 5, 120) - - assert factorial(5) == 120 - assert len(called) == 2 - assert called[1] == (EVENT_HIT, 5, 120) - - assert factorial(3) == 6 - assert len(called) == 3 - assert called[2] == (EVENT_MISS, 3, 6) - - assert is_cached(factorial) - - -async def _test_async_callback(): - obj = LRUCache(3) - - called = list() - - async def _callback(event, key, value): - called.append((event, key, value)) - - @cached(obj, key_maker=lambda args, _: args[0], callback=_callback) - async def factorial(n: int, /): - fact = 1 - for num in range(2, n + 1): - fact *= num - - return fact - - assert await factorial(5) == 120 - assert len(called) == 1 - assert called[0] == (EVENT_MISS, 5, 120) - - assert await factorial(5) == 120 - assert len(called) == 2 - assert called[1] == (EVENT_HIT, 5, 120) - - assert await factorial(3) == 6 - assert len(called) == 3 - assert called[2] == (EVENT_MISS, 3, 6) - - assert is_cached(factorial) - assert not is_cached(_callback) - - -def test_async_callback(): - try: - loop = asyncio.get_running_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - - loop.run_until_complete(_test_async_callback()) - - -def test_copy_level(): - class A: - def __init__(self, c: int) -> None: - self.c = c - - 
@cached(LRUCache(0)) - def func(c: int) -> A: - return A(c) - - result = func(1) - assert result.c == 1 - result.c = 2 - - result = func(1) - assert result.c == 2 # !!! - - @cached(LRUCache(0), copy_level=2) - def func(c: int) -> A: - return A(c) - - result = func(1) - assert result.c == 1 - result.c = 2 - - result = func(1) - assert result.c == 1 # :) - - -def test_classmethod(): - class MyClass: - def __init__(self, num: int) -> None: - self.num = num - - @classmethod - @cached(None, copy_level=2) - def new(cls, num: int): - return cls(num) - - a = MyClass.new(1) - assert isinstance(a, MyClass) and a.num == 1 - - -def test_staticmethod(): - class MyClass: - def __init__(self, num: int) -> None: - self.num = num - - @staticmethod - @cached(None, copy_level=2) - def new(num: int): - return num - - a = MyClass.new(1) - assert isinstance(a, int) and a == 1 From e3fb21cddfc645519351527cb7fe43cda6c186eb Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 4 Apr 2025 16:07:03 +0330 Subject: [PATCH 03/37] - Rewrite the Cache class in Python - Optimize the `isize` to `u64` strategy - Now we don't ignore the errors white doing equal operations --- Cargo.lock | 24 +-- Cargo.toml | 8 +- python/cachebox/__init__.py | 4 + python/cachebox/_cachebox.py | 234 +++++++++++++++++++++++++ src/bridge/cache.rs | 319 +++++++++++++++++++++++++++++++++++ src/bridge/mod.rs | 1 + src/common.rs | 316 ++++++++++++++++++++++++++++++++++ src/lib.rs | 10 +- src/policies/mod.rs | 1 + src/policies/nopolicy.rs | 235 ++++++++++++++++++++++++++ 10 files changed, 1135 insertions(+), 17 deletions(-) create mode 100644 python/cachebox/_cachebox.py create mode 100644 src/bridge/cache.rs create mode 100644 src/bridge/mod.rs create mode 100644 src/common.rs create mode 100644 src/policies/mod.rs create mode 100644 src/policies/nopolicy.rs diff --git a/Cargo.lock b/Cargo.lock index f2632aa..344b18c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,9 +40,9 @@ checksum = 
"37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "heck" @@ -127,9 +127,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f1c6c3591120564d64db2261bec5f910ae454f01def849b9c22835a84695e86" +checksum = "17da310086b068fbdcefbba30aeb3721d5bb9af8db4987d6735b2183ca567229" dependencies = [ "cfg-if", "indoc", @@ -145,9 +145,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9b6c2b34cf71427ea37c7001aefbaeb85886a074795e35f161f5aecc7620a7a" +checksum = "e27165889bd793000a098bb966adc4300c312497ea25cf7a690a9f0ac5aa5fc1" dependencies = [ "once_cell", "target-lexicon", @@ -155,9 +155,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5507651906a46432cdda02cd02dd0319f6064f1374c9147c45b978621d2c3a9c" +checksum = "05280526e1dbf6b420062f3ef228b78c0c54ba94e157f5cb724a609d0f2faabc" dependencies = [ "libc", "pyo3-build-config", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d394b5b4fd8d97d48336bb0dd2aebabad39f1d294edd6bcd2cccf2eefe6f42" +checksum = "5c3ce5686aa4d3f63359a5100c62a127c9f15e8398e5fdeb5deef1fed5cd5f44" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -177,9 +177,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.24.0" 
+version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd72da09cfa943b1080f621f024d2ef7e2773df7badd51aa30a2be1f8caa7c8e" +checksum = "f4cf6faa0cbfb0ed08e89beb8103ae9724eb4750e3a78084ba4017cbe94f3855" dependencies = [ "heck", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index 5d52ee4..8caf6de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,15 +22,15 @@ panic = "abort" strip = "symbols" [dependencies.hashbrown] -version = "0.15.2" +version = "0.14.5" default-features = false -features = ["inline-more"] +features = ["inline-more", "raw"] [dependencies.fastrand] version = "2.3.0" [dependencies.pyo3] -version = "0.24.0" +version = "0.24.1" default-features = false features = ["macros", "extension-module"] @@ -42,7 +42,7 @@ version = "0.12.3" default-features = false [build-dependencies.pyo3-build-config] -version = "0.24.0" +version = "0.24.1" features = ["resolve-config"] [lints.clippy] diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py index e69de29..85d12aa 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -0,0 +1,4 @@ +from ._cachebox import ( + Cache as Cache, + BaseCacheImpl as BaseCacheImpl, +) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py new file mode 100644 index 0000000..f93e748 --- /dev/null +++ b/python/cachebox/_cachebox.py @@ -0,0 +1,234 @@ +from . import _core +import typing + + +KT = typing.TypeVar("KT") +VT = typing.TypeVar("VT") +DT = typing.TypeVar("DT") + +_sential = object() + + +def _items_to_str(items, length, max_len=50): + if length <= max_len: + return "{" + ", ".join(f"{k}: {v}" for k, v in items) + "}" + + c = 0 + left = [] + right = [] + + while c < length: + k, v = next(items) + + if c <= 20: + left.append(f"{k}: {v}") + + elif (length - c) <= 20: + right.append(f"{k}: {v}") + + c += 1 + + return "{" + ", ".join(left) + " ... truncated ... 
" + ", ".join(right) + "}" + + +class BaseCacheImpl(typing.Generic[KT, VT]): + """ + This is the base class of all cache classes such as Cache, FIFOCache, ... + """ + pass + + +class IteratorView(typing.Generic[VT]): + __slots__ = ("iterator", "func") + + def __init__(self, iterator, func: typing.Callable[[tuple], typing.Any]): + self.iterator = iterator + self.func = func + + def __iter__(self): + self.iterator = self.iterator.__iter__() + return self + + def __next__(self) -> VT: + return self.func(self.iterator.__next__()) + + +class Cache(BaseCacheImpl[KT, VT]): + """ + A simple cache that has no algorithm; this is only a hashmap. + + Cache vs dict: + + it is thread-safe and unordered, while dict isn't thread-safe and ordered (Python 3.6+). + it uses very lower memory than dict. + it supports useful and new methods for managing memory, while dict does not. + it does not support popitem, while dict does. + You can limit the size of Cache, but you cannot for dict. + """ + + def __init__( + self, + maxsize: int, + iterable: typing.Union["Cache", dict, tuple, typing.Generator, None] = None, + *, + capacity: int = 0, + ) -> None: + """ + A simple cache that has no algorithm; this is only a hashmap. + + :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. + + :param iterable: you can create cache from a dict or an iterable. + + :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at + least enough capacity for inserting the given number of elements without reallocating. 
+ """ + self._raw = _core.Cache(maxsize, capacity=capacity) + + if iterable is not None: + self.update(iterable) + + @property + def maxsize(self) -> int: + return self._raw.maxsize() + + def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" + return self._raw.capacity() + + def __len__(self) -> int: + return len(self._raw) + + def __sizeof__(self): + return self._raw.__sizeof__() + + def __contains__(self, key: KT) -> bool: + return key in self._raw + + def __bool__(self) -> bool: + return not self.is_empty() + + def is_empty(self) -> bool: + return self._raw.is_empty() + + def is_full(self) -> bool: + return self._raw.is_full() + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though; + + Note: raises `OverflowError` if the cache reached the maxsize limit, + because this class does not have any algorithm. + """ + return self._raw.insert(key, value) + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """Equals to `self[key]`, but returns `default` if the cache don't have this key present.""" + return self._raw.get(key, default) + + def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Removes specified key and return the corresponding value. If the key is not found, returns the `default`. + """ + return self._raw.pop(key, default) + + def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Inserts key with a value of default if key is not in the cache. Return the value for key if key is + in the cache, else `default`. 
+ """ + return self._raw.setdefault(key, default) + + def popitem(self) -> typing.NoReturn: + raise NotImplementedError() + + def drain(self) -> typing.NoReturn: + raise NotImplementedError() + + def update(self, iterable: typing.Union["Cache", dict, tuple, typing.Generator]) -> None: + """ + Updates the cache with elements from a dictionary or an iterable object of key/value pairs. + + Note: raises `OverflowError` if the cache reached the maxsize limit. + """ + if hasattr(iterable, "items"): + iterable = iterable.items() + + self._raw.update(iterable) + + def __setitem__(self, key: KT, value: VT) -> None: + self.insert(key, value) + + def __getitem__(self, key: KT) -> VT: + val = self.get(key, _sential) + if val is _sential: + raise KeyError(key) + + return val + + def __delitem__(self, key: KT) -> None: + self._raw.remove(key) + + def __eq__(self, other) -> bool: + return self._raw == other + + def __ne__(self, other) -> bool: + return self._raw != other + + def shrink_to_fit(self) -> None: + """Shrinks the cache to fit len(self) elements.""" + self._raw.shrink_to_fit() + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. + + If reuse is True, will not free the memory for reusing in the future. + """ + self._raw.clear(reuse) + + def items(self) -> IteratorView[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + - Items are not ordered. + """ + return IteratorView(self._raw.items(), lambda x: x) + + def keys(self) -> IteratorView[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Keys are not ordered. + """ + return IteratorView(self._raw.items(), lambda x: x[0]) + + def values(self) -> IteratorView[VT]: + """ + Returns an iterable object of the cache's values. 
+ + Notes: + - You should not make any changes in cache while using this iterable object. + - Values are not ordered. + """ + return IteratorView(self._raw.items(), lambda x: x[1]) + + def __iter__(self) -> IteratorView[KT]: + return self.keys() + + def __repr__(self) -> str: + return "{}[{}/{}]({})".format( + type(self).__name__, + len(self._raw), + self._raw.maxsize(), + _items_to_str(self._raw.items(), len(self._raw)), + ) diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs new file mode 100644 index 0000000..fb50fa0 --- /dev/null +++ b/src/bridge/cache.rs @@ -0,0 +1,319 @@ +//! implement Cache, a simple cache without any algorithms and policies + +use crate::common::{Entry, ObservedIterator, PreHashObject}; + +/// A simple cache that has no algorithm; this is only a hashmap. +/// +/// `Cache` vs `dict`: +/// - it is thread-safe and unordered, while `dict` isn't thread-safe and ordered (Python 3.6+). +/// - it uses very lower memory than `dict`. +/// - it supports useful and new methods for managing memory, while `dict` does not. +/// - it does not support `popitem`, while `dict` does. +/// - You can limit the size of [`Cache`], but you cannot for `dict`. 
+#[pyo3::pyclass(module = "cachebox._core", frozen)] +pub struct Cache { + raw: parking_lot::Mutex, +} + +#[pyo3::pymethods] +impl Cache { + #[new] + #[pyo3(signature=(maxsize, *, capacity=0))] + fn __new__(maxsize: usize, capacity: usize) -> pyo3::PyResult { + let raw = crate::policies::nopolicy::NoPolicy::new(maxsize, capacity)?; + + let self_ = Self { + raw: parking_lot::Mutex::new(raw), + }; + Ok(self_) + } + + fn _state(&self) -> usize { + self.raw.lock().observed.get() as usize + } + + fn maxsize(&self) -> usize { + self.raw.lock().maxsize() + } + + fn capacity(&self) -> usize { + self.raw.lock().capacity() + } + + fn __len__(&self) -> usize { + self.raw.lock().len() + } + + fn __sizeof__(&self) -> usize { + let lock = self.raw.lock(); + lock.capacity() + * (std::mem::size_of::() + std::mem::size_of::()) + } + + fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(_) => Ok(true), + None => Ok(false), + } + } + + fn is_empty(&self) -> bool { + self.raw.lock().is_empty() + } + + fn is_full(&self) -> bool { + self.raw.lock().is_full() + } + + fn insert( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + value: pyo3::PyObject, + ) -> pyo3::PyResult> { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Absent(entry) => { + entry.insert(key, value)?; + Ok(None) + } + } + } + + fn get( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? 
{ + Some(val) => Ok(val.clone_ref(py)), + None => Ok(default), + } + } + + fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + entry.remove(); + Ok(()) + } + Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python<'_>, + iterable: pyo3::PyObject, + ) -> pyo3::PyResult<()> { + if slf.as_ptr() == iterable.as_ptr() { + return Ok(()); + } + + let mut lock = slf.raw.lock(); + lock.extend(py, iterable) + } + + fn __richcmp__( + slf: pyo3::PyRef<'_, Self>, + other: pyo3::PyObject, + op: pyo3::class::basic::CompareOp, + ) -> pyo3::PyResult { + let other = match other.extract::>(slf.py()) { + Ok(o) => o, + Err(_) => return Ok(false), + }; + + match op { + pyo3::class::basic::CompareOp::Eq => { + if slf.as_ptr() == other.as_ptr() { + return Ok(true); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &*t2) + } + pyo3::class::basic::CompareOp::Ne => { + if slf.as_ptr() != other.as_ptr() { + return Ok(true); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &*t2).map(|r| !r) + } + _ => Err(pyo3::PyErr::new::( + "only '==' or '!=' are supported", + )), + } + } + + fn pop( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? 
{ + Entry::Occupied(entry) => { + let (_, value) = entry.remove(); + Ok(value) + } + Entry::Absent(_) => Ok(default), + } + } + + fn clear(&self, reuse: bool) { + let mut lock = self.raw.lock(); + lock.clear(); + + if !reuse { + lock.shrink_to_fit(); + } + } + + fn shrink_to_fit(&self) { + let mut lock = self.raw.lock(); + lock.shrink_to_fit(); + } + + fn setdefault( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let (_, ref value) = entry.into_value(); + Ok(value.clone_ref(py)) + } + Entry::Absent(entry) => { + entry.insert(key, default.clone_ref(py))?; + Ok(default) + } + } + } + + fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + let lock = slf.raw.lock(); + let state = lock.observed.get(); + let iter = lock.iter(); + + let result = cache_items { + ptr: ObservedIterator::new(slf.as_ptr(), state), + iter: parking_lot::Mutex::new(iter), + }; + + pyo3::Py::new(slf.py(), result) + } + + fn __getnewargs__(&self) -> (usize,) { + (0,) + } + + fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let lock = self.raw.lock(); + unsafe { + let state = { + let mp = pyo3::ffi::PyDict_New(); + + if mp.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + for bucket in lock.iter() { + let (key, val) = bucket.as_ref(); + // SAFETY: we don't need to check error because we sure about key that is hashable. + pyo3::ffi::PyDict_SetItem(mp, key.obj.as_ptr(), val.as_ptr()); + } + + let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); + + #[allow(unused_unsafe)] + tuple!( + py, + 3, + 0 => maxsize, + 1 => mp, + 2 => capacity, + )? 
+ }; + Ok(pyo3::Py::from_owned_ptr(py, state)) + } + } + + pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + let mut lock = self.raw.lock(); + lock.from_pickle(py, state.as_ptr()) + } + + pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + for value in self.raw.lock().iter() { + let (key, value) = unsafe { value.as_ref() }; + visit.call(&key.obj)?; + visit.call(value)?; + } + Ok(()) + } + + pub fn __clear__(&self) { + let mut lock = self.raw.lock(); + lock.clear() + } +} + +#[allow(non_camel_case_types)] +#[pyo3::pyclass(module = "cachebox._core")] +pub struct cache_items { + pub ptr: ObservedIterator, + pub iter: parking_lot::Mutex>, +} + +#[pyo3::pymethods] +impl cache_items { + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + #[allow(unused_mut)] + fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { + let mut iter = slf.iter.lock(); + + slf.ptr.proceed(slf.py())?; + + if let Some(x) = iter.next() { + let (key, val) = unsafe { x.as_ref() }; + + tuple!( + slf.py(), + 2, + 0 => key.obj.clone_ref(slf.py()).into_ptr(), + 1 => val.clone_ref(slf.py()).into_ptr(), + ) + } else { + Err(pyo3::PyErr::new::(())) + } + } +} diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs new file mode 100644 index 0000000..a5c08fd --- /dev/null +++ b/src/bridge/mod.rs @@ -0,0 +1 @@ +pub mod cache; diff --git a/src/common.rs b/src/common.rs new file mode 100644 index 0000000..4b973b4 --- /dev/null +++ b/src/common.rs @@ -0,0 +1,316 @@ +pub fn pyobject_equal( + py: pyo3::Python<'_>, + arg1: *mut pyo3::ffi::PyObject, + arg2: *mut pyo3::ffi::PyObject, +) -> pyo3::PyResult { + unsafe { + if std::ptr::eq(arg1, arg2) { + return Ok(true); + } + + let boolean = pyo3::ffi::PyObject_RichCompareBool(arg1, arg2, pyo3::ffi::Py_EQ); + + if boolean < 0 { + Err(pyo3::PyErr::take(py).unwrap_unchecked()) + } else { + Ok(boolean == 1) + } + } +} + 
+#[rustfmt::skip] +macro_rules! non_zero_or { + ($num:expr, $_else:expr) => { + unsafe { + core::num::NonZeroUsize::new_unchecked( + if $num == 0 { $_else } else { $num } + ) + } + }; +} + +macro_rules! new_table { + ($capacity:expr) => {{ + if $capacity > 0 { + hashbrown::raw::RawTable::try_with_capacity($capacity) + .map_err(|_| pyo3::PyErr::new::(())) + } else { + Ok(hashbrown::raw::RawTable::new()) + } + }}; +} + +macro_rules! tuple { + ( + $py:expr, + $len:expr, + $($index:expr => $value:expr,)+ + ) => {{ + let tuple = unsafe { pyo3::ffi::PyTuple_New($len) }; + if tuple.is_null() { + Err(pyo3::PyErr::fetch($py)) + } else { + unsafe { + $( + pyo3::ffi::PyTuple_SetItem(tuple, $index, $value); + )+ + } + + Ok(tuple) + } + }}; + + (check $tuple:expr, size=$size:expr) => {{ + if unsafe { pyo3::ffi::PyTuple_CheckExact($tuple) } == 0 { + Err( + pyo3::PyErr::new::("expected tuple, but got another type") + ) + } else if unsafe {pyo3::ffi::PyTuple_Size($tuple)} != $size { + Err( + pyo3::PyErr::new::("tuple size is invalid") + ) + } else { + Ok(()) + } + }} +} + +macro_rules! extract_pickle_tuple { + ($py:expr, $state:expr) => {{ + let maxsize = { + let obj = pyo3::ffi::PyTuple_GetItem($state, 0); + pyo3::ffi::PyLong_AsSize_t(obj) + }; + + if let Some(e) = pyo3::PyErr::take($py) { + return Err(e); + } + + let iterable = { + let obj = pyo3::ffi::PyTuple_GetItem($state, 1); + + if pyo3::ffi::PyDict_CheckExact(obj) != 1 && pyo3::ffi::PyList_CheckExact(obj) != 1 { + return Err(pyo3::PyErr::new::( + "the iterable object is not an dict or list", + )); + } + + // Tuple returns borrowed reference + pyo3::PyObject::from_borrowed_ptr($py, obj) + }; + + let capacity = { + let obj = pyo3::ffi::PyTuple_GetItem($state, 2); + pyo3::ffi::PyLong_AsSize_t(obj) + }; + + if let Some(e) = pyo3::PyErr::take($py) { + return Err(e); + } + + (maxsize, iterable, capacity) + }}; +} + +/// Converts an isize value to a u64 value, mapping negative values to the upper half of the u64 range. 
+/// +/// This function ensures a bijective mapping between isize and u64, preserving the order of values +/// by offsetting negative values to the upper range of u64. +fn convert_isize_to_u64(v: &isize) -> u64 { + const OFFSET: u64 = 1 << 63; + + if *v >= 0 { + *v as u64 + } else { + (-(*v + 1)) as u64 + OFFSET + } +} + +/// Precomputed Hash PyObject +/// +/// A precomputed hash is a cryptographic hash value that's calculated in advance +/// and stored for later use, rather than being computed on demand when needed. +pub struct PreHashObject { + pub obj: pyo3::PyObject, + pub hash: u64, +} + +impl PreHashObject { + /// Creates a new [`PreHashObject`] + pub fn new(obj: pyo3::PyObject, hash: u64) -> Self { + Self { obj, hash } + } + + /// Calculates the hash of `object` and creates a new [`PreHashObject`] + pub fn from_pyobject(py: pyo3::Python<'_>, object: pyo3::PyObject) -> pyo3::PyResult { + unsafe { + let py_hash = pyo3::ffi::PyObject_Hash(object.as_ptr()); + + if py_hash == -1 { + // SAFETY: + // PyObject_Hash never returns -1 on success. + return Err(pyo3::PyErr::take(py).unwrap_unchecked()); + } + + Ok(Self::new(object, convert_isize_to_u64(&py_hash))) + } + } + + /// Check equality of two objects by using [`pyo3::ffi::PyObject_RichCompareBool`] + pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + pyobject_equal(py, self.obj.as_ptr(), other.obj.as_ptr()) + } +} + +impl std::fmt::Debug for PreHashObject { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "PreHashObject({})", self.hash) + } +} + +/// A view into a single entry in a table, which may either be absent or occupied. +/// +/// This is common in policies and will be used by `entry(...)` methods of them. +pub enum Entry { + Occupied(O), + Absent(V), +} + +/// A trait for adding `try_find` and `try_find_entry` methods to [`hashbrown::HashTable`] +pub trait TryFindMethods { + /// Searches for an element in the table. 
+ fn try_find( + &self, + hash: u64, + compare: impl FnMut(&T) -> Result, + ) -> Result>, E>; +} + +impl TryFindMethods for hashbrown::raw::RawTable { + #[inline(always)] + fn try_find( + &self, + hash: u64, + mut compare: impl FnMut(&T) -> Result, + ) -> Result>, E> { + let mut error = None; + + let found = self.find(hash, |item| { + match compare(item) { + Ok(boolean) => boolean, + Err(e) => { + error = Some(e); + true // To break checking + } + } + }); + + if let Some(error) = error { + Err(error) + } else { + Ok(found) + } + } +} + +/// Observe caches' changes +#[derive(Debug)] +pub struct Observed(u16); + +impl Observed { + pub fn new() -> Self { + Self(0) + } + + pub fn change(&mut self) { + self.0 = self.0.saturating_add(1); + } + + pub fn get(&self) -> u16 { + self.0 + } +} + +unsafe fn _get_state(py: pyo3::Python<'_>, ptr: *mut pyo3::ffi::PyObject) -> pyo3::PyResult { + unsafe fn inner( + py: pyo3::Python<'_>, + ptr: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { + cfg_if::cfg_if! 
{ + if #[cfg(all(Py_3_9, not(any(Py_LIMITED_API, PyPy, GraalPy))))] { + use pyo3::IntoPyObject; + + let m_name: pyo3::Bound<'_, pyo3::types::PyString> = "_state".into_pyobject(py)?; + Ok(pyo3::ffi::PyObject_CallMethodNoArgs(ptr, m_name.as_ptr())) + } else { + let state_fn = + pyo3::ffi::PyObject_GetAttrString(ptr, pyo3::ffi::c_str!("_state").as_ptr()); + + if state_fn.is_null() { + return Err(pyo3::PyErr::take(py).unwrap_unchecked()); + } + + let empty_args = pyo3::ffi::PyTuple_New(0); + let result = pyo3::ffi::PyObject_Call(state_fn, empty_args, std::ptr::null_mut()); + pyo3::ffi::Py_XDECREF(empty_args); + pyo3::ffi::Py_XDECREF(state_fn); + + Ok(result) + } + } + } + + let result = inner(py, ptr)?; + + if result.is_null() { + return Err(pyo3::PyErr::take(py).unwrap_unchecked()); + } + + let c = pyo3::ffi::PyLong_AsSize_t(result); + pyo3::ffi::Py_XDECREF(result); + + Ok(c as u16) +} + +/// Checks the [`Observed`] on iterators +#[derive(Debug)] +pub struct ObservedIterator { + pub ptr: core::ptr::NonNull, + pub statepoint: u16, +} + +impl ObservedIterator { + pub fn new(ptr: *mut pyo3::ffi::PyObject, state: u16) -> Self { + unsafe { + pyo3::ffi::Py_XINCREF(ptr); + } + + Self { + ptr: unsafe { core::ptr::NonNull::new(ptr).unwrap_unchecked() }, + statepoint: state, + } + } + + pub fn proceed(&self, py: pyo3::Python<'_>) -> pyo3::PyResult<()> { + let state = unsafe { _get_state(py, self.ptr.as_ptr())? 
}; + + if state != self.statepoint { + return Err(pyo3::PyErr::new::( + "cache changed during iteration", + )); + } + + Ok(()) + } +} + +impl Drop for ObservedIterator { + fn drop(&mut self) { + unsafe { + pyo3::ffi::Py_XDECREF(self.ptr.as_ptr()); + } + } +} + +unsafe impl Send for ObservedIterator {} +unsafe impl Sync for ObservedIterator {} diff --git a/src/lib.rs b/src/lib.rs index f61e5b1..10cea87 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,19 @@ use pyo3::prelude::*; +#[macro_use] +mod common; + +mod bridge; +mod policies; + /// cachebox core ( written in Rust ) #[pymodule(gil_used = false)] #[cold] fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__author__", env!("CARGO_PKG_AUTHORS"))?; m.add("__version__", env!("CARGO_PKG_VERSION"))?; - + + m.add_class::()?; + Ok(()) } diff --git a/src/policies/mod.rs b/src/policies/mod.rs new file mode 100644 index 0000000..d90a67a --- /dev/null +++ b/src/policies/mod.rs @@ -0,0 +1 @@ +pub mod nopolicy; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs new file mode 100644 index 0000000..83b924d --- /dev/null +++ b/src/policies/nopolicy.rs @@ -0,0 +1,235 @@ +use crate::common::Entry; +use crate::common::Observed; +use crate::common::PreHashObject; +use crate::common::TryFindMethods; + +pub struct NoPolicy { + table: hashbrown::raw::RawTable<(PreHashObject, pyo3::PyObject)>, + maxsize: std::num::NonZeroUsize, + pub observed: Observed, +} + +pub struct NoPolicyOccupied<'a> { + instance: &'a mut NoPolicy, + bucket: hashbrown::raw::Bucket<(PreHashObject, pyo3::PyObject)>, +} + +pub struct NoPolicyAbsent<'a> { + instance: &'a mut NoPolicy, +} + +impl NoPolicy { + #[inline] + pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { + let maxsize = non_zero_or!(maxsize, isize::MAX as usize); + capacity = capacity.min(maxsize.get()); + + Ok(Self { + table: new_table!(capacity)?, + maxsize, + observed: Observed::new(), + }) + } + + pub fn maxsize(&self) -> usize { + self.maxsize.get() + 
} + + pub fn len(&self) -> usize { + self.table.len() + } + + pub fn is_empty(&self) -> bool { + self.table.is_empty() + } + + pub fn is_full(&self) -> bool { + self.table.len() == self.maxsize.get() + } + + pub fn capacity(&self) -> usize { + self.table.capacity() + } + + pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::PyObject)> { + unsafe { self.table.iter() } + } + + #[rustfmt::skip] + pub fn entry( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find(key.hash, |(x, _)| x.equal(py, key))? { + Some(bucket) => { + Ok( + Entry::Occupied(NoPolicyOccupied { instance: self, bucket }) + ) + }, + None => { + Ok( + Entry::Absent(NoPolicyAbsent { instance: self }) + ) + } + } + } + + pub fn lookup( + &self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find(key.hash, |(x, _)| x.equal(py, key))? { + Some(x) => Ok(Some(unsafe { &x.as_ref().1 })), + None => Ok(None), + } + } + + pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + if self.maxsize != other.maxsize { + return Ok(false); + } + + if self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + + let result = unsafe { + self.table.iter().all(|bucket| { + let (key, val) = bucket.as_ref(); + + match other.table.try_find(key.hash, |(x, _)| x.equal(py, key)) { + Err(e) => { + error = Some(e); + true + } + Ok(Some(bucket)) => { + let (_, val2) = bucket.as_ref(); + + match crate::common::pyobject_equal(py, val.as_ptr(), val2.as_ptr()) { + Ok(result) => result, + Err(e) => { + error = Some(e); + true + } + } + } + Ok(None) => false, + } + }) + }; + + if let Some(error) = error { + return Err(error); + } + + Ok(result) + } + + pub fn clear(&mut self) { + self.table.clear(); + } + + pub fn shrink_to_fit(&mut self) { + self.table.shrink_to(self.table.len(), |(x, _)| x.hash); + } + + pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: 
pyo3::PyObject) -> pyo3::PyResult<()> { + use pyo3::types::{PyAnyMethods, PyDictMethods}; + + if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + for (key, value) in dict.iter() { + let hk = + unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; + + match self.entry(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value.unbind())?; + } + Entry::Absent(entry) => { + entry.insert(hk, value.unbind())?; + } + } + } + } else { + for pair in iterable.bind(py).try_iter()? { + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match self.entry(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value)?; + } + Entry::Absent(entry) => { + entry.insert(hk, value)?; + } + } + } + } + + Ok(()) + } + + #[allow(clippy::wrong_self_convention)] + pub fn from_pickle( + &mut self, + py: pyo3::Python<'_>, + state: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + tuple!(check state, size=3)?; + let (maxsize, iterable, capacity) = unsafe { extract_pickle_tuple!(py, state) }; + + let mut new = Self::new(maxsize, capacity)?; + new.extend(py, iterable)?; + + *self = new; + Ok(()) + } +} + +impl<'a> NoPolicyOccupied<'a> { + pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + unsafe { + let old_value = std::mem::replace(&mut self.bucket.as_mut().1, value); + self.instance.observed.change(); + Ok(old_value) + } + } + + pub fn remove(self) -> (PreHashObject, pyo3::PyObject) { + let (x, _) = unsafe { self.instance.table.remove(self.bucket) }; + x + } + + pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { + unsafe { self.bucket.as_mut() } + } +} + +impl NoPolicyAbsent<'_> { + pub fn insert(self, key: PreHashObject, value: pyo3::PyObject) -> pyo3::PyResult<()> { + if self.instance.table.len() >= self.instance.maxsize.get() { + 
// There's no algorithm for removing a key-value pair, so we raise PyOverflowError. + return Err(pyo3::PyErr::new::( + "The cache has reached the bound", + )); + } + + self.instance + .table + .insert(key.hash, (key, value), |(x, _)| x.hash); + + self.instance.observed.change(); + Ok(()) + } +} From 3bdec75dc600b5d4fec891dcef41fc55b3c23fcc Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 4 Apr 2025 16:10:18 +0330 Subject: [PATCH 04/37] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9e8ffd..63f4883 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Rewrite cache classes API in Python; this help users to use classes as subclass and customize them. - Make benchmarks better - Make error handlings better -- Make customizable and extensible: make your own strategies (If I found a good way) ## 4.5.3 - 2025-03-31 ### Changed From 49c5ffdbb2c7b68521f6ee592d788adcb5a45182 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 6 Apr 2025 15:20:35 +0330 Subject: [PATCH 05/37] Write a part of FIFOCache --- python/cachebox/_cachebox.py | 139 ++++++++++++++++-- src/bridge/cache.rs | 19 +-- src/bridge/fifocache.rs | 271 +++++++++++++++++++++++++++++++++++ src/bridge/mod.rs | 1 + src/lib.rs | 1 + src/policies/fifo.rs | 268 ++++++++++++++++++++++++++++++++++ src/policies/mod.rs | 1 + src/policies/nopolicy.rs | 15 ++ 8 files changed, 690 insertions(+), 25 deletions(-) create mode 100644 src/bridge/fifocache.rs create mode 100644 src/policies/fifo.rs diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index f93e748..045e4be 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -35,6 +35,7 @@ class BaseCacheImpl(typing.Generic[KT, VT]): """ This is the base class of all cache classes such as Cache, FIFOCache, ... 
""" + pass @@ -57,13 +58,12 @@ class Cache(BaseCacheImpl[KT, VT]): """ A simple cache that has no algorithm; this is only a hashmap. - Cache vs dict: - - it is thread-safe and unordered, while dict isn't thread-safe and ordered (Python 3.6+). - it uses very lower memory than dict. - it supports useful and new methods for managing memory, while dict does not. - it does not support popitem, while dict does. - You can limit the size of Cache, but you cannot for dict. + `Cache` vs `dict`: + - it is thread-safe and unordered, while `dict` isn't thread-safe and ordered (Python 3.6+). + - it uses very lower memory than `dict`. + - it supports useful and new methods for managing memory, while `dict` does not. + - it does not support popitem, while `dict` does. + - You can limit the size of Cache, but you cannot for `dict`. """ def __init__( @@ -165,14 +165,16 @@ def __setitem__(self, key: KT, value: VT) -> None: self.insert(key, value) def __getitem__(self, key: KT) -> VT: - val = self.get(key, _sential) + val = self._raw.get(key, _sential) if val is _sential: raise KeyError(key) return val def __delitem__(self, key: KT) -> None: - self._raw.remove(key) + val = self._raw.pop(key, _sential) + if val is _sential: + raise KeyError(key) def __eq__(self, other) -> bool: return self._raw == other @@ -232,3 +234,122 @@ def __repr__(self) -> str: self._raw.maxsize(), _items_to_str(self._raw.items(), len(self._raw)), ) + + +class FIFOCache(BaseCacheImpl[KT, VT]): + def __init__( + self, + maxsize: int, + iterable: typing.Union["Cache", dict, tuple, typing.Generator, None] = None, + *, + capacity: int = 0, + ) -> None: + self._raw = _core.FIFOCache(maxsize, capacity=capacity) + + if iterable is not None: + self.update(iterable) + + @property + def maxsize(self) -> int: + return self._raw.maxsize() + + def capacity(self) -> int: + return self._raw.capacity() + + def __len__(self) -> int: + return len(self._raw) + + def __sizeof__(self): + return self._raw.__sizeof__() + + def 
__contains__(self, key: KT) -> bool: + return key in self._raw + + def __bool__(self) -> bool: + return not self.is_empty() + + def is_empty(self) -> bool: + return self._raw.is_empty() + + def is_full(self) -> bool: + return self._raw.is_full() + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + return self._raw.insert(key, value) + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + return self._raw.get(key, default) + + def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + return self._raw.pop(key, default) + + def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + return self._raw.setdefault(key, default) + + def popitem(self) -> typing.Tuple[KT, VT]: + return self._raw.popitem() + + def drain(self, n: int) -> int: + if n == 0: + return 0 + + for i in range(n): + try: + self._raw.popitem() + except KeyError: + return i + + return i + + def update(self, iterable: typing.Union["Cache", dict, tuple, typing.Generator]) -> None: + if hasattr(iterable, "items"): + iterable = iterable.items() + + self._raw.update(iterable) + + def __setitem__(self, key: KT, value: VT) -> None: + self.insert(key, value) + + def __getitem__(self, key: KT) -> VT: + val = self._raw.get(key, _sential) + if val is _sential: + raise KeyError(key) + + return val + + def __delitem__(self, key: KT) -> None: + val = self._raw.pop(key, _sential) + if val is _sential: + raise KeyError(key) + + def __eq__(self, other) -> bool: + return self._raw == other + + def __ne__(self, other) -> bool: + return self._raw != other + + def shrink_to_fit(self) -> None: + self._raw.shrink_to_fit() + + def clear(self, *, reuse: bool = False) -> None: + self._raw.clear(reuse) + + def items(self) -> IteratorView[typing.Tuple[KT, VT]]: + return IteratorView(self._raw.items(), lambda x: x) + + def keys(self) -> IteratorView[KT]: + return IteratorView(self._raw.items(), lambda x: x[0]) + + def 
values(self) -> IteratorView[VT]: + return IteratorView(self._raw.items(), lambda x: x[1]) + + def __iter__(self) -> IteratorView[KT]: + return self.keys() + + def __repr__(self) -> str: + return "{}[{}/{}]({})".format( + type(self).__name__, + len(self._raw), + self._raw.maxsize(), + _items_to_str(self._raw.items(), len(self._raw)), + ) diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs index fb50fa0..1238cae 100644 --- a/src/bridge/cache.rs +++ b/src/bridge/cache.rs @@ -1,6 +1,6 @@ -//! implement Cache, a simple cache without any algorithms and policies - -use crate::common::{Entry, ObservedIterator, PreHashObject}; +use crate::common::Entry; +use crate::common::ObservedIterator; +use crate::common::PreHashObject; /// A simple cache that has no algorithm; this is only a hashmap. /// @@ -101,19 +101,6 @@ impl Cache { } } - fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult<()> { - let key = PreHashObject::from_pyobject(py, key)?; - let mut lock = self.raw.lock(); - - match lock.entry(py, &key)? { - Entry::Occupied(entry) => { - entry.remove(); - Ok(()) - } - Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), - } - } - fn update( slf: pyo3::PyRef<'_, Self>, py: pyo3::Python<'_>, diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs new file mode 100644 index 0000000..584a05b --- /dev/null +++ b/src/bridge/fifocache.rs @@ -0,0 +1,271 @@ +use crate::common::Entry; +use crate::common::PreHashObject; + +/// FIFO Cache implementation - First-In First-Out Policy (thread-safe). +/// +/// In simple terms, the FIFO cache will remove the element that has been in the cache the longest. 
+#[pyo3::pyclass(module = "cachebox._core", frozen)] +pub struct FIFOCache { + raw: parking_lot::Mutex, +} + +#[pyo3::pymethods] +impl FIFOCache { + #[new] + #[pyo3(signature=(maxsize, *, capacity=0))] + fn __new__(maxsize: usize, capacity: usize) -> pyo3::PyResult { + let raw = crate::policies::fifo::FIFOPolicy::new(maxsize, capacity)?; + + let self_ = Self { + raw: parking_lot::Mutex::new(raw), + }; + Ok(self_) + } + + fn _state(&self) -> usize { + self.raw.lock().observed.get() as usize + } + + fn maxsize(&self) -> usize { + self.raw.lock().maxsize() + } + + fn capacity(&self) -> usize { + self.raw.lock().capacity().0 + } + + fn __len__(&self) -> usize { + self.raw.lock().len() + } + + fn __sizeof__(&self) -> usize { + let lock = self.raw.lock(); + let capacity = lock.capacity(); + + capacity.0 * std::mem::size_of::() + + capacity.1 + * (std::mem::size_of::() + + std::mem::size_of::()) + } + + fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(_) => Ok(true), + None => Ok(false), + } + } + + fn is_empty(&self) -> bool { + self.raw.lock().is_empty() + } + + fn is_full(&self) -> bool { + self.raw.lock().is_full() + } + + fn insert( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + value: pyo3::PyObject, + ) -> pyo3::PyResult> { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Absent(entry) => { + entry.insert(py, key, value)?; + Ok(None) + } + } + } + + fn get( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? 
{ + Some(val) => Ok(val.clone_ref(py)), + None => Ok(default), + } + } + + // fn update( + // slf: pyo3::PyRef<'_, Self>, + // py: pyo3::Python<'_>, + // iterable: pyo3::PyObject, + // ) -> pyo3::PyResult<()> { + // if slf.as_ptr() == iterable.as_ptr() { + // return Ok(()); + // } + + // let mut lock = slf.raw.lock(); + // lock.extend(py, iterable) + // } + + // fn __richcmp__( + // slf: pyo3::PyRef<'_, Self>, + // other: pyo3::PyObject, + // op: pyo3::class::basic::CompareOp, + // ) -> pyo3::PyResult { + // let other = match other.extract::>(slf.py()) { + // Ok(o) => o, + // Err(_) => return Ok(false), + // }; + + // match op { + // pyo3::class::basic::CompareOp::Eq => { + // if slf.as_ptr() == other.as_ptr() { + // return Ok(true); + // } + + // let t1 = slf.raw.lock(); + // let t2 = other.raw.lock(); + // t1.equal(slf.py(), &*t2) + // } + // pyo3::class::basic::CompareOp::Ne => { + // if slf.as_ptr() != other.as_ptr() { + // return Ok(true); + // } + + // let t1 = slf.raw.lock(); + // let t2 = other.raw.lock(); + // t1.equal(slf.py(), &*t2).map(|r| !r) + // } + // _ => Err(pyo3::PyErr::new::( + // "only '==' or '!=' are supported", + // )), + // } + // } + + fn pop( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? 
{ + Entry::Occupied(entry) => { + let (_, value) = entry.remove(); + Ok(value) + } + Entry::Absent(_) => Ok(default), + } + } + + fn clear(&self, reuse: bool) { + let mut lock = self.raw.lock(); + lock.clear(); + + if !reuse { + lock.shrink_to_fit(); + } + } + + fn shrink_to_fit(&self) { + let mut lock = self.raw.lock(); + lock.shrink_to_fit(); + } + + fn setdefault( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let (_, ref value) = entry.into_value(); + Ok(value.clone_ref(py)) + } + Entry::Absent(entry) => { + entry.insert(py, key, default.clone_ref(py))?; + Ok(default) + } + } + } + + // fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + // let lock = slf.raw.lock(); + // let state = lock.observed.get(); + // let iter = lock.iter(); + + // let result = cache_items { + // ptr: ObservedIterator::new(slf.as_ptr(), state), + // iter: parking_lot::Mutex::new(iter), + // }; + + // pyo3::Py::new(slf.py(), result) + // } + + // fn __getnewargs__(&self) -> (usize,) { + // (0,) + // } + + // fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + // let lock = self.raw.lock(); + // unsafe { + // let state = { + // let mp = pyo3::ffi::PyDict_New(); + + // if mp.is_null() { + // return Err(pyo3::PyErr::fetch(py)); + // } + + // for bucket in lock.iter() { + // let (key, val) = bucket.as_ref(); + // // SAFETY: we don't need to check error because we sure about key that is hashable. + // pyo3::ffi::PyDict_SetItem(mp, key.obj.as_ptr(), val.as_ptr()); + // } + + // let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + // let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); + + // #[allow(unused_unsafe)] + // tuple!( + // py, + // 3, + // 0 => maxsize, + // 1 => mp, + // 2 => capacity, + // )? 
+ // }; + // Ok(pyo3::Py::from_owned_ptr(py, state)) + // } + // } + + // pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + // let mut lock = self.raw.lock(); + // lock.from_pickle(py, state.as_ptr()) + // } + + // popitem + + pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + for value in self.raw.lock().entries_iter() { + visit.call(&value.0.obj)?; + visit.call(&value.1)?; + } + Ok(()) + } + + pub fn __clear__(&self) { + let mut lock = self.raw.lock(); + lock.clear() + } +} diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index a5c08fd..ec04015 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -1 +1,2 @@ pub mod cache; +pub mod fifocache; diff --git a/src/lib.rs b/src/lib.rs index 10cea87..ae4f8b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs new file mode 100644 index 0000000..09e811e --- /dev/null +++ b/src/policies/fifo.rs @@ -0,0 +1,268 @@ +//! The FIFO policy, This is inspired by Rust's indexmap with some changes. + +use crate::common::Entry; +use crate::common::Observed; +use crate::common::PreHashObject; +use crate::common::TryFindMethods; + +use std::collections::VecDeque; + +const MAX_N_SHIFT: usize = usize::MAX - (isize::MAX as usize); + +pub struct FIFOPolicy { + /// We set [Vec] objects indexes in hashtable to make search O(1). hashtable is unordered, + /// that is why we are using [Vec]. + table: hashbrown::raw::RawTable, + + /// Keep objects in order. + entries: VecDeque<(PreHashObject, pyo3::PyObject)>, + maxsize: core::num::NonZeroUsize, + + /// When we pop front an object from entries, two operations have to do: + /// 1. Shift all elements in vector. + /// 2. Decrement all indexes in hashtable. 
+ /// + /// these are expensive operations in large elements; + /// - We removed first operation by using [`std::collections::VecDeque`] instead of [`Vec`] + /// - We removed second operation by using this variable: Instead of decrement indexes in hashtable, + /// we will increment this variable. + n_shifts: usize, + + pub observed: Observed, +} + +pub struct FIFOPolicyOccupied<'a> { + instance: &'a mut FIFOPolicy, + bucket: hashbrown::raw::Bucket, +} + +pub struct FIFOPolicyAbsent<'a> { + instance: &'a mut FIFOPolicy, +} + +impl FIFOPolicy { + #[inline] + pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { + let maxsize = non_zero_or!(maxsize, isize::MAX as usize); + capacity = capacity.min(maxsize.get()); + + Ok(Self { + table: new_table!(capacity)?, + entries: VecDeque::new(), + maxsize, + n_shifts: 0, + observed: Observed::new(), + }) + } + + #[inline] + pub fn maxsize(&self) -> usize { + self.maxsize.get() + } + + #[inline] + pub fn len(&self) -> usize { + self.table.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.table.is_empty() + } + + #[inline] + pub fn is_full(&self) -> bool { + self.table.len() == self.maxsize.get() + } + + #[inline] + pub fn capacity(&self) -> (usize, usize) { + (self.table.capacity(), self.entries.capacity()) + } + + // #[inline] + // pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::PyObject)> { + // unsafe { self.table.iter() } + // } + + #[inline] + fn decrement_indexes(&mut self, start: usize, end: usize) { + if start <= 1 && end == self.entries.len() && self.n_shifts < MAX_N_SHIFT { + self.n_shifts += 1; + return; + } + + if (end - start) > self.table.buckets() / 2 { + unsafe { + for bucket in self.table.iter() { + let i = bucket.as_mut(); + if start <= (*i) - self.n_shifts && (*i) - self.n_shifts < end { + *i -= 1; + } + } + } + } else { + let shifted = self.entries.range(start..end); + for (i, entry) in (start..end).zip(shifted) { + let old = self + .table + 
.get_mut(entry.0.hash, |x| (*x) - self.n_shifts == i) + .expect("index not found"); + + *old -= 1; + } + } + } + + #[inline] + pub fn popitem( + &mut self, + py: pyo3::Python<'_>, + ) -> pyo3::PyResult> { + let ret = self.entries.front(); + if ret.is_none() { + return Ok(None); + } + + let ret = unsafe { ret.unwrap_unchecked() }; + + match self.table.try_find(ret.0.hash, |x| { + self.entries[(*x) - self.n_shifts].0.equal(py, &ret.0) + })? { + Some(bucket) => { + unsafe { self.table.remove(bucket) }; + } + None => unreachable!("popitem key not found in table"), + } + + let ret = unsafe { self.entries.pop_front().unwrap_unchecked() }; + + self.observed.change(); + + self.decrement_indexes(1, self.entries.len()); + Ok(Some(ret)) + } + + #[rustfmt::skip] + pub fn entry( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find(key.hash, |x| self.entries[(*x) - self.n_shifts].0.equal(py, key))? + { + Some(bucket) => { + Ok( + Entry::Occupied(FIFOPolicyOccupied { instance: self, bucket }) + ) + } + None => { + Ok( + Entry::Absent(FIFOPolicyAbsent { instance: self }) + ) + }, + } + } + + pub fn lookup( + &self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find(key.hash, |x| { + self.entries[(*x) - self.n_shifts].0.equal(py, key) + })? 
+ .map(|bucket| unsafe { bucket.as_ref() }) + { + Some(index) => Ok(Some(&self.entries[(*index) - self.n_shifts].1)), + None => Ok(None), + } + } + + #[inline] + pub fn clear(&mut self) { + self.table.clear(); + self.entries.clear(); + self.n_shifts = 0; + self.observed.change(); + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.table.shrink_to(self.table.len(), |x| { + self.entries[(*x) - self.n_shifts].0.hash + }); + self.entries.shrink_to_fit(); + self.observed.change(); + } + + pub fn entries_iter( + &self, + ) -> std::collections::vec_deque::Iter<'_, (PreHashObject, pyo3::PyObject)> { + self.entries.iter() + } +} + +impl<'a> FIFOPolicyOccupied<'a> { + #[inline] + pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + let index = unsafe { self.bucket.as_ref() }; + let item = &mut self.instance.entries[index - self.instance.n_shifts]; + let old_value = std::mem::replace(&mut item.1, value); + self.instance.observed.change(); + Ok(old_value) + } + + #[inline] + pub fn remove(self) -> (PreHashObject, pyo3::PyObject) { + // let (PreHashObject { hash, .. 
}, _) = &self.instance.entries[self.index - self.instance.n_shifts]; + let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; + index = index - self.instance.n_shifts; + + self.instance + .decrement_indexes(index + 1, self.instance.entries.len()); + + let m = self.instance.entries.remove(index).unwrap(); + + self.instance.observed.change(); + m + } + + #[inline] + pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { + let index = unsafe { self.bucket.as_ref() }; + &mut self.instance.entries[index - self.instance.n_shifts] + } +} + +impl<'a> FIFOPolicyAbsent<'a> { + #[inline] + pub fn insert( + self, + py: pyo3::Python<'_>, + key: PreHashObject, + value: pyo3::PyObject, + ) -> pyo3::PyResult<()> { + if self.instance.table.len() >= self.instance.maxsize.get() { + self.instance.popitem(py)?; + } + + self.instance.table.insert( + key.hash, + self.instance.entries.len() + self.instance.n_shifts, + |index| { + self.instance.entries[(*index) - self.instance.n_shifts] + .0 + .hash + }, + ); + self.instance.entries.push_back((key, value)); + + self.instance.observed.change(); + Ok(()) + } +} diff --git a/src/policies/mod.rs b/src/policies/mod.rs index d90a67a..e668893 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -1 +1,2 @@ +pub mod fifo; pub mod nopolicy; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 83b924d..3d65388 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -31,26 +31,32 @@ impl NoPolicy { }) } + #[inline] pub fn maxsize(&self) -> usize { self.maxsize.get() } + #[inline] pub fn len(&self) -> usize { self.table.len() } + #[inline] pub fn is_empty(&self) -> bool { self.table.is_empty() } + #[inline] pub fn is_full(&self) -> bool { self.table.len() == self.maxsize.get() } + #[inline] pub fn capacity(&self) -> usize { self.table.capacity() } + #[inline] pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::PyObject)> { unsafe { self.table.iter() } } @@ 
-129,12 +135,16 @@ impl NoPolicy { Ok(result) } + #[inline] pub fn clear(&mut self) { self.table.clear(); + self.observed.change(); } + #[inline] pub fn shrink_to_fit(&mut self) { self.table.shrink_to(self.table.len(), |(x, _)| x.hash); + self.observed.change(); } pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { @@ -198,6 +208,7 @@ impl NoPolicy { } impl<'a> NoPolicyOccupied<'a> { + #[inline] pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { unsafe { let old_value = std::mem::replace(&mut self.bucket.as_mut().1, value); @@ -206,17 +217,21 @@ impl<'a> NoPolicyOccupied<'a> { } } + #[inline] pub fn remove(self) -> (PreHashObject, pyo3::PyObject) { let (x, _) = unsafe { self.instance.table.remove(self.bucket) }; + self.instance.observed.change(); x } + #[inline] pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { unsafe { self.bucket.as_mut() } } } impl NoPolicyAbsent<'_> { + #[inline] pub fn insert(self, key: PreHashObject, value: pyo3::PyObject) -> pyo3::PyResult<()> { if self.instance.table.len() >= self.instance.maxsize.get() { // There's no algorithm for removing a key-value pair, so we raise PyOverflowError. 
From aaccae24f50506bdd98b2e96df797d4ab0087f05 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 7 Apr 2025 13:50:39 +0330 Subject: [PATCH 06/37] Optimize dependencies profiles Fix some bugs Write tests for FIFOCache and Cache Optimize some operations Write FIFOCache --- .gitignore | 3 +- Cargo.toml | 3 + pyproject.toml | 4 + python/cachebox/__init__.py | 2 + python/cachebox/_cachebox.py | 106 +++++--- python/tests/__init__.py | 0 python/tests/mixin.py | 455 +++++++++++++++++++++++++++++++++++ python/tests/test_caches.py | 90 +++++++ src/bridge/cache.rs | 55 ++--- src/bridge/fifocache.rs | 281 ++++++++++++--------- src/bridge/mod.rs | 4 + src/common.rs | 41 +++- src/lib.rs | 4 +- src/policies/fifo.rs | 132 +++++++++- src/policies/nopolicy.rs | 5 +- 15 files changed, 984 insertions(+), 201 deletions(-) create mode 100644 python/tests/__init__.py create mode 100644 python/tests/mixin.py create mode 100644 python/tests/test_caches.py diff --git a/.gitignore b/.gitignore index f280a73..404c5d5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ /target __pycache__ *.so -/.caches.rs \ No newline at end of file +/.coverage +/.pytest_cache diff --git a/Cargo.toml b/Cargo.toml index 8caf6de..635869b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,9 @@ version = "1.0.0" version = "0.12.3" default-features = false +[profile.release.package."*"] +codegen-units = 1 # better optimizations + [build-dependencies.pyo3-build-config] version = "0.24.1" features = ["resolve-config"] diff --git a/pyproject.toml b/pyproject.toml index 313e4f0..e80611a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,9 +44,13 @@ Homepage = 'https://github.com/awolverp/cachebox' [project.optional-dependencies] tests = [ "pytest", + "pytest-asyncio", "coverage", ] +[tool.pytest.ini_options] +asyncio_default_fixture_loop_scope = "function" + [tool.maturin] python-source = "python" features = ["pyo3/extension-module"] diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py 
index 85d12aa..bc94657 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -1,4 +1,6 @@ from ._cachebox import ( Cache as Cache, + FIFOCache as FIFOCache, BaseCacheImpl as BaseCacheImpl, + IteratorView as IteratorView, ) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 045e4be..f91e24b 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -6,29 +6,26 @@ VT = typing.TypeVar("VT") DT = typing.TypeVar("DT") -_sential = object() - -def _items_to_str(items, length, max_len=50): - if length <= max_len: +def _items_to_str(items, length): + if length <= 50: return "{" + ", ".join(f"{k}: {v}" for k, v in items) + "}" c = 0 left = [] - right = [] while c < length: k, v = next(items) - if c <= 20: + if c <= 50: left.append(f"{k}: {v}") - elif (length - c) <= 20: - right.append(f"{k}: {v}") + else: + break c += 1 - return "{" + ", ".join(left) + " ... truncated ... " + ", ".join(right) + "}" + return "{%s, ... %d more ...}" % (", ".join(left), length - c) class BaseCacheImpl(typing.Generic[KT, VT]): @@ -129,13 +126,19 @@ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """Equals to `self[key]`, but returns `default` if the cache don't have this key present.""" - return self._raw.get(key, default) + try: + return self._raw.get(key) + except _core.CoreKeyError: + return default def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """ Removes specified key and return the corresponding value. If the key is not found, returns the `default`. 
""" - return self._raw.pop(key, default) + try: + return self._raw.remove(key) + except _core.CoreKeyError: + return default def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """ @@ -165,22 +168,28 @@ def __setitem__(self, key: KT, value: VT) -> None: self.insert(key, value) def __getitem__(self, key: KT) -> VT: - val = self._raw.get(key, _sential) - if val is _sential: - raise KeyError(key) - - return val + try: + return self._raw.get(key) + except _core.CoreKeyError: + raise KeyError(key) from None def __delitem__(self, key: KT) -> None: - val = self._raw.pop(key, _sential) - if val is _sential: - raise KeyError(key) + try: + self._raw.remove(key) + except _core.CoreKeyError: + raise KeyError(key) from None def __eq__(self, other) -> bool: - return self._raw == other + if not isinstance(other, Cache): + return False + + return self._raw == other._raw def __ne__(self, other) -> bool: - return self._raw != other + if not isinstance(other, Cache): + return False + + return self._raw != other._raw def shrink_to_fit(self) -> None: """Shrinks the cache to fit len(self) elements.""" @@ -278,16 +287,25 @@ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: return self._raw.insert(key, value) def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - return self._raw.get(key, default) + try: + return self._raw.get(key) + except _core.CoreKeyError: + return default def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - return self._raw.pop(key, default) + try: + return self._raw.remove(key) + except _core.CoreKeyError: + return default def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: return self._raw.setdefault(key, default) def popitem(self) -> typing.Tuple[KT, VT]: - return self._raw.popitem() + try: + return self._raw.popitem() + except _core.CoreKeyError: + raise KeyError() from None def drain(self, n: int) -> int: if 
n == 0: @@ -296,7 +314,7 @@ def drain(self, n: int) -> int: for i in range(n): try: self._raw.popitem() - except KeyError: + except _core.CoreKeyError: return i return i @@ -311,22 +329,28 @@ def __setitem__(self, key: KT, value: VT) -> None: self.insert(key, value) def __getitem__(self, key: KT) -> VT: - val = self._raw.get(key, _sential) - if val is _sential: - raise KeyError(key) - - return val + try: + return self._raw.get(key) + except _core.CoreKeyError: + raise KeyError(key) from None def __delitem__(self, key: KT) -> None: - val = self._raw.pop(key, _sential) - if val is _sential: - raise KeyError(key) + try: + self._raw.remove(key) + except _core.CoreKeyError: + raise KeyError(key) from None def __eq__(self, other) -> bool: - return self._raw == other + if not isinstance(other, FIFOCache): + return False + + return self._raw == other._raw def __ne__(self, other) -> bool: - return self._raw != other + if not isinstance(other, FIFOCache): + return False + + return self._raw != other._raw def shrink_to_fit(self) -> None: self._raw.shrink_to_fit() @@ -343,6 +367,18 @@ def keys(self) -> IteratorView[KT]: def values(self) -> IteratorView[VT]: return IteratorView(self._raw.items(), lambda x: x[1]) + def first(self, n: int = 0) -> typing.Optional[KT]: + if n < 0: + n = len(self._raw) + n + + if n < 0: + return None + + return self._raw.get_index(n) + + def last(self) -> typing.Optional[KT]: + return self._raw.get_index(len(self._raw) - 1) + def __iter__(self) -> IteratorView[KT]: return self.keys() diff --git a/python/tests/__init__.py b/python/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/mixin.py b/python/tests/mixin.py new file mode 100644 index 0000000..254b5cd --- /dev/null +++ b/python/tests/mixin.py @@ -0,0 +1,455 @@ +from cachebox import BaseCacheImpl, IteratorView +import dataclasses +import pytest +import typing +import sys + + +@dataclasses.dataclass +class EQ: + def __init__(self, val: int) -> None: + self.val 
= val + + def __eq__(self, other: "EQ") -> bool: + return self.val == other.val + + def __hash__(self) -> int: + return self.val + + +@dataclasses.dataclass +class NoEQ: + def __init__(self, val: int) -> None: + self.val = val + + def __hash__(self) -> int: + return self.val + + +def getsizeof(obj, use_sys=True): + try: + if use_sys: + return sys.getsizeof(obj) + else: + return obj.__sizeof__() + except TypeError: # PyPy doesn't implement getsizeof or __sizeof__ + return len(obj) + + +class _TestMixin: + CACHE: typing.Type[BaseCacheImpl] + + KWARGS: dict = {} + NO_POLICY: bool = False + ITERATOR_CLASS: typing.Optional[type] = IteratorView + + def test__new__(self): + cache = self.CACHE(10, **self.KWARGS, capacity=8) + assert cache.maxsize == 10 + assert 20 > cache.capacity() >= 8, "capacity: {}".format(cache.capacity()) + + cache = self.CACHE(20, **self.KWARGS, capacity=0) + assert cache.maxsize == 20 + assert 2 >= cache.capacity() >= 0 # This is depends on platform + + cache = self.CACHE(20, **self.KWARGS, capacity=100) + assert cache.maxsize == 20 + assert 30 > cache.capacity() >= 20 + + cache = self.CACHE(0, **self.KWARGS, capacity=8) + assert cache.maxsize == sys.maxsize + assert 20 > cache.capacity() >= 8 + + cache = self.CACHE(0, **self.KWARGS, capacity=0) + assert cache.maxsize == sys.maxsize + assert 2 >= cache.capacity() >= 0 # This is depends on platform + + def test_overflow(self): + if not self.NO_POLICY: + return + + cache = self.CACHE(10, **self.KWARGS, capacity=10) + + for i in range(10): + cache[i] = i + + with pytest.raises(OverflowError): + cache["new-key"] = "new-value" + + def test___len__(self): + cache = self.CACHE(10, **self.KWARGS, capacity=10) + + assert len(cache) == 0 + assert cache.is_empty() + + cache[0] = 0 + assert len(cache) == 1 + + cache[1] = 1 + cache[2] = 2 + cache[3] = 3 + assert len(cache) == 4 + + cache[0] = 10 + cache[1] = 5 + assert len(cache) == 4 + + for i in range(1000, 1000 + (10 - len(cache))): + cache[i] = i + + assert 
len(cache) == 10 + assert cache.is_full() + + def test___sizeof__(self): + cache = self.CACHE(10, **self.KWARGS, capacity=10) + + # all classes have to implement __sizeof__ + # __sizeof__ returns exactly allocated memory size by cache + # but sys.getsizeof add also garbage collector overhead to that, so sometimes + # sys.getsizeof is greater than __sizeof__ + getsizeof(cache, False) + + def test___bool__(self): + cache = self.CACHE(1, **self.KWARGS, capacity=1) + + if cache: + pytest.fail("bool(cache) returns invalid response") + + cache[1] = 1 + if not cache: + pytest.fail("not bool(cache) returns invalid response") + + def test___contains__(self): + cache = self.CACHE(1, **self.KWARGS, capacity=1) + + assert 1 not in cache + cache[1] = 1 + assert 1 in cache + + def test___setitem__(self): + cache = self.CACHE(10, **self.KWARGS, capacity=10) + + with pytest.raises(KeyError): + cache[1] + + cache[1] = 1 + cache[1] + cache[0] = 0 + cache[0] + cache[2] = 2 + cache[3] = 3 + + with pytest.raises(KeyError): + cache[4] + + del cache[1] + del cache[2] + del cache[3] + + cache[0] + + with pytest.raises(KeyError): + cache[2] + + def test___repr__(self): + cache = self.CACHE(2, **self.KWARGS, capacity=2) + assert str(cache) == repr(cache) + assert repr(cache).startswith(self.CACHE.__name__) + + def test_insert(self): + cache = self.CACHE(5, **self.KWARGS, capacity=5) + + assert cache.insert(1, 1) is None + assert cache.insert(1, 1) == 1 + assert cache.insert(1, 10) == 1 + assert cache.insert(1, 2) == 10 + + cache[5] = 5 + + assert cache.insert(5, "value") == 5 + assert cache.insert(5, 5) == "value" + + del cache[5] + + assert cache.insert(5, 5) is None + + def test_get(self): + cache = self.CACHE(5, **self.KWARGS, capacity=5) + + for i in range(5): + cache[i] = i + + assert cache.get(0, None) == 0 + assert cache.get(1, None) == 1 + assert cache.get("no-exists") is None + assert cache.get("no-exists", None) is None + assert cache.get("no-exists", 111) == 111 + + def 
test_pop(self): + cache = self.CACHE(5, **self.KWARGS, capacity=5) + + for i in range(5): + cache[i] = i * 2 + + assert cache.pop(1, None) == 2 + assert cache.get(1, None) is None + assert cache.pop(2, None) == 4 + assert cache.get(2, None) is None + + assert cache.pop(10, None) is None + assert cache.pop(10, 2) == 2 + + def test_setdefault(self): + obj = self.CACHE(2, **self.KWARGS, capacity=2) + + obj.setdefault("name", "nick") + obj["age"] = 18 + assert 18 == obj.setdefault("age", 1000) + assert 18 == obj["age"] + assert "nick" == obj["name"] + + if self.NO_POLICY: + with pytest.raises(OverflowError): + obj.setdefault("newkey", 0) + + def test_clear(self): + obj = self.CACHE(2, **self.KWARGS, capacity=2) + + obj[1] = 1 + obj[2] = 2 + assert 2 == len(obj) + + cap = getsizeof(obj, False) + obj.clear(reuse=True) + assert 0 == len(obj) + try: + assert getsizeof(obj, False) >= cap + except AssertionError as e: + # if not isinstance(obj, (LRUCache, LFUCache)): + raise e + + obj[1] = 1 + obj[2] = 2 + assert 2 == len(obj) + + cap = getsizeof(obj, False) + obj.clear(reuse=False) + assert 0 == len(obj) + # this is not stable and + # may increases the capacity! 
+ try: + assert cap != getsizeof(obj, False) + except AssertionError as e: + # if not isinstance(obj, (LRUCache, LFUCache)): + raise e + + def test_update(self): + obj = self.CACHE(2, **self.KWARGS, capacity=2) + + obj.update({1: 1, 2: 2}) + assert 2 == len(obj) + assert 1 == obj[1] + assert 2 == obj[2] + + obj.update({1: 1, 2: 2}) + assert 2 == len(obj) + assert 1 == obj[1] + assert 2 == obj[2] + + obj.update([(1, "a"), (2, "b")]) + assert 2 == len(obj) + assert "a" == obj[1] + assert "b" == obj[2] + + if self.NO_POLICY: + with pytest.raises(OverflowError): + obj.update([(3, "a"), (4, "b")]) + else: + obj.update([(3, "a"), (4, "b")]) + + kw = self.KWARGS.copy() + kw["iterable"] = {1: 1, 2: 2} + obj = self.CACHE(2, **kw, capacity=2) + assert 2 == len(obj) + assert 1 == obj[1] + assert 2 == obj[2] + + kw["iterable"] = [(1, "a"), (2, "b")] + obj = self.CACHE(2, **kw, capacity=2) + assert 2 == len(obj) + assert "a" == obj[1] + assert "b" == obj[2] + + def test_eq_implemetation(self): + # see https://github.com/awolverp/cachebox/issues/5 + + size = 1000 + cache = self.CACHE(size, **self.KWARGS, capacity=size) + + for i in range(size): + cache.insert(NoEQ(val=i), i) + cache.get(NoEQ(val=i)) + + cache = self.CACHE(size, **self.KWARGS, capacity=size) + + for i in range(size): + cache.insert(EQ(val=i), i) + cache.get(EQ(val=i)) + + def test_iterators(self): + obj = self.CACHE(100, **self.KWARGS, capacity=100) + + if self.ITERATOR_CLASS: + assert isinstance(iter(obj), self.ITERATOR_CLASS) + + for i in range(6): + obj[i] = i * 2 + + k = list(range(6)) + v = list(i * 2 for i in range(6)) + assert k == sorted(obj.keys()) + assert v == sorted(obj.values()) + assert list(zip(k, v)) == sorted(obj.items()) + + with pytest.raises(RuntimeError): + for i in obj: + del obj[i] + + for i in range(100): + obj[i] = i * 2 + + for i in range(50): + del obj[i] + + p = iter(obj) + next(p) + + obj.shrink_to_fit() + + with pytest.raises(RuntimeError): + next(p) + + obj = self.CACHE(0, 
**self.KWARGS) + obj.update({i: i for i in range(20)}) + + for key, value in obj.items(): + assert obj[key] == value + + for key, value in obj.items(): + obj[key] = value * 2 + + with pytest.raises(RuntimeError): + for key, value in obj.items(): + obj[str(key)] = value + + def test___eq__(self): + cache = self.CACHE(100, **self.KWARGS, capacity=100) + + with pytest.raises(TypeError): + cache > cache + + with pytest.raises(TypeError): + cache < cache + + with pytest.raises(TypeError): + cache >= cache + + with pytest.raises(TypeError): + cache <= cache + + assert cache == cache + assert not cache != cache + + for i in range(90): + cache[i] = i + + assert cache == cache + assert not cache != cache + + c2 = self.CACHE(100, **self.KWARGS, capacity=100) + for i in range(90): + c2[i] = i + + assert cache == c2 + assert not c2 != cache + + c2 = self.CACHE(1000, **self.KWARGS, capacity=100) + for i in range(90): + c2[i] = i + + assert not cache == c2 + assert c2 != cache + + def test_generic(self): + obj: self.CACHE[int, int] = self.CACHE(maxsize=0, **self.KWARGS) + _ = obj + + def _test_pickle(self, check_order: typing.Callable): + import pickle + import tempfile + + c1 = self.CACHE(maxsize=0, **self.KWARGS) + c2 = pickle.loads(pickle.dumps(c1)) + assert c1 == c2 + assert c1.capacity() == c2.capacity() + + c1 = self.CACHE(maxsize=100, **self.KWARGS) + c1.update({i: i for i in range(10)}) + + for _ in range(10): + c1[0] + for _ in range(9): + c1[1] + for _ in range(8): + c1[2] + for _ in range(7): + c1[3] + for _ in range(6): + c1[4] + for _ in range(5): + c1[5] + for _ in range(4): + c1[6] + for _ in range(3): + c1[7] + for _ in range(2): + c1[8] + for _ in range(1): + c1[9] + + c2 = pickle.loads(pickle.dumps(c1)) + assert c1 == c2 + assert c1.capacity() == c2.capacity() + check_order(c1, c2) + + with tempfile.TemporaryFile("w+b") as fd: + c1 = self.CACHE(maxsize=100, **self.KWARGS) + c1.update({i: i for i in range(10)}) + + for _ in range(10): + c1[1] + for _ in 
range(9): + c1[2] + for _ in range(8): + c1[0] + for _ in range(7): + c1[3] + for _ in range(6): + c1[5] + for _ in range(5): + c1[4] + for _ in range(4): + c1[6] + for _ in range(3): + c1[7] + for _ in range(2): + c1[9] + for _ in range(1): + c1[8] + + pickle.dump(c1, fd) + fd.seek(0) + c2 = pickle.load(fd) + assert c1 == c2 + assert c1.capacity() == c2.capacity() + check_order(c1, c2) diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py new file mode 100644 index 0000000..600402f --- /dev/null +++ b/python/tests/test_caches.py @@ -0,0 +1,90 @@ +from cachebox import ( + Cache, + FIFOCache, +) +import pytest +from .mixin import _TestMixin + + +class TestCache(_TestMixin): + CACHE = Cache + NO_POLICY = True + + def test_pickle(self): + self._test_pickle(lambda c1, c2: None) + + +class TestFIFOCache(_TestMixin): + CACHE = FIFOCache + + def test_policy(self): + cache = FIFOCache(5) + + cache[0] = 0 + cache[1] = 1 + cache[2] = 2 + + assert cache[0] == 0 + assert cache[1] == 1 + + assert cache.popitem() == (0, 0) + + cache[3] = 3 + + assert cache.popitem() == (1, 1) + assert cache.popitem() == (2, 2) + assert cache.popitem() == (3, 3) + + with pytest.raises(KeyError): + cache.popitem() + + for i in range(5): + cache[i] = i + + for i in range(5): + assert i in cache + + cache[10] = 10 + + assert 0 not in cache + assert 10 in cache + + assert cache.popitem() == (1, 1) + + del cache[2] + del cache[3] + del cache[4] + + assert cache.popitem() == (10, 10) + + def test_ordered_iterators(self): + obj = self.CACHE(100, **self.KWARGS, capacity=100) + + for i in range(6): + obj[i] = i * 2 + + k = list(range(6)) + v = list(i * 2 for i in range(6)) + assert k == list(obj.keys()) + assert v == list(obj.values()) + assert list(zip(k, v)) == list(obj.items()) + + def test_pickle(self): + def inner(c1, c2): + assert list(c1.items()) == list(c2.items()) + + self._test_pickle(inner) + + def test_first_last(self): + obj = self.CACHE(5, **self.KWARGS, capacity=5) + + for 
i in range(5): + obj[i] = i * 2 + + assert obj.first() == 0 + assert obj.last() == 4 + + obj[10] = 20 + + assert obj.first() == 1 + assert obj.last() == 10 diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs index 1238cae..96257cb 100644 --- a/src/bridge/cache.rs +++ b/src/bridge/cache.rs @@ -2,19 +2,18 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -/// A simple cache that has no algorithm; this is only a hashmap. -/// -/// `Cache` vs `dict`: -/// - it is thread-safe and unordered, while `dict` isn't thread-safe and ordered (Python 3.6+). -/// - it uses very lower memory than `dict`. -/// - it supports useful and new methods for managing memory, while `dict` does not. -/// - it does not support `popitem`, while `dict` does. -/// - You can limit the size of [`Cache`], but you cannot for `dict`. #[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct Cache { raw: parking_lot::Mutex, } +#[allow(non_camel_case_types)] +#[pyo3::pyclass(module = "cachebox._core")] +pub struct cache_items { + pub ptr: ObservedIterator, + pub iter: parking_lot::Mutex>, +} + #[pyo3::pymethods] impl Cache { #[new] @@ -86,18 +85,13 @@ impl Cache { } } - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: pyo3::PyObject, - ) -> pyo3::PyResult { + fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { let key = PreHashObject::from_pyobject(py, key)?; let lock = self.raw.lock(); match lock.lookup(py, &key)? 
{ Some(val) => Ok(val.clone_ref(py)), - None => Ok(default), + None => Err(pyo3::PyErr::new::(key.obj)), } } @@ -119,29 +113,25 @@ impl Cache { other: pyo3::PyObject, op: pyo3::class::basic::CompareOp, ) -> pyo3::PyResult { - let other = match other.extract::>(slf.py()) { - Ok(o) => o, - Err(_) => return Ok(false), - }; + let other = other.extract::>(slf.py())?; match op { pyo3::class::basic::CompareOp::Eq => { if slf.as_ptr() == other.as_ptr() { return Ok(true); } - let t1 = slf.raw.lock(); let t2 = other.raw.lock(); - t1.equal(slf.py(), &*t2) + t1.equal(slf.py(), &t2) } pyo3::class::basic::CompareOp::Ne => { - if slf.as_ptr() != other.as_ptr() { - return Ok(true); + if slf.as_ptr() == other.as_ptr() { + return Ok(false); } let t1 = slf.raw.lock(); let t2 = other.raw.lock(); - t1.equal(slf.py(), &*t2).map(|r| !r) + t1.equal(slf.py(), &t2).map(|r| !r) } _ => Err(pyo3::PyErr::new::( "only '==' or '!=' are supported", @@ -149,12 +139,7 @@ impl Cache { } } - fn pop( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: pyo3::PyObject, - ) -> pyo3::PyResult { + fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { let key = PreHashObject::from_pyobject(py, key)?; let mut lock = self.raw.lock(); @@ -163,7 +148,7 @@ impl Cache { let (_, value) = entry.remove(); Ok(value) } - Entry::Absent(_) => Ok(default), + Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), } } @@ -238,7 +223,6 @@ impl Cache { let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - #[allow(unused_unsafe)] tuple!( py, 3, @@ -271,13 +255,6 @@ impl Cache { } } -#[allow(non_camel_case_types)] -#[pyo3::pyclass(module = "cachebox._core")] -pub struct cache_items { - pub ptr: ObservedIterator, - pub iter: parking_lot::Mutex>, -} - #[pyo3::pymethods] impl cache_items { fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs index 
584a05b..14a0bbf 100644 --- a/src/bridge/fifocache.rs +++ b/src/bridge/fifocache.rs @@ -1,14 +1,19 @@ use crate::common::Entry; +use crate::common::ObservedIterator; use crate::common::PreHashObject; -/// FIFO Cache implementation - First-In First-Out Policy (thread-safe). -/// -/// In simple terms, the FIFO cache will remove the element that has been in the cache the longest. #[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct FIFOCache { raw: parking_lot::Mutex, } +#[allow(non_camel_case_types)] +#[pyo3::pyclass(module = "cachebox._core")] +pub struct fifocache_items { + pub ptr: ObservedIterator, + pub iter: parking_lot::Mutex, +} + #[pyo3::pymethods] impl FIFOCache { #[new] @@ -84,75 +89,62 @@ impl FIFOCache { } } - fn get( - &self, - py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: pyo3::PyObject, - ) -> pyo3::PyResult { + fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { let key = PreHashObject::from_pyobject(py, key)?; let lock = self.raw.lock(); match lock.lookup(py, &key)? 
{ Some(val) => Ok(val.clone_ref(py)), - None => Ok(default), + None => Err(pyo3::PyErr::new::(key.obj)), } } - // fn update( - // slf: pyo3::PyRef<'_, Self>, - // py: pyo3::Python<'_>, - // iterable: pyo3::PyObject, - // ) -> pyo3::PyResult<()> { - // if slf.as_ptr() == iterable.as_ptr() { - // return Ok(()); - // } - - // let mut lock = slf.raw.lock(); - // lock.extend(py, iterable) - // } - - // fn __richcmp__( - // slf: pyo3::PyRef<'_, Self>, - // other: pyo3::PyObject, - // op: pyo3::class::basic::CompareOp, - // ) -> pyo3::PyResult { - // let other = match other.extract::>(slf.py()) { - // Ok(o) => o, - // Err(_) => return Ok(false), - // }; - - // match op { - // pyo3::class::basic::CompareOp::Eq => { - // if slf.as_ptr() == other.as_ptr() { - // return Ok(true); - // } - - // let t1 = slf.raw.lock(); - // let t2 = other.raw.lock(); - // t1.equal(slf.py(), &*t2) - // } - // pyo3::class::basic::CompareOp::Ne => { - // if slf.as_ptr() != other.as_ptr() { - // return Ok(true); - // } - - // let t1 = slf.raw.lock(); - // let t2 = other.raw.lock(); - // t1.equal(slf.py(), &*t2).map(|r| !r) - // } - // _ => Err(pyo3::PyErr::new::( - // "only '==' or '!=' are supported", - // )), - // } - // } - - fn pop( - &self, + fn update( + slf: pyo3::PyRef<'_, Self>, py: pyo3::Python<'_>, - key: pyo3::PyObject, - default: pyo3::PyObject, - ) -> pyo3::PyResult { + iterable: pyo3::PyObject, + ) -> pyo3::PyResult<()> { + if slf.as_ptr() == iterable.as_ptr() { + return Ok(()); + } + + let mut lock = slf.raw.lock(); + lock.extend(py, iterable) + } + + fn __richcmp__( + slf: pyo3::PyRef<'_, Self>, + other: pyo3::PyObject, + op: pyo3::class::basic::CompareOp, + ) -> pyo3::PyResult { + let other = other.extract::>(slf.py())?; + + match op { + pyo3::class::basic::CompareOp::Eq => { + if slf.as_ptr() == other.as_ptr() { + return Ok(true); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2) + } + pyo3::class::basic::CompareOp::Ne => { + if slf.as_ptr() 
== other.as_ptr() { + return Ok(false); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2).map(|r| !r) + } + _ => Err(pyo3::PyErr::new::( + "only '==' or '!=' are supported", + )), + } + } + + fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { let key = PreHashObject::from_pyobject(py, key)?; let mut lock = self.raw.lock(); @@ -161,7 +153,16 @@ impl FIFOCache { let (_, value) = entry.remove(); Ok(value) } - Entry::Absent(_) => Ok(default), + Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn popitem(&self, py: pyo3::Python<'_>) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { + let mut lock = self.raw.lock(); + + match lock.popitem(py)? { + Some((key, val)) => Ok((key.obj, val)), + None => Err(pyo3::PyErr::new::(())), } } @@ -200,61 +201,76 @@ impl FIFOCache { } } - // fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - // let lock = slf.raw.lock(); - // let state = lock.observed.get(); - // let iter = lock.iter(); - - // let result = cache_items { - // ptr: ObservedIterator::new(slf.as_ptr(), state), - // iter: parking_lot::Mutex::new(iter), - // }; - - // pyo3::Py::new(slf.py(), result) - // } - - // fn __getnewargs__(&self) -> (usize,) { - // (0,) - // } - - // fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - // let lock = self.raw.lock(); - // unsafe { - // let state = { - // let mp = pyo3::ffi::PyDict_New(); - - // if mp.is_null() { - // return Err(pyo3::PyErr::fetch(py)); - // } - - // for bucket in lock.iter() { - // let (key, val) = bucket.as_ref(); - // // SAFETY: we don't need to check error because we sure about key that is hashable. 
- // pyo3::ffi::PyDict_SetItem(mp, key.obj.as_ptr(), val.as_ptr()); - // } - - // let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - // let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - - // #[allow(unused_unsafe)] - // tuple!( - // py, - // 3, - // 0 => maxsize, - // 1 => mp, - // 2 => capacity, - // )? - // }; - // Ok(pyo3::Py::from_owned_ptr(py, state)) - // } - // } - - // pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - // let mut lock = self.raw.lock(); - // lock.from_pickle(py, state.as_ptr()) - // } - - // popitem + fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + let lock = slf.raw.lock(); + let state = lock.observed.get(); + let iter = lock.iter(); + + let result = fifocache_items { + ptr: ObservedIterator::new(slf.as_ptr(), state), + iter: parking_lot::Mutex::new(iter), + }; + + pyo3::Py::new(slf.py(), result) + } + + fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option { + let lock = self.raw.lock(); + + lock.get_index(index).map(|(key, _)| key.obj.clone_ref(py)) + } + + fn __getnewargs__(&self) -> (usize,) { + (0,) + } + + fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let lock = self.raw.lock(); + + let state = unsafe { + let list = pyo3::ffi::PyList_New(0); + if list.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + for (hk, val) in lock.entries_iter() { + let tp = tuple!( + py, + 2, + 0 => hk.obj.clone_ref(py).as_ptr(), + 1 => val.clone_ref(py).as_ptr(), + ); + + if let Err(x) = tp { + pyo3::ffi::Py_DECREF(list); + return Err(x); + } + + if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { + pyo3::ffi::Py_DECREF(list); + return Err(pyo3::PyErr::fetch(py)); + } + } + + let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity().0); + + tuple!( + py, + 3, + 0 => maxsize, + 1 => list, + 2 => capacity, + )? 
+ }; + + Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) + } + + pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + let mut lock = self.raw.lock(); + lock.from_pickle(py, state.as_ptr()) + } pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { for value in self.raw.lock().entries_iter() { @@ -269,3 +285,30 @@ impl FIFOCache { lock.clear() } } + +#[pyo3::pymethods] +impl fifocache_items { + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + #[allow(unused_mut)] + fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { + let mut iter = slf.iter.lock(); + + slf.ptr.proceed(slf.py())?; + + if let Some(x) = iter.next() { + let (key, val) = unsafe { x.as_ref() }; + + tuple!( + slf.py(), + 2, + 0 => key.obj.clone_ref(slf.py()).into_ptr(), + 1 => val.clone_ref(slf.py()).into_ptr(), + ) + } else { + Err(pyo3::PyErr::new::(())) + } + } +} diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index ec04015..ff89a65 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -1,2 +1,6 @@ +use pyo3::create_exception; + +create_exception!(_core, CoreKeyError, pyo3::exceptions::PyException); + pub mod cache; pub mod fifocache; diff --git a/src/common.rs b/src/common.rs index 4b973b4..08f0b5f 100644 --- a/src/common.rs +++ b/src/common.rs @@ -4,7 +4,7 @@ pub fn pyobject_equal( arg2: *mut pyo3::ffi::PyObject, ) -> pyo3::PyResult { unsafe { - if std::ptr::eq(arg1, arg2) { + if std::ptr::addr_eq(arg1, arg2) { return Ok(true); } @@ -46,10 +46,12 @@ macro_rules! tuple { $len:expr, $($index:expr => $value:expr,)+ ) => {{ + #[allow(unused_unsafe)] let tuple = unsafe { pyo3::ffi::PyTuple_New($len) }; if tuple.is_null() { Err(pyo3::PyErr::fetch($py)) } else { + #[allow(unused_unsafe)] unsafe { $( pyo3::ffi::PyTuple_SetItem(tuple, $index, $value); @@ -61,6 +63,7 @@ macro_rules! 
tuple { }}; (check $tuple:expr, size=$size:expr) => {{ + #[allow(unused_unsafe)] if unsafe { pyo3::ffi::PyTuple_CheckExact($tuple) } == 0 { Err( pyo3::PyErr::new::("expected tuple, but got another type") @@ -116,8 +119,9 @@ macro_rules! extract_pickle_tuple { /// /// This function ensures a bijective mapping between isize and u64, preserving the order of values /// by offsetting negative values to the upper range of u64. +#[inline(always)] fn convert_isize_to_u64(v: &isize) -> u64 { - const OFFSET: u64 = 1 << 63; + const OFFSET: u64 = 0x8000000000000000; // 1 << 63 if *v >= 0 { *v as u64 @@ -314,3 +318,36 @@ impl Drop for ObservedIterator { unsafe impl Send for ObservedIterator {} unsafe impl Sync for ObservedIterator {} + +pub struct NoLifetimeSliceIter { + pub pointer: std::ptr::NonNull, + pub index: usize, + pub len: usize, +} + +impl NoLifetimeSliceIter { + #[inline] + pub fn new(slice: &[T]) -> Self { + let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); + + Self { + pointer, + index: 0, + len: slice.len(), + } + } +} + +impl Iterator for NoLifetimeSliceIter { + type Item = std::ptr::NonNull; + + fn next(&mut self) -> Option { + if self.index >= self.len { + None + } else { + let value = unsafe { self.pointer.add(self.index) }; + self.index += 1; + Some(value) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index ae4f8b6..6a4324d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,10 +9,12 @@ mod policies; /// cachebox core ( written in Rust ) #[pymodule(gil_used = false)] #[cold] -fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { +fn _core(py: pyo3::Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__author__", env!("CARGO_PKG_AUTHORS"))?; m.add("__version__", env!("CARGO_PKG_VERSION"))?; + m.add("CoreKeyError", py.get_type::())?; + m.add_class::()?; m.add_class::()?; diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs index 09e811e..63661d1 100644 --- a/src/policies/fifo.rs +++ b/src/policies/fifo.rs @@ -1,6 +1,7 @@ //! 
The FIFO policy, This is inspired by Rust's indexmap with some changes. use crate::common::Entry; +use crate::common::NoLifetimeSliceIter; use crate::common::Observed; use crate::common::PreHashObject; use crate::common::TryFindMethods; @@ -40,6 +41,11 @@ pub struct FIFOPolicyAbsent<'a> { instance: &'a mut FIFOPolicy, } +pub struct FIFOIterator { + first: NoLifetimeSliceIter<(PreHashObject, pyo3::PyObject)>, + second: NoLifetimeSliceIter<(PreHashObject, pyo3::PyObject)>, +} + impl FIFOPolicy { #[inline] pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { @@ -200,11 +206,112 @@ impl FIFOPolicy { self.observed.change(); } + #[inline] pub fn entries_iter( &self, ) -> std::collections::vec_deque::Iter<'_, (PreHashObject, pyo3::PyObject)> { self.entries.iter() } + + pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + if self.maxsize != other.maxsize { + return Ok(false); + } + + if self.entries.len() != other.entries.len() { + return Ok(false); + } + + for index in 0..self.entries.len() { + let (key1, value1) = &self.entries[index]; + let (key2, value2) = &other.entries[index]; + + if key1.hash != key2.hash + || !key1.equal(py, key2)? + || !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? + { + return Ok(false); + } + } + + Ok(true) + } + + pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { + use pyo3::types::{PyAnyMethods, PyDictMethods}; + + if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + for (key, value) in dict.iter() { + let hk = + unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; + + match self.entry(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value.unbind())?; + } + Entry::Absent(entry) => { + entry.insert(py, hk, value.unbind())?; + } + } + } + } else { + for pair in iterable.bind(py).try_iter()? 
{ + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match self.entry(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value)?; + } + Entry::Absent(entry) => { + entry.insert(py, hk, value)?; + } + } + } + } + + Ok(()) + } + + #[inline(always)] + pub fn iter(&self) -> FIFOIterator { + let (a, b) = self.entries.as_slices(); + + FIFOIterator { + first: NoLifetimeSliceIter::new(a), + second: NoLifetimeSliceIter::new(b), + } + } + + #[allow(clippy::wrong_self_convention)] + pub fn from_pickle( + &mut self, + py: pyo3::Python<'_>, + state: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + unsafe { + tuple!(check state, size=3)?; + let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); + + let mut new = Self::new(maxsize, capacity)?; + new.extend(py, iterable)?; + + *self = new; + Ok(()) + } + } + + #[inline(always)] + pub fn get_index(&self, n: usize) -> Option<&(PreHashObject, pyo3::PyObject)> { + self.entries.get(n) + } } impl<'a> FIFOPolicyOccupied<'a> { @@ -213,7 +320,10 @@ impl<'a> FIFOPolicyOccupied<'a> { let index = unsafe { self.bucket.as_ref() }; let item = &mut self.instance.entries[index - self.instance.n_shifts]; let old_value = std::mem::replace(&mut item.1, value); - self.instance.observed.change(); + + // In update we don't need to change this; because this does not change the memory address ranges + // self.instance.observed.change(); + Ok(old_value) } @@ -221,7 +331,7 @@ impl<'a> FIFOPolicyOccupied<'a> { pub fn remove(self) -> (PreHashObject, pyo3::PyObject) { // let (PreHashObject { hash, .. 
}, _) = &self.instance.entries[self.index - self.instance.n_shifts]; let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; - index = index - self.instance.n_shifts; + index -= self.instance.n_shifts; self.instance .decrement_indexes(index + 1, self.instance.entries.len()); @@ -239,7 +349,7 @@ impl<'a> FIFOPolicyOccupied<'a> { } } -impl<'a> FIFOPolicyAbsent<'a> { +impl FIFOPolicyAbsent<'_> { #[inline] pub fn insert( self, @@ -266,3 +376,19 @@ impl<'a> FIFOPolicyAbsent<'a> { Ok(()) } } + +impl Iterator for FIFOIterator { + type Item = std::ptr::NonNull<(PreHashObject, pyo3::PyObject)>; + + fn next(&mut self) -> Option { + match self.first.next() { + Some(val) => Some(val), + None => { + core::mem::swap(&mut self.first, &mut self.second); + self.first.next() + } + } + } +} + +unsafe impl Send for FIFOIterator {} diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 3d65388..85636f3 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -212,7 +212,10 @@ impl<'a> NoPolicyOccupied<'a> { pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { unsafe { let old_value = std::mem::replace(&mut self.bucket.as_mut().1, value); - self.instance.observed.change(); + + // In update we don't need to change this; because this does not change the memory address ranges + // self.instance.observed.change(); + Ok(old_value) } } From e0daabafc51f2e268f6213c1730cf6aef0010ed8 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 7 Apr 2025 17:43:04 +0330 Subject: [PATCH 07/37] Update tests --- .gitignore | 1 + python/cachebox/_cachebox.py | 18 +++++++++--------- python/tests/mixin.py | 21 +++++++++------------ python/tests/test_caches.py | 2 ++ 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 404c5d5..c5bff0f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ __pycache__ *.so /.coverage /.pytest_cache +/htmlcov diff --git a/python/cachebox/_cachebox.py 
b/python/cachebox/_cachebox.py index f91e24b..1776eb1 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -96,7 +96,7 @@ def capacity(self) -> int: def __len__(self) -> int: return len(self._raw) - def __sizeof__(self): + def __sizeof__(self): # pragma: no cover return self._raw.__sizeof__() def __contains__(self, key: KT) -> bool: @@ -147,10 +147,10 @@ def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Uni """ return self._raw.setdefault(key, default) - def popitem(self) -> typing.NoReturn: + def popitem(self) -> typing.NoReturn: # pragma: no cover raise NotImplementedError() - def drain(self) -> typing.NoReturn: + def drain(self) -> typing.NoReturn: # pragma: no cover raise NotImplementedError() def update(self, iterable: typing.Union["Cache", dict, tuple, typing.Generator]) -> None: @@ -181,13 +181,13 @@ def __delitem__(self, key: KT) -> None: def __eq__(self, other) -> bool: if not isinstance(other, Cache): - return False + return False # pragma: no cover return self._raw == other._raw def __ne__(self, other) -> bool: if not isinstance(other, Cache): - return False + return False # pragma: no cover return self._raw != other._raw @@ -268,7 +268,7 @@ def capacity(self) -> int: def __len__(self) -> int: return len(self._raw) - def __sizeof__(self): + def __sizeof__(self): # pragma: no cover return self._raw.__sizeof__() def __contains__(self, key: KT) -> bool: @@ -307,7 +307,7 @@ def popitem(self) -> typing.Tuple[KT, VT]: except _core.CoreKeyError: raise KeyError() from None - def drain(self, n: int) -> int: + def drain(self, n: int) -> int: # pragma: no cover if n == 0: return 0 @@ -342,13 +342,13 @@ def __delitem__(self, key: KT) -> None: def __eq__(self, other) -> bool: if not isinstance(other, FIFOCache): - return False + return False # pragma: no cover return self._raw == other._raw def __ne__(self, other) -> bool: if not isinstance(other, FIFOCache): - return False + return False # pragma: no cover return 
self._raw != other._raw diff --git a/python/tests/mixin.py b/python/tests/mixin.py index 254b5cd..b1c7c5d 100644 --- a/python/tests/mixin.py +++ b/python/tests/mixin.py @@ -26,7 +26,7 @@ def __hash__(self) -> int: return self.val -def getsizeof(obj, use_sys=True): +def getsizeof(obj, use_sys=True): # pragma: no cover try: if use_sys: return sys.getsizeof(obj) @@ -36,7 +36,7 @@ def getsizeof(obj, use_sys=True): return len(obj) -class _TestMixin: +class _TestMixin: # pragma: no cover CACHE: typing.Type[BaseCacheImpl] KWARGS: dict = {} @@ -100,15 +100,6 @@ def test___len__(self): assert len(cache) == 10 assert cache.is_full() - def test___sizeof__(self): - cache = self.CACHE(10, **self.KWARGS, capacity=10) - - # all classes have to implement __sizeof__ - # __sizeof__ returns exactly allocated memory size by cache - # but sys.getsizeof add also garbage collector overhead to that, so sometimes - # sys.getsizeof is greater than __sizeof__ - getsizeof(cache, False) - def test___bool__(self): cache = self.CACHE(1, **self.KWARGS, capacity=1) @@ -146,16 +137,22 @@ def test___setitem__(self): del cache[2] del cache[3] + with pytest.raises(KeyError): + del cache["error"] + cache[0] with pytest.raises(KeyError): cache[2] def test___repr__(self): - cache = self.CACHE(2, **self.KWARGS, capacity=2) + cache = self.CACHE(100, **self.KWARGS, capacity=2) assert str(cache) == repr(cache) assert repr(cache).startswith(self.CACHE.__name__) + cache.update({i: i for i in range(100)}) + assert str(cache) == repr(cache) + def test_insert(self): cache = self.CACHE(5, **self.KWARGS, capacity=5) diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index 600402f..fb3720f 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -88,3 +88,5 @@ def test_first_last(self): assert obj.first() == 1 assert obj.last() == 10 + assert obj.first(-1) == obj.last() + assert obj.first(-10000) is None From 4a9d176aac820f8f9681f4179dc835c913bfda58 Mon Sep 17 00:00:00 2001 
From: awolverp Date: Mon, 7 Apr 2025 18:47:43 +0330 Subject: [PATCH 08/37] Update docstrings --- python/cachebox/_cachebox.py | 128 +++++++++++++++++++++++++++++------ 1 file changed, 109 insertions(+), 19 deletions(-) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 1776eb1..2f5f393 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -53,32 +53,42 @@ def __next__(self) -> VT: class Cache(BaseCacheImpl[KT, VT]): """ - A simple cache that has no algorithm; this is only a hashmap. - - `Cache` vs `dict`: - - it is thread-safe and unordered, while `dict` isn't thread-safe and ordered (Python 3.6+). - - it uses very lower memory than `dict`. - - it supports useful and new methods for managing memory, while `dict` does not. - - it does not support popitem, while `dict` does. - - You can limit the size of Cache, but you cannot for `dict`. + A thread-safe, memory-efficient hashmap-like cache with configurable maximum size. + + Provides a flexible key-value storage mechanism with: + - Configurable maximum size (zero means unlimited) + - Lower memory usage compared to standard dict + - Thread-safe operations + - Useful memory management methods + + Differs from standard dict by: + - Being thread-safe + - Unordered storage + - Size limitation + - Memory efficiency + - Additional cache management methods + + Supports initialization with optional initial data and capacity, + and provides dictionary-like access with additional cache-specific operations. """ def __init__( self, maxsize: int, - iterable: typing.Union["Cache", dict, tuple, typing.Generator, None] = None, + iterable: typing.Union[dict, typing.Iterable[tuple]] = None, *, capacity: int = 0, ) -> None: """ - A simple cache that has no algorithm; this is only a hashmap. + Initialize a new Cache instance. - :param maxsize: you can specify the limit size of the cache ( zero means infinity ); this is unchangable. 
+ Args: + maxsize (int): Maximum number of elements the cache can hold. Zero means unlimited. + iterable (Union[Cache, dict, tuple, Generator, None], optional): Initial data to populate the cache. Defaults to None. + capacity (int, optional): Pre-allocate hash table capacity to minimize reallocations. Defaults to 0. - :param iterable: you can create cache from a dict or an iterable. - - :param capacity: If `capacity` param is given, cache attempts to allocate a new hash table with at - least enough capacity for inserting the given number of elements without reallocating. + Creates a new cache with specified size constraints and optional initial data. The cache can be pre-sized + to improve performance when the number of expected elements is known in advance. """ self._raw = _core.Cache(maxsize, capacity=capacity) @@ -153,7 +163,7 @@ def popitem(self) -> typing.NoReturn: # pragma: no cover def drain(self) -> typing.NoReturn: # pragma: no cover raise NotImplementedError() - def update(self, iterable: typing.Union["Cache", dict, tuple, typing.Generator]) -> None: + def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: """ Updates the cache with elements from a dictionary or an iterable object of key/value pairs. @@ -246,13 +256,39 @@ def __repr__(self) -> str: class FIFOCache(BaseCacheImpl[KT, VT]): + """ + A First-In-First-Out (FIFO) cache implementation with configurable maximum size and optional initial capacity. + + This cache provides a fixed-size container that automatically removes the oldest items when the maximum size is reached. + Supports various operations like insertion, retrieval, deletion, and iteration with O(1) complexity. + + Attributes: + maxsize: The maximum number of items the cache can hold. + capacity: The initial capacity of the cache before resizing. 
+ + Key features: + - Deterministic item eviction order (oldest items removed first) + - Efficient key-value storage and retrieval + - Supports dictionary-like operations + - Allows optional initial data population + """ + def __init__( self, maxsize: int, - iterable: typing.Union["Cache", dict, tuple, typing.Generator, None] = None, + iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, *, capacity: int = 0, ) -> None: + """ + Initialize a new FIFOCache instance. + + Args: + maxsize: The maximum number of items the cache can hold. + iterable: Optional initial data to populate the cache. Can be another FIFOCache, + a dictionary, tuple, generator, or None. + capacity: Optional initial capacity of the cache before resizing. Defaults to 0. + """ self._raw = _core.FIFOCache(maxsize, capacity=capacity) if iterable is not None: @@ -263,6 +299,7 @@ def maxsize(self) -> int: return self._raw.maxsize() def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" return self._raw.capacity() def __len__(self) -> int: @@ -284,31 +321,51 @@ def is_full(self) -> bool: return self._raw.is_full() def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though; + """ return self._raw.insert(key, value) def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Equals to `self[key]`, but returns `default` if the cache don't have this key present. + """ try: return self._raw.get(key) except _core.CoreKeyError: return default def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Removes specified key and return the corresponding value. If the key is not found, returns the `default`. 
+ """ try: return self._raw.remove(key) except _core.CoreKeyError: return default def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Return the value for key if key is in the cache, else default. + """ return self._raw.setdefault(key, default) def popitem(self) -> typing.Tuple[KT, VT]: + """Removes the element that has been in the cache the longest.""" try: return self._raw.popitem() except _core.CoreKeyError: raise KeyError() from None def drain(self, n: int) -> int: # pragma: no cover - if n == 0: + """Does the `popitem()` `n` times and returns count of removed items.""" + if n <= 0: return 0 for i in range(n): @@ -319,7 +376,8 @@ def drain(self, n: int) -> int: # pragma: no cover return i - def update(self, iterable: typing.Union["Cache", dict, tuple, typing.Generator]) -> None: + def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: + """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" if hasattr(iterable, "items"): iterable = iterable.items() @@ -353,21 +411,50 @@ def __ne__(self, other) -> bool: return self._raw != other._raw def shrink_to_fit(self) -> None: + """Shrinks the cache to fit len(self) elements.""" self._raw.shrink_to_fit() def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. + + If reuse is True, will not free the memory for reusing in the future. + """ self._raw.clear(reuse) def items(self) -> IteratorView[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + """ return IteratorView(self._raw.items(), lambda x: x) def keys(self) -> IteratorView[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. 
+ """ return IteratorView(self._raw.items(), lambda x: x[0]) def values(self) -> IteratorView[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + """ return IteratorView(self._raw.items(), lambda x: x[1]) def first(self, n: int = 0) -> typing.Optional[KT]: + """ + Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). + + By using `n` parameter, you can browse order index by index. + """ if n < 0: n = len(self._raw) + n @@ -377,6 +464,9 @@ def first(self, n: int = 0) -> typing.Optional[KT]: return self._raw.get_index(n) def last(self) -> typing.Optional[KT]: + """ + Returns the last key in cache. Equals to `self.first(-1)`. + """ return self._raw.get_index(len(self._raw) - 1) def __iter__(self) -> IteratorView[KT]: From 08ad6d25810932632f0fb66e0b3a4c7fb82b84d7 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 7 Apr 2025 20:41:04 +0330 Subject: [PATCH 09/37] Optimize insert operation --- python/tests/mixin.py | 25 +----------------- src/bridge/cache.rs | 2 +- src/bridge/fifocache.rs | 2 +- src/common.rs | 33 ++++++++++++++++++++++++ src/policies/fifo.rs | 55 ++++++++++++++++++++++++++++++++-------- src/policies/nopolicy.rs | 42 +++++++++++++++++++++++++----- 6 files changed, 116 insertions(+), 43 deletions(-) diff --git a/python/tests/mixin.py b/python/tests/mixin.py index b1c7c5d..1c2b8df 100644 --- a/python/tests/mixin.py +++ b/python/tests/mixin.py @@ -1,4 +1,4 @@ -from cachebox import BaseCacheImpl, IteratorView +from cachebox import BaseCacheImpl import dataclasses import pytest import typing @@ -41,7 +41,6 @@ class _TestMixin: # pragma: no cover KWARGS: dict = {} NO_POLICY: bool = False - ITERATOR_CLASS: typing.Optional[type] = IteratorView def test__new__(self): cache = self.CACHE(10, **self.KWARGS, capacity=8) @@ -60,10 +59,6 @@ def test__new__(self): assert cache.maxsize == sys.maxsize assert 20 > 
cache.capacity() >= 8 - cache = self.CACHE(0, **self.KWARGS, capacity=0) - assert cache.maxsize == sys.maxsize - assert 2 >= cache.capacity() >= 0 # This is depends on platform - def test_overflow(self): if not self.NO_POLICY: return @@ -100,16 +95,6 @@ def test___len__(self): assert len(cache) == 10 assert cache.is_full() - def test___bool__(self): - cache = self.CACHE(1, **self.KWARGS, capacity=1) - - if cache: - pytest.fail("bool(cache) returns invalid response") - - cache[1] = 1 - if not cache: - pytest.fail("not bool(cache) returns invalid response") - def test___contains__(self): cache = self.CACHE(1, **self.KWARGS, capacity=1) @@ -147,7 +132,6 @@ def test___setitem__(self): def test___repr__(self): cache = self.CACHE(100, **self.KWARGS, capacity=2) - assert str(cache) == repr(cache) assert repr(cache).startswith(self.CACHE.__name__) cache.update({i: i for i in range(100)}) @@ -296,9 +280,6 @@ def test_eq_implemetation(self): def test_iterators(self): obj = self.CACHE(100, **self.KWARGS, capacity=100) - if self.ITERATOR_CLASS: - assert isinstance(iter(obj), self.ITERATOR_CLASS) - for i in range(6): obj[i] = i * 2 @@ -377,10 +358,6 @@ def test___eq__(self): assert not cache == c2 assert c2 != cache - def test_generic(self): - obj: self.CACHE[int, int] = self.CACHE(maxsize=0, **self.KWARGS) - _ = obj - def _test_pickle(self, check_order: typing.Callable): import pickle import tempfile diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs index 96257cb..a0da919 100644 --- a/src/bridge/cache.rs +++ b/src/bridge/cache.rs @@ -76,7 +76,7 @@ impl Cache { let key = PreHashObject::from_pyobject(py, key)?; let mut lock = self.raw.lock(); - match lock.entry(py, &key)? { + match lock.entry_with_slot(py, &key)? 
{ Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), Entry::Absent(entry) => { entry.insert(key, value)?; diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs index 14a0bbf..201bc16 100644 --- a/src/bridge/fifocache.rs +++ b/src/bridge/fifocache.rs @@ -80,7 +80,7 @@ impl FIFOCache { let key = PreHashObject::from_pyobject(py, key)?; let mut lock = self.raw.lock(); - match lock.entry(py, &key)? { + match lock.entry_with_slot(py, &key)? { Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), Entry::Absent(entry) => { entry.insert(py, key, value)?; diff --git a/src/common.rs b/src/common.rs index 08f0b5f..86209a8 100644 --- a/src/common.rs +++ b/src/common.rs @@ -188,6 +188,13 @@ pub trait TryFindMethods { hash: u64, compare: impl FnMut(&T) -> Result, ) -> Result>, E>; + + fn try_find_or_find_insert_slot( + &mut self, + hash: u64, + compare: impl FnMut(&T) -> Result, + hasher: impl Fn(&T) -> u64, + ) -> Result, hashbrown::raw::InsertSlot>, E>; } impl TryFindMethods for hashbrown::raw::RawTable { @@ -215,6 +222,32 @@ impl TryFindMethods for hashbrown::raw::RawTable { Ok(found) } } + + #[inline(always)] + fn try_find_or_find_insert_slot( + &mut self, + hash: u64, + mut compare: impl FnMut(&T) -> Result, + hasher: impl Fn(&T) -> u64, + ) -> Result, hashbrown::raw::InsertSlot>, E> { + let mut error = None; + + let found = self.find_or_find_insert_slot(hash, |item| { + match compare(item) { + Ok(boolean) => boolean, + Err(e) => { + error = Some(e); + true // To break checking + } + } + }, hasher); + + if let Some(error) = error { + Err(error) + } else { + Ok(found) + } + } } /// Observe caches' changes diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs index 63661d1..6bbad33 100644 --- a/src/policies/fifo.rs +++ b/src/policies/fifo.rs @@ -39,6 +39,7 @@ pub struct FIFOPolicyOccupied<'a> { pub struct FIFOPolicyAbsent<'a> { instance: &'a mut FIFOPolicy, + insert_slot: Option, } pub struct FIFOIterator { @@ -166,12 +167,32 @@ impl FIFOPolicy { 
} None => { Ok( - Entry::Absent(FIFOPolicyAbsent { instance: self }) + Entry::Absent(FIFOPolicyAbsent { instance: self, insert_slot: None }) ) }, } } + #[rustfmt::skip] + pub fn entry_with_slot( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find_or_find_insert_slot( + key.hash, + |x| self.entries[(*x) - self.n_shifts].0.equal(py, key), + |x| self.entries[(*x) - self.n_shifts].0.hash, + )? { + Ok(bucket) => Ok( + Entry::Occupied(FIFOPolicyOccupied { instance: self, bucket }) + ), + Err(insert_slot) => Ok( + Entry::Absent(FIFOPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) + ), + } + } + pub fn lookup( &self, py: pyo3::Python<'_>, @@ -251,7 +272,7 @@ impl FIFOPolicy { let hk = unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - match self.entry(py, &hk)? { + match self.entry_with_slot(py, &hk)? { Entry::Occupied(mut entry) => { entry.update(value.unbind())?; } @@ -266,7 +287,7 @@ impl FIFOPolicy { let hk = PreHashObject::from_pyobject(py, key)?; - match self.entry(py, &hk)? { + match self.entry_with_slot(py, &hk)? 
{ Entry::Occupied(mut entry) => { entry.update(value)?; } @@ -361,15 +382,27 @@ impl FIFOPolicyAbsent<'_> { self.instance.popitem(py)?; } - self.instance.table.insert( - key.hash, - self.instance.entries.len() + self.instance.n_shifts, - |index| { - self.instance.entries[(*index) - self.instance.n_shifts] - .0 - .hash + match self.insert_slot { + Some(slot) => unsafe { + self.instance.table.insert_in_slot( + key.hash, + slot, + self.instance.entries.len() + self.instance.n_shifts, + ); }, - ); + None => { + self.instance.table.insert( + key.hash, + self.instance.entries.len() + self.instance.n_shifts, + |index| { + self.instance.entries[(*index) - self.instance.n_shifts] + .0 + .hash + }, + ); + } + } + self.instance.entries.push_back((key, value)); self.instance.observed.change(); diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 85636f3..43e17d9 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -16,6 +16,7 @@ pub struct NoPolicyOccupied<'a> { pub struct NoPolicyAbsent<'a> { instance: &'a mut NoPolicy, + insert_slot: Option, } impl NoPolicy { @@ -75,12 +76,32 @@ impl NoPolicy { }, None => { Ok( - Entry::Absent(NoPolicyAbsent { instance: self }) + Entry::Absent(NoPolicyAbsent { instance: self, insert_slot: None }) ) } } } + #[rustfmt::skip] + pub fn entry_with_slot( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find_or_find_insert_slot( + key.hash, + |(x, _)| x.equal(py, key), + |(x, _)| x.hash, + )? { + Ok(bucket) => Ok( + Entry::Occupied(NoPolicyOccupied { instance: self, bucket }) + ), + Err(insert_slot) => Ok( + Entry::Absent(NoPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) + ), + } + } + pub fn lookup( &self, py: pyo3::Python<'_>, @@ -161,7 +182,7 @@ impl NoPolicy { let hk = unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; - match self.entry(py, &hk)? { + match self.entry_with_slot(py, &hk)? 
{ Entry::Occupied(mut entry) => { entry.update(value.unbind())?; } @@ -176,7 +197,7 @@ impl NoPolicy { let hk = PreHashObject::from_pyobject(py, key)?; - match self.entry(py, &hk)? { + match self.entry_with_slot(py, &hk)? { Entry::Occupied(mut entry) => { entry.update(value)?; } @@ -243,9 +264,18 @@ impl NoPolicyAbsent<'_> { )); } - self.instance - .table - .insert(key.hash, (key, value), |(x, _)| x.hash); + match self.insert_slot { + Some(slot) => unsafe { + self.instance + .table + .insert_in_slot(key.hash, slot, (key, value)); + }, + None => { + self.instance + .table + .insert(key.hash, (key, value), |(x, _)| x.hash); + } + } self.instance.observed.change(); Ok(()) From 7655a3557af7799830ff7a8194d6dbc152ccea97 Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 8 Apr 2025 12:12:20 +0330 Subject: [PATCH 10/37] RRCache implemented --- pyproject.toml | 5 - python/cachebox/__init__.py | 1 + python/cachebox/_cachebox.py | 204 ++++++++++++++++++++++++ python/tests/test_caches.py | 10 ++ src/bridge/fifocache.rs | 4 +- src/bridge/mod.rs | 1 + src/bridge/rrcache.rs | 259 ++++++++++++++++++++++++++++++ src/common.rs | 32 ++-- src/lib.rs | 1 + src/policies/mod.rs | 1 + src/policies/random.rs | 295 +++++++++++++++++++++++++++++++++++ 11 files changed, 794 insertions(+), 19 deletions(-) create mode 100644 src/bridge/rrcache.rs create mode 100644 src/policies/random.rs diff --git a/pyproject.toml b/pyproject.toml index e80611a..5fab6cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,11 +42,6 @@ dynamic = [ Homepage = 'https://github.com/awolverp/cachebox' [project.optional-dependencies] -tests = [ - "pytest", - "pytest-asyncio", - "coverage", -] [tool.pytest.ini_options] asyncio_default_fixture_loop_scope = "function" diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py index bc94657..ac777d9 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -1,6 +1,7 @@ from ._cachebox import ( Cache as Cache, FIFOCache as FIFOCache, + 
RRCache as RRCache, BaseCacheImpl as BaseCacheImpl, IteratorView as IteratorView, ) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 2f5f393..906582d 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -479,3 +479,207 @@ def __repr__(self) -> str: self._raw.maxsize(), _items_to_str(self._raw.items(), len(self._raw)), ) + + +class RRCache(BaseCacheImpl[KT, VT]): + """ + A thread-safe cache implementation with Random Replacement (RR) policy. + + This cache randomly selects and removes elements when the cache reaches its maximum size, + ensuring a simple and efficient caching mechanism with configurable capacity. + + Supports operations like insertion, retrieval, deletion, and iteration with O(1) complexity. + """ + + def __init__( + self, + maxsize: int, + iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, + *, + capacity: int = 0, + ) -> None: + """ + Initialize a new RRCache instance. + + Args: + maxsize (int): Maximum size of the cache. A value of zero means unlimited capacity. + iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. Defaults to None. + capacity (int, optional): Preallocated capacity for the cache to minimize reallocations. Defaults to 0. + + Note: + - The cache size limit is immutable after initialization. + - If an iterable is provided, the cache will be populated using the update method. 
+ """ + self._raw = _core.FIFOCache(maxsize, capacity=capacity) + + if iterable is not None: + self.update(iterable) + + @property + def maxsize(self) -> int: + return self._raw.maxsize() + + def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" + return self._raw.capacity() + + def __len__(self) -> int: + return len(self._raw) + + def __sizeof__(self): # pragma: no cover + return self._raw.__sizeof__() + + def __contains__(self, key: KT) -> bool: + return key in self._raw + + def __bool__(self) -> bool: + return not self.is_empty() + + def is_empty(self) -> bool: + return self._raw.is_empty() + + def is_full(self) -> bool: + return self._raw.is_full() + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though; + """ + return self._raw.insert(key, value) + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Equals to `self[key]`, but returns `default` if the cache don't have this key present. + """ + try: + return self._raw.get(key) + except _core.CoreKeyError: + return default + + def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Removes specified key and return the corresponding value. If the key is not found, returns the `default`. + """ + try: + return self._raw.remove(key) + except _core.CoreKeyError: + return default + + def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Return the value for key if key is in the cache, else default. 
+ """ + return self._raw.setdefault(key, default) + + def popitem(self) -> typing.Tuple[KT, VT]: + """Randomly selects and removes a (key, value) pair from the cache.""" + try: + return self._raw.popitem() + except _core.CoreKeyError: + raise KeyError() from None + + def drain(self, n: int) -> int: # pragma: no cover + """Does the `popitem()` `n` times and returns count of removed items.""" + if n <= 0: + return 0 + + for i in range(n): + try: + self._raw.popitem() + except _core.CoreKeyError: + return i + + return i + + def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: + """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" + if hasattr(iterable, "items"): + iterable = iterable.items() + + self._raw.update(iterable) + + def __setitem__(self, key: KT, value: VT) -> None: + self.insert(key, value) + + def __getitem__(self, key: KT) -> VT: + try: + return self._raw.get(key) + except _core.CoreKeyError: + raise KeyError(key) from None + + def __delitem__(self, key: KT) -> None: + try: + self._raw.remove(key) + except _core.CoreKeyError: + raise KeyError(key) from None + + def __eq__(self, other) -> bool: + if not isinstance(other, RRCache): + return False # pragma: no cover + + return self._raw == other._raw + + def __ne__(self, other) -> bool: + if not isinstance(other, RRCache): + return False # pragma: no cover + + return self._raw != other._raw + + def shrink_to_fit(self) -> None: + """Shrinks the cache to fit len(self) elements.""" + self._raw.shrink_to_fit() + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. + + If reuse is True, will not free the memory for reusing in the future. + """ + self._raw.clear(reuse) + + def items(self) -> IteratorView[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. 
+ - Items are not ordered. + """ + return IteratorView(self._raw.items(), lambda x: x) + + def keys(self) -> IteratorView[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Keys are not ordered. + """ + return IteratorView(self._raw.items(), lambda x: x[0]) + + def values(self) -> IteratorView[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + - Values are not ordered. + """ + return IteratorView(self._raw.items(), lambda x: x[1]) + + def __iter__(self) -> IteratorView[KT]: + return self.keys() + + def __repr__(self) -> str: + return "{}[{}/{}]({})".format( + type(self).__name__, + len(self._raw), + self._raw.maxsize(), + _items_to_str(self._raw.items(), len(self._raw)), + ) diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index fb3720f..c4eb608 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -1,6 +1,7 @@ from cachebox import ( Cache, FIFOCache, + RRCache, ) import pytest from .mixin import _TestMixin @@ -90,3 +91,12 @@ def test_first_last(self): assert obj.last() == 10 assert obj.first(-1) == obj.last() assert obj.first(-10000) is None + + + +class TestRRCache(_TestMixin): + CACHE = RRCache + + def test_pickle(self): + self._test_pickle(lambda c1, c2: None) + diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs index 201bc16..b0a333c 100644 --- a/src/bridge/fifocache.rs +++ b/src/bridge/fifocache.rs @@ -27,8 +27,8 @@ impl FIFOCache { Ok(self_) } - fn _state(&self) -> usize { - self.raw.lock().observed.get() as usize + fn _state(&self) -> u16 { + self.raw.lock().observed.get() } fn maxsize(&self) -> usize { diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index ff89a65..e229422 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -4,3 +4,4 @@ create_exception!(_core, CoreKeyError, 
pyo3::exceptions::PyException); pub mod cache; pub mod fifocache; +pub mod rrcache; diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs new file mode 100644 index 0000000..514ef83 --- /dev/null +++ b/src/bridge/rrcache.rs @@ -0,0 +1,259 @@ +use super::cache::cache_items; +use crate::common::Entry; +use crate::common::ObservedIterator; +use crate::common::PreHashObject; + +#[pyo3::pyclass(module = "cachebox._core", frozen)] +pub struct RRCache { + raw: parking_lot::Mutex, +} + +#[pyo3::pymethods] +impl RRCache { + #[new] + #[pyo3(signature=(maxsize, *, capacity=0))] + fn __new__(maxsize: usize, capacity: usize) -> pyo3::PyResult { + let raw = crate::policies::random::RandomPolicy::new(maxsize, capacity)?; + + let self_ = Self { + raw: parking_lot::Mutex::new(raw), + }; + Ok(self_) + } + + fn _state(&self) -> usize { + self.raw.lock().observed.get() as usize + } + + fn maxsize(&self) -> usize { + self.raw.lock().maxsize() + } + + fn capacity(&self) -> usize { + self.raw.lock().capacity() + } + + fn __len__(&self) -> usize { + self.raw.lock().len() + } + + fn __sizeof__(&self) -> usize { + let lock = self.raw.lock(); + lock.capacity() + * (std::mem::size_of::() + std::mem::size_of::()) + } + + fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(_) => Ok(true), + None => Ok(false), + } + } + + fn is_empty(&self) -> bool { + self.raw.lock().is_empty() + } + + fn is_full(&self) -> bool { + self.raw.lock().is_full() + } + + fn insert( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + value: pyo3::PyObject, + ) -> pyo3::PyResult> { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry_with_slot(py, &key)? 
{ + Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Absent(entry) => { + entry.insert(key, value)?; + Ok(None) + } + } + } + + fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(val) => Ok(val.clone_ref(py)), + None => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python<'_>, + iterable: pyo3::PyObject, + ) -> pyo3::PyResult<()> { + if slf.as_ptr() == iterable.as_ptr() { + return Ok(()); + } + + let mut lock = slf.raw.lock(); + lock.extend(py, iterable) + } + + fn __richcmp__( + slf: pyo3::PyRef<'_, Self>, + other: pyo3::PyObject, + op: pyo3::class::basic::CompareOp, + ) -> pyo3::PyResult { + let other = other.extract::>(slf.py())?; + + match op { + pyo3::class::basic::CompareOp::Eq => { + if slf.as_ptr() == other.as_ptr() { + return Ok(true); + } + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2) + } + pyo3::class::basic::CompareOp::Ne => { + if slf.as_ptr() == other.as_ptr() { + return Ok(false); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2).map(|r| !r) + } + _ => Err(pyo3::PyErr::new::( + "only '==' or '!=' are supported", + )), + } + } + + fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let (_, value) = entry.remove(); + Ok(value) + } + Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { + let mut lock = self.raw.lock(); + + match lock.popitem()? 
{ + Some((key, val)) => Ok((key.obj, val)), + None => Err(pyo3::PyErr::new::(())), + } + } + + fn clear(&self, reuse: bool) { + let mut lock = self.raw.lock(); + lock.clear(); + + if !reuse { + lock.shrink_to_fit(); + } + } + + fn shrink_to_fit(&self) { + let mut lock = self.raw.lock(); + lock.shrink_to_fit(); + } + + fn setdefault( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let (_, ref value) = entry.into_value(); + Ok(value.clone_ref(py)) + } + Entry::Absent(entry) => { + entry.insert(key, default.clone_ref(py))?; + Ok(default) + } + } + } + + fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + let lock = slf.raw.lock(); + let state = lock.observed.get(); + let iter = lock.iter(); + + let result = cache_items { + ptr: ObservedIterator::new(slf.as_ptr(), state), + iter: parking_lot::Mutex::new(iter), + }; + + pyo3::Py::new(slf.py(), result) + } + + fn __getnewargs__(&self) -> (usize,) { + (0,) + } + + fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let lock = self.raw.lock(); + unsafe { + let state = { + let mp = pyo3::ffi::PyDict_New(); + + if mp.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + for bucket in lock.iter() { + let (key, val) = bucket.as_ref(); + // SAFETY: we don't need to check error because we sure about key that is hashable. + pyo3::ffi::PyDict_SetItem(mp, key.obj.as_ptr(), val.as_ptr()); + } + + let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); + + tuple!( + py, + 3, + 0 => maxsize, + 1 => mp, + 2 => capacity, + )? 
+ }; + Ok(pyo3::Py::from_owned_ptr(py, state)) + } + } + + pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + let mut lock = self.raw.lock(); + lock.from_pickle(py, state.as_ptr()) + } + + pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + for value in self.raw.lock().iter() { + let (key, value) = unsafe { value.as_ref() }; + visit.call(&key.obj)?; + visit.call(value)?; + } + Ok(()) + } + + pub fn __clear__(&self) { + let mut lock = self.raw.lock(); + lock.clear() + } +} diff --git a/src/common.rs b/src/common.rs index 86209a8..e94591f 100644 --- a/src/common.rs +++ b/src/common.rs @@ -232,15 +232,19 @@ impl TryFindMethods for hashbrown::raw::RawTable { ) -> Result, hashbrown::raw::InsertSlot>, E> { let mut error = None; - let found = self.find_or_find_insert_slot(hash, |item| { - match compare(item) { - Ok(boolean) => boolean, - Err(e) => { - error = Some(e); - true // To break checking + let found = self.find_or_find_insert_slot( + hash, + |item| { + match compare(item) { + Ok(boolean) => boolean, + Err(e) => { + error = Some(e); + true // To break checking + } } - } - }, hasher); + }, + hasher, + ); if let Some(error) = error { Err(error) @@ -252,19 +256,23 @@ impl TryFindMethods for hashbrown::raw::RawTable { /// Observe caches' changes #[derive(Debug)] -pub struct Observed(u16); +pub struct Observed(std::num::NonZeroU16); impl Observed { pub fn new() -> Self { - Self(0) + Self(unsafe { std::num::NonZeroU16::new_unchecked(1) }) } pub fn change(&mut self) { - self.0 = self.0.saturating_add(1); + self.0 = self + .0 + .checked_add(1) + .or_else(|| Some(unsafe { std::num::NonZeroU16::new_unchecked(1) })) + .unwrap(); } pub fn get(&self) -> u16 { - self.0 + self.0.get() } } diff --git a/src/lib.rs b/src/lib.rs index 6a4324d..0fd75f7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ fn _core(py: pyo3::Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { 
m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/policies/mod.rs b/src/policies/mod.rs index e668893..6a070b4 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -1,2 +1,3 @@ pub mod fifo; pub mod nopolicy; +pub mod random; diff --git a/src/policies/random.rs b/src/policies/random.rs new file mode 100644 index 0000000..ebb61de --- /dev/null +++ b/src/policies/random.rs @@ -0,0 +1,295 @@ +use crate::common::Entry; +use crate::common::Observed; +use crate::common::PreHashObject; +use crate::common::TryFindMethods; + +pub struct RandomPolicy { + table: hashbrown::raw::RawTable<(PreHashObject, pyo3::PyObject)>, + maxsize: std::num::NonZeroUsize, + pub observed: Observed, +} + +pub struct RandomPolicyOccupied<'a> { + instance: &'a mut RandomPolicy, + bucket: hashbrown::raw::Bucket<(PreHashObject, pyo3::PyObject)>, +} + +pub struct RandomPolicyAbsent<'a> { + instance: &'a mut RandomPolicy, + insert_slot: Option, +} + +impl RandomPolicy { + #[inline] + pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { + let maxsize = non_zero_or!(maxsize, isize::MAX as usize); + capacity = capacity.min(maxsize.get()); + + Ok(Self { + table: new_table!(capacity)?, + maxsize, + observed: Observed::new(), + }) + } + + #[inline] + pub fn maxsize(&self) -> usize { + self.maxsize.get() + } + + #[inline] + pub fn len(&self) -> usize { + self.table.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.table.is_empty() + } + + #[inline] + pub fn is_full(&self) -> bool { + self.table.len() == self.maxsize.get() + } + + #[inline] + pub fn capacity(&self) -> usize { + self.table.capacity() + } + + #[inline] + pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::PyObject)> { + unsafe { self.table.iter() } + } + + #[inline] + pub fn popitem(&mut self) -> pyo3::PyResult> { + if self.table.is_empty() { + Ok(None) + } else { + let nth = fastrand::usize(0..self.table.buckets()); + + let bucket = unsafe { 
self.table.iter().nth(nth).unwrap_unchecked() }; + let (x, _) = unsafe { self.table.remove(bucket) }; + + self.observed.change(); + Ok(Some(x)) + } + } + + #[rustfmt::skip] + pub fn entry( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find(key.hash, |(x, _)| x.equal(py, key))? { + Some(bucket) => { + Ok( + Entry::Occupied(RandomPolicyOccupied { instance: self, bucket }) + ) + }, + None => { + Ok( + Entry::Absent(RandomPolicyAbsent { instance: self, insert_slot: None }) + ) + } + } + } + + #[rustfmt::skip] + pub fn entry_with_slot( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find_or_find_insert_slot( + key.hash, + |(x, _)| x.equal(py, key), + |(x, _)| x.hash, + )? { + Ok(bucket) => Ok( + Entry::Occupied(RandomPolicyOccupied { instance: self, bucket }) + ), + Err(insert_slot) => Ok( + Entry::Absent(RandomPolicyAbsent { instance: self, insert_slot: Some(insert_slot) }) + ), + } + } + + pub fn lookup( + &self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find(key.hash, |(x, _)| x.equal(py, key))? 
{ + Some(x) => Ok(Some(unsafe { &x.as_ref().1 })), + None => Ok(None), + } + } + + pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + if self.maxsize != other.maxsize { + return Ok(false); + } + + if self.table.len() != other.table.len() { + return Ok(false); + } + + let mut error = None; + + let result = unsafe { + self.table.iter().all(|bucket| { + let (key, val) = bucket.as_ref(); + + match other.table.try_find(key.hash, |(x, _)| x.equal(py, key)) { + Err(e) => { + error = Some(e); + true + } + Ok(Some(bucket)) => { + let (_, val2) = bucket.as_ref(); + + match crate::common::pyobject_equal(py, val.as_ptr(), val2.as_ptr()) { + Ok(result) => result, + Err(e) => { + error = Some(e); + true + } + } + } + Ok(None) => false, + } + }) + }; + + if let Some(error) = error { + return Err(error); + } + + Ok(result) + } + + #[inline] + pub fn clear(&mut self) { + self.table.clear(); + self.observed.change(); + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.table.shrink_to(self.table.len(), |(x, _)| x.hash); + self.observed.change(); + } + + pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { + use pyo3::types::{PyAnyMethods, PyDictMethods}; + + if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + for (key, value) in dict.iter() { + let hk = + unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; + + match self.entry_with_slot(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value.unbind())?; + } + Entry::Absent(entry) => { + entry.insert(hk, value.unbind())?; + } + } + } + } else { + for pair in iterable.bind(py).try_iter()? { + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match self.entry_with_slot(py, &hk)? 
{ + Entry::Occupied(mut entry) => { + entry.update(value)?; + } + Entry::Absent(entry) => { + entry.insert(hk, value)?; + } + } + } + } + + Ok(()) + } + + #[allow(clippy::wrong_self_convention)] + pub fn from_pickle( + &mut self, + py: pyo3::Python<'_>, + state: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + tuple!(check state, size=3)?; + let (maxsize, iterable, capacity) = unsafe { extract_pickle_tuple!(py, state) }; + + let mut new = Self::new(maxsize, capacity)?; + new.extend(py, iterable)?; + + *self = new; + Ok(()) + } +} + +impl<'a> RandomPolicyOccupied<'a> { + #[inline] + pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + unsafe { + let old_value = std::mem::replace(&mut self.bucket.as_mut().1, value); + + // In update we don't need to change this; because this does not change the memory address ranges + // self.instance.observed.change(); + + Ok(old_value) + } + } + + #[inline] + pub fn remove(self) -> (PreHashObject, pyo3::PyObject) { + let (x, _) = unsafe { self.instance.table.remove(self.bucket) }; + self.instance.observed.change(); + x + } + + #[inline] + pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { + unsafe { self.bucket.as_mut() } + } +} + +impl RandomPolicyAbsent<'_> { + #[inline] + pub fn insert(self, key: PreHashObject, value: pyo3::PyObject) -> pyo3::PyResult<()> { + if self.instance.table.len() >= self.instance.maxsize.get() { + self.instance.popitem()?; + } + + match self.insert_slot { + Some(slot) => unsafe { + self.instance + .table + .insert_in_slot(key.hash, slot, (key, value)); + }, + None => { + self.instance + .table + .insert(key.hash, (key, value), |(x, _)| x.hash); + } + } + + self.instance.observed.change(); + Ok(()) + } +} From 931bd0e2cb138a55b1ed7f51426bc088cdc2b29f Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 8 Apr 2025 12:14:46 +0330 Subject: [PATCH 11/37] Fix RRCache --- python/cachebox/_cachebox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 906582d..2393da6 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -510,7 +510,7 @@ def __init__( - The cache size limit is immutable after initialization. - If an iterable is provided, the cache will be populated using the update method. """ - self._raw = _core.FIFOCache(maxsize, capacity=capacity) + self._raw = _core.RRCache(maxsize, capacity=capacity) if iterable is not None: self.update(iterable) From d791cc42e8427d538fe3f4d5ff9bcec685c4bb4c Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 8 Apr 2025 12:21:09 +0330 Subject: [PATCH 12/37] Fix RRCache popitem --- src/policies/random.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/policies/random.rs b/src/policies/random.rs index ebb61de..48e8ae7 100644 --- a/src/policies/random.rs +++ b/src/policies/random.rs @@ -67,7 +67,7 @@ impl RandomPolicy { if self.table.is_empty() { Ok(None) } else { - let nth = fastrand::usize(0..self.table.buckets()); + let nth = fastrand::usize(0..self.table.len()); let bucket = unsafe { self.table.iter().nth(nth).unwrap_unchecked() }; let (x, _) = unsafe { self.table.remove(bucket) }; From 82e5e1033f8561d13d1d0ee0f7415c1d5d4a8cff Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 8 Apr 2025 14:32:30 +0330 Subject: [PATCH 13/37] Optimize some operations --- Cargo.lock | 13 +-- Cargo.toml | 9 +- src/bridge/cache.rs | 8 +- src/bridge/fifocache.rs | 8 +- src/bridge/rrcache.rs | 6 +- src/common.rs | 3 +- src/lib.rs | 2 + src/mutex.rs | 173 +++++++++++++++++++++++++++++++++++++++ src/policies/nopolicy.rs | 4 +- 9 files changed, 196 insertions(+), 30 deletions(-) create mode 100644 src/mutex.rs diff --git a/Cargo.lock b/Cargo.lock index 344b18c..2395d29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,7 +21,8 @@ dependencies = [ "cfg-if", "fastrand", "hashbrown", - "parking_lot", + "lock_api", + "parking_lot_core", "pyo3", "pyo3-build-config", ] @@ -87,16 +88,6 @@ 
version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" -[[package]] -name = "parking_lot" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" -dependencies = [ - "lock_api", - "parking_lot_core", -] - [[package]] name = "parking_lot_core" version = "0.9.10" diff --git a/Cargo.toml b/Cargo.toml index 635869b..65f4659 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,12 +37,13 @@ features = ["macros", "extension-module"] [dependencies.cfg-if] version = "1.0.0" -[dependencies.parking_lot] -version = "0.12.3" +[dependencies.parking_lot_core] +version = "0.9.10" default-features = false -[profile.release.package."*"] -codegen-units = 1 # better optimizations +[dependencies.lock_api] +version = "0.4.12" +default-features = false [build-dependencies.pyo3-build-config] version = "0.24.1" diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs index a0da919..227f1e3 100644 --- a/src/bridge/cache.rs +++ b/src/bridge/cache.rs @@ -4,14 +4,14 @@ use crate::common::PreHashObject; #[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct Cache { - raw: parking_lot::Mutex, + raw: crate::mutex::Mutex, } #[allow(non_camel_case_types)] #[pyo3::pyclass(module = "cachebox._core")] pub struct cache_items { pub ptr: ObservedIterator, - pub iter: parking_lot::Mutex>, + pub iter: crate::mutex::Mutex>, } #[pyo3::pymethods] @@ -22,7 +22,7 @@ impl Cache { let raw = crate::policies::nopolicy::NoPolicy::new(maxsize, capacity)?; let self_ = Self { - raw: parking_lot::Mutex::new(raw), + raw: crate::mutex::Mutex::new(raw), }; Ok(self_) } @@ -194,7 +194,7 @@ impl Cache { let result = cache_items { ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: parking_lot::Mutex::new(iter), + iter: crate::mutex::Mutex::new(iter), }; pyo3::Py::new(slf.py(), result) diff --git 
a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs index b0a333c..0877aaf 100644 --- a/src/bridge/fifocache.rs +++ b/src/bridge/fifocache.rs @@ -4,14 +4,14 @@ use crate::common::PreHashObject; #[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct FIFOCache { - raw: parking_lot::Mutex, + raw: crate::mutex::Mutex, } #[allow(non_camel_case_types)] #[pyo3::pyclass(module = "cachebox._core")] pub struct fifocache_items { pub ptr: ObservedIterator, - pub iter: parking_lot::Mutex, + pub iter: crate::mutex::Mutex, } #[pyo3::pymethods] @@ -22,7 +22,7 @@ impl FIFOCache { let raw = crate::policies::fifo::FIFOPolicy::new(maxsize, capacity)?; let self_ = Self { - raw: parking_lot::Mutex::new(raw), + raw: crate::mutex::Mutex::new(raw), }; Ok(self_) } @@ -208,7 +208,7 @@ impl FIFOCache { let result = fifocache_items { ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: parking_lot::Mutex::new(iter), + iter: crate::mutex::Mutex::new(iter), }; pyo3::Py::new(slf.py(), result) diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs index 514ef83..7238625 100644 --- a/src/bridge/rrcache.rs +++ b/src/bridge/rrcache.rs @@ -5,7 +5,7 @@ use crate::common::PreHashObject; #[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct RRCache { - raw: parking_lot::Mutex, + raw: crate::mutex::Mutex, } #[pyo3::pymethods] @@ -16,7 +16,7 @@ impl RRCache { let raw = crate::policies::random::RandomPolicy::new(maxsize, capacity)?; let self_ = Self { - raw: parking_lot::Mutex::new(raw), + raw: crate::mutex::Mutex::new(raw), }; Ok(self_) } @@ -197,7 +197,7 @@ impl RRCache { let result = cache_items { ptr: ObservedIterator::new(slf.as_ptr(), state), - iter: parking_lot::Mutex::new(iter), + iter: crate::mutex::Mutex::new(iter), }; pyo3::Py::new(slf.py(), result) diff --git a/src/common.rs b/src/common.rs index e94591f..b889c52 100644 --- a/src/common.rs +++ b/src/common.rs @@ -1,10 +1,11 @@ +#[inline(always)] pub fn pyobject_equal( py: pyo3::Python<'_>, arg1: *mut pyo3::ffi::PyObject, 
arg2: *mut pyo3::ffi::PyObject, ) -> pyo3::PyResult { unsafe { - if std::ptr::addr_eq(arg1, arg2) { + if std::ptr::eq(arg1, arg2) { return Ok(true); } diff --git a/src/lib.rs b/src/lib.rs index 0fd75f7..9984e78 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,7 @@ use pyo3::prelude::*; +mod mutex; + #[macro_use] mod common; diff --git a/src/mutex.rs b/src/mutex.rs new file mode 100644 index 0000000..e218522 --- /dev/null +++ b/src/mutex.rs @@ -0,0 +1,173 @@ +//! Mutex lock +//! +//! Thanks to `Amanieu d'Antras` for this beautiful implementation. + +use core::sync::atomic::{AtomicU8, Ordering}; +use parking_lot_core::deadlock; +use parking_lot_core::{self, ParkResult, SpinWait, UnparkResult, UnparkToken, DEFAULT_PARK_TOKEN}; +use std::time::Instant; + +const TOKEN_NORMAL: UnparkToken = UnparkToken(0); +const TOKEN_HANDOFF: UnparkToken = UnparkToken(1); + +const LOCKED_BIT: u8 = 0b01; +const PARKED_BIT: u8 = 0b10; + +pub struct RawMutex { + state: AtomicU8, +} + +unsafe impl lock_api::RawMutex for RawMutex { + #[allow(clippy::declare_interior_mutable_const)] + const INIT: RawMutex = RawMutex { + state: AtomicU8::new(0), + }; + + type GuardMarker = lock_api::GuardSend; + + #[inline] + fn lock(&self) { + if self + .state + .compare_exchange_weak(0, LOCKED_BIT, Ordering::Acquire, Ordering::Relaxed) + .is_err() + { + self.lock_slow(None); + } + unsafe { deadlock::acquire_resource(self as *const _ as usize) }; + } + + #[inline] + fn try_lock(&self) -> bool { + let mut state = self.state.load(Ordering::Relaxed); + loop { + if state & LOCKED_BIT != 0 { + return false; + } + match self.state.compare_exchange_weak( + state, + state | LOCKED_BIT, + Ordering::Acquire, + Ordering::Relaxed, + ) { + Ok(_) => { + unsafe { deadlock::acquire_resource(self as *const _ as usize) }; + return true; + } + Err(x) => state = x, + } + } + } + + #[inline] + unsafe fn unlock(&self) { + deadlock::release_resource(self as *const _ as usize); + if self + .state + .compare_exchange(LOCKED_BIT, 
0, Ordering::Release, Ordering::Relaxed) + .is_ok() + { + return; + } + self.unlock_slow(false); + } + + #[inline] + fn is_locked(&self) -> bool { + let state = self.state.load(Ordering::Relaxed); + state & LOCKED_BIT != 0 + } +} + +impl RawMutex { + #[cold] + fn lock_slow(&self, timeout: Option) -> bool { + let mut spinwait = SpinWait::new(); + let mut state = self.state.load(Ordering::Relaxed); + loop { + if state & LOCKED_BIT == 0 { + match self.state.compare_exchange_weak( + state, + state | LOCKED_BIT, + Ordering::Acquire, + Ordering::Relaxed, + ) { + Ok(_) => return true, + Err(x) => state = x, + } + continue; + } + + if state & PARKED_BIT == 0 && spinwait.spin() { + state = self.state.load(Ordering::Relaxed); + continue; + } + + if state & PARKED_BIT == 0 { + if let Err(x) = self.state.compare_exchange_weak( + state, + state | PARKED_BIT, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + state = x; + continue; + } + } + + let addr = self as *const _ as usize; + let validate = || self.state.load(Ordering::Relaxed) == LOCKED_BIT | PARKED_BIT; + let before_sleep = || {}; + let timed_out = |_, was_last_thread| { + if was_last_thread { + self.state.fetch_and(!PARKED_BIT, Ordering::Relaxed); + } + }; + + match unsafe { + parking_lot_core::park( + addr, + validate, + before_sleep, + timed_out, + DEFAULT_PARK_TOKEN, + timeout, + ) + } { + ParkResult::Unparked(TOKEN_HANDOFF) => return true, + ParkResult::Unparked(_) => (), + ParkResult::Invalid => (), + ParkResult::TimedOut => return false, + } + + spinwait.reset(); + state = self.state.load(Ordering::Relaxed); + } + } + + #[cold] + fn unlock_slow(&self, force_fair: bool) { + let addr = self as *const _ as usize; + let callback = |result: UnparkResult| { + if result.unparked_threads != 0 && (force_fair || result.be_fair) { + if !result.have_more_threads { + self.state.store(LOCKED_BIT, Ordering::Relaxed); + } + return TOKEN_HANDOFF; + } + + if result.have_more_threads { + self.state.store(PARKED_BIT, 
Ordering::Release); + } else { + self.state.store(0, Ordering::Release); + } + TOKEN_NORMAL + }; + + unsafe { + parking_lot_core::unpark_one(addr, callback); + } + } +} + +pub type Mutex = lock_api::Mutex; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 43e17d9..04470d5 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -232,12 +232,10 @@ impl<'a> NoPolicyOccupied<'a> { #[inline] pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { unsafe { - let old_value = std::mem::replace(&mut self.bucket.as_mut().1, value); - // In update we don't need to change this; because this does not change the memory address ranges // self.instance.observed.change(); - Ok(old_value) + Ok(std::mem::replace(&mut self.bucket.as_mut().1, value)) } } From 5394aebdfa093a480414f502508e640567a72aed Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 11 Apr 2025 10:46:52 +0330 Subject: [PATCH 14/37] - Rewrite LRUCache - The `n` parameter of the `LRUCache.least_recently_used` method has been removed - The strictness in `__eq__` methods was reduced --- python/cachebox/__init__.py | 1 + python/cachebox/_cachebox.py | 228 +++++++++++++++++++++++ python/tests/test_caches.py | 61 ++++++- src/bridge/lrucache.rs | 329 +++++++++++++++++++++++++++++++++ src/bridge/mod.rs | 1 + src/common.rs | 27 ++- src/lib.rs | 2 + src/linked_list.rs | 195 ++++++++++++++++++++ src/policies/fifo.rs | 34 ++-- src/policies/lru.rs | 339 +++++++++++++++++++++++++++++++++++ src/policies/mod.rs | 1 + 11 files changed, 1191 insertions(+), 27 deletions(-) create mode 100644 src/bridge/lrucache.rs create mode 100644 src/linked_list.rs create mode 100644 src/policies/lru.rs diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py index ac777d9..e774c5d 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -2,6 +2,7 @@ Cache as Cache, FIFOCache as FIFOCache, RRCache as RRCache, + LRUCache as LRUCache, BaseCacheImpl as BaseCacheImpl, 
IteratorView as IteratorView, ) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 2393da6..2c3c330 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -683,3 +683,231 @@ def __repr__(self) -> str: self._raw.maxsize(), _items_to_str(self._raw.items(), len(self._raw)), ) + + +class LRUCache(BaseCacheImpl[KT, VT]): + """ + Thread-safe Least Recently Used (LRU) cache implementation. + + Provides a cache that automatically removes the least recently used items when + the cache reaches its maximum size. Supports various operations like insertion, + retrieval, and management of cached items with configurable maximum size and + initial capacity. + + Key features: + - Configurable maximum cache size + - Optional initial capacity allocation + - Thread-safe operations + - Efficient key-value pair management + - Supports initialization from dictionaries or iterables + """ + + def __init__( + self, + maxsize: int, + iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, + *, + capacity: int = 0, + ) -> None: + """ + Initialize a new LRU Cache instance. + + Args: + maxsize (int): Maximum size of the cache. Zero indicates unlimited size. + iterable (dict | Iterable[tuple], optional): Initial data to populate the cache. + capacity (int, optional): Pre-allocated capacity for the cache to minimize reallocations. + + Notes: + - The cache size is immutable after initialization. + - If an iterable is provided, it will be used to populate the cache. 
+ """ + self._raw = _core.LRUCache(maxsize, capacity=capacity) + + if iterable is not None: + self.update(iterable) + + @property + def maxsize(self) -> int: + return self._raw.maxsize() + + def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" + return self._raw.capacity() + + def __len__(self) -> int: + return len(self._raw) + + def __sizeof__(self): # pragma: no cover + return self._raw.__sizeof__() + + def __contains__(self, key: KT) -> bool: + return key in self._raw + + def __bool__(self) -> bool: + return not self.is_empty() + + def is_empty(self) -> bool: + return self._raw.is_empty() + + def is_full(self) -> bool: + return self._raw.is_full() + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though; + """ + return self._raw.insert(key, value) + + def peek(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Searches for a key-value in the cache and returns it (without moving the key to recently used). + """ + try: + return self._raw.peek(key) + except _core.CoreKeyError: + return default + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Equals to `self[key]`, but returns `default` if the cache don't have this key present. + """ + try: + return self._raw.get(key) + except _core.CoreKeyError: + return default + + def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Removes specified key and return the corresponding value. If the key is not found, returns the `default`. 
+ """ + try: + return self._raw.remove(key) + except _core.CoreKeyError: + return default + + def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Return the value for key if key is in the cache, else default. + """ + return self._raw.setdefault(key, default) + + def popitem(self) -> typing.Tuple[KT, VT]: + try: + return self._raw.popitem() + except _core.CoreKeyError: + raise KeyError() from None + + def drain(self, n: int) -> int: # pragma: no cover + """Does the `popitem()` `n` times and returns count of removed items.""" + if n <= 0: + return 0 + + for i in range(n): + try: + self._raw.popitem() + except _core.CoreKeyError: + return i + + return i + + def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: + """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" + if hasattr(iterable, "items"): + iterable = iterable.items() + + self._raw.update(iterable) + + def __setitem__(self, key: KT, value: VT) -> None: + self.insert(key, value) + + def __getitem__(self, key: KT) -> VT: + try: + return self._raw.get(key) + except _core.CoreKeyError: + raise KeyError(key) from None + + def __delitem__(self, key: KT) -> None: + try: + self._raw.remove(key) + except _core.CoreKeyError: + raise KeyError(key) from None + + def __eq__(self, other) -> bool: + if not isinstance(other, LRUCache): + return False # pragma: no cover + + return self._raw == other._raw + + def __ne__(self, other) -> bool: + if not isinstance(other, LRUCache): + return False # pragma: no cover + + return self._raw != other._raw + + def shrink_to_fit(self) -> None: + """Shrinks the cache to fit len(self) elements.""" + self._raw.shrink_to_fit() + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. + + If reuse is True, will not free the memory for reusing in the future. 
+ """ + self._raw.clear(reuse) + + def items(self) -> IteratorView[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x) + + def keys(self) -> IteratorView[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x[0]) + + def values(self) -> IteratorView[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x[1]) + + def least_recently_used(self) -> typing.Optional[KT]: + """ + Returns the key in the cache that has not been accessed in the longest time. + """ + return self._raw.least_recently_used() + + def most_recently_used(self) -> typing.Optional[KT]: + """ + Returns the key in the cache that has been accessed in the shortest time. 
+ """ + return self._raw.most_recently_used() + + def __iter__(self) -> IteratorView[KT]: + return self.keys() + + def __repr__(self) -> str: + return "{}[{}/{}]({})".format( + type(self).__name__, + len(self._raw), + self._raw.maxsize(), + _items_to_str(self._raw.items(), len(self._raw)), + ) diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index c4eb608..50bfbca 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -2,6 +2,7 @@ Cache, FIFOCache, RRCache, + LRUCache, ) import pytest from .mixin import _TestMixin @@ -93,10 +94,68 @@ def test_first_last(self): assert obj.first(-10000) is None - class TestRRCache(_TestMixin): CACHE = RRCache def test_pickle(self): self._test_pickle(lambda c1, c2: None) + +class TestLRUCache(_TestMixin): + CACHE = LRUCache + + def test_policy(self): + obj = self.CACHE(3) + + obj[1] = 1 + obj[2] = 2 + obj[3] = 3 + + assert (1, 1) == obj.popitem() + + obj[1] = 1 + obj[2] + + assert (3, 3) == obj.popitem() + + obj[4] = 4 + assert 1 == obj.get(1) + + obj[5] = 5 + assert 2 not in obj + + def test_ordered_iterators(self): + obj = self.CACHE(20, **self.KWARGS, capacity=20) + + for i in range(6): + obj[i] = i * 2 + + obj[1] + obj[5] + obj[3] = 7 + + k = [0, 2, 4, 1, 5, 3] + v = [0, 4, 8, 2, 10, 7] + assert k == list(obj.keys()) + assert v == list(obj.values()) + assert list(zip(k, v)) == list(obj.items()) + + def test_recently_used_funcs(self): + obj = LRUCache(10) + + for i in range(6): + obj[i] = i * 2 + + obj[1] + obj[5] + obj[3] = 7 + obj.peek(4) + + assert obj.most_recently_used() == 3 + assert obj.least_recently_used() == 0 + + def test_pickle(self): + def inner(c1, c2): + assert list(c1.items()) == list(c2.items()) + + self._test_pickle(inner) diff --git a/src/bridge/lrucache.rs b/src/bridge/lrucache.rs new file mode 100644 index 0000000..a857eba --- /dev/null +++ b/src/bridge/lrucache.rs @@ -0,0 +1,329 @@ +use crate::common::Entry; +use crate::common::ObservedIterator; +use 
crate::common::PreHashObject; + +#[pyo3::pyclass(module = "cachebox._core", frozen)] +pub struct LRUCache { + raw: crate::mutex::Mutex, +} + +#[allow(non_camel_case_types)] +#[pyo3::pyclass(module = "cachebox._core")] +pub struct lrucache_items { + pub ptr: ObservedIterator, + pub iter: crate::mutex::Mutex, +} + +#[pyo3::pymethods] +impl LRUCache { + #[new] + #[pyo3(signature=(maxsize, *, capacity=0))] + fn __new__(maxsize: usize, capacity: usize) -> pyo3::PyResult { + let raw = crate::policies::lru::LRUPolicy::new(maxsize, capacity)?; + + let self_ = Self { + raw: crate::mutex::Mutex::new(raw), + }; + Ok(self_) + } + + fn _state(&self) -> u16 { + self.raw.lock().observed.get() + } + + fn maxsize(&self) -> usize { + self.raw.lock().maxsize() + } + + fn capacity(&self) -> usize { + self.raw.lock().capacity() + } + + fn __len__(&self) -> usize { + self.raw.lock().len() + } + + fn __sizeof__(&self) -> usize { + let lock = self.raw.lock(); + + lock.capacity() + * (std::mem::size_of::() + std::mem::size_of::()) + } + + fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(_) => Ok(true), + None => Ok(false), + } + } + + fn is_empty(&self) -> bool { + self.raw.lock().is_empty() + } + + fn is_full(&self) -> bool { + self.raw.lock().is_full() + } + + fn insert( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + value: pyo3::PyObject, + ) -> pyo3::PyResult> { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry_with_slot(py, &key)? 
{ + Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Absent(entry) => { + entry.insert(key, value)?; + Ok(None) + } + } + } + + fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(val) => Ok(val.clone_ref(py)), + None => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn peek(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.peek(py, &key)? { + Some(val) => Ok(val.clone_ref(py)), + None => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python<'_>, + iterable: pyo3::PyObject, + ) -> pyo3::PyResult<()> { + if slf.as_ptr() == iterable.as_ptr() { + return Ok(()); + } + + let mut lock = slf.raw.lock(); + lock.extend(py, iterable) + } + + fn __richcmp__( + slf: pyo3::PyRef<'_, Self>, + other: pyo3::PyObject, + op: pyo3::class::basic::CompareOp, + ) -> pyo3::PyResult { + let other = other.extract::>(slf.py())?; + + match op { + pyo3::class::basic::CompareOp::Eq => { + if slf.as_ptr() == other.as_ptr() { + return Ok(true); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2) + } + pyo3::class::basic::CompareOp::Ne => { + if slf.as_ptr() == other.as_ptr() { + return Ok(false); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2).map(|r| !r) + } + _ => Err(pyo3::PyErr::new::( + "only '==' or '!=' are supported", + )), + } + } + + fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? 
{ + Entry::Occupied(entry) => { + let (_, value) = entry.remove(); + Ok(value) + } + Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { + let mut lock = self.raw.lock(); + + match lock.popitem() { + Some((key, val)) => Ok((key.obj, val)), + None => Err(pyo3::PyErr::new::(())), + } + } + + fn clear(&self, reuse: bool) { + let mut lock = self.raw.lock(); + lock.clear(); + + if !reuse { + lock.shrink_to_fit(); + } + } + + fn shrink_to_fit(&self) { + let mut lock = self.raw.lock(); + lock.shrink_to_fit(); + } + + fn setdefault( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let (_, ref value) = entry.into_value(); + Ok(value.clone_ref(py)) + } + Entry::Absent(entry) => { + entry.insert(key, default.clone_ref(py))?; + Ok(default) + } + } + } + + fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + let lock = slf.raw.lock(); + let state = lock.observed.get(); + let iter = lock.iter(); + + let result = lrucache_items { + ptr: ObservedIterator::new(slf.as_ptr(), state), + iter: crate::mutex::Mutex::new(iter), + }; + + pyo3::Py::new(slf.py(), result) + } + + fn least_recently_used(&self, py: pyo3::Python<'_>) -> Option { + let lock = self.raw.lock(); + lock.least_recently_used().map(|x| x.0.obj.clone_ref(py)) + } + + fn most_recently_used(&self, py: pyo3::Python<'_>) -> Option { + let lock = self.raw.lock(); + lock.most_recently_used().map(|x| x.0.obj.clone_ref(py)) + } + + fn __getnewargs__(&self) -> (usize,) { + (0,) + } + + fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let lock = self.raw.lock(); + + let state = unsafe { + let list = pyo3::ffi::PyList_New(0); + if list.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + for node in lock.iter() 
{ + let (hk, val) = &(*node.as_ptr()).element; + + let tp = tuple!( + py, + 2, + 0 => hk.obj.clone_ref(py).as_ptr(), + 1 => val.clone_ref(py).as_ptr(), + ); + + if let Err(x) = tp { + pyo3::ffi::Py_DECREF(list); + return Err(x); + } + + if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { + pyo3::ffi::Py_DECREF(list); + return Err(pyo3::PyErr::fetch(py)); + } + } + + let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); + + tuple!( + py, + 3, + 0 => maxsize, + 1 => list, + 2 => capacity, + )? + }; + + Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) + } + + pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + let mut lock = self.raw.lock(); + lock.from_pickle(py, state.as_ptr()) + } + + pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + for node in self.raw.lock().iter() { + let value = unsafe { node.as_ref() }; + + visit.call(&value.element.0.obj)?; + visit.call(&value.element.1)?; + } + Ok(()) + } + + pub fn __clear__(&self) { + let mut lock = self.raw.lock(); + lock.clear() + } +} + +#[pyo3::pymethods] +impl lrucache_items { + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + #[allow(unused_mut)] + fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { + let mut iter = slf.iter.lock(); + + slf.ptr.proceed(slf.py())?; + + if let Some(x) = iter.next() { + let (key, val) = unsafe { &x.as_ref().element }; + + tuple!( + slf.py(), + 2, + 0 => key.obj.clone_ref(slf.py()).into_ptr(), + 1 => val.clone_ref(slf.py()).into_ptr(), + ) + } else { + Err(pyo3::PyErr::new::(())) + } + } +} diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index e229422..95316b1 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -4,4 +4,5 @@ create_exception!(_core, CoreKeyError, pyo3::exceptions::PyException); pub mod cache; pub mod fifocache; +pub 
mod lrucache; pub mod rrcache; diff --git a/src/common.rs b/src/common.rs index b889c52..6064e0d 100644 --- a/src/common.rs +++ b/src/common.rs @@ -1,4 +1,4 @@ -#[inline(always)] +#[inline] pub fn pyobject_equal( py: pyo3::Python<'_>, arg1: *mut pyo3::ffi::PyObject, @@ -181,6 +181,15 @@ pub enum Entry { Absent(V), } +impl Entry { + pub fn map(self, f: impl FnOnce(O) -> T) -> Option { + match self { + Entry::Occupied(c) => Some(f(c)), + Entry::Absent(_) => None, + } + } +} + /// A trait for adding `try_find` and `try_find_entry` methods to [`hashbrown::HashTable`] pub trait TryFindMethods { /// Searches for an element in the table. @@ -257,23 +266,23 @@ impl TryFindMethods for hashbrown::raw::RawTable { /// Observe caches' changes #[derive(Debug)] -pub struct Observed(std::num::NonZeroU16); +pub struct Observed(u16); impl Observed { pub fn new() -> Self { - Self(unsafe { std::num::NonZeroU16::new_unchecked(1) }) + Self(0) } pub fn change(&mut self) { - self.0 = self - .0 - .checked_add(1) - .or_else(|| Some(unsafe { std::num::NonZeroU16::new_unchecked(1) })) - .unwrap(); + if self.0 == u16::MAX { + self.0 = 0; + } else { + self.0 = unsafe { self.0.unchecked_add(1) }; + } } pub fn get(&self) -> u16 { - self.0.get() + self.0 } } diff --git a/src/lib.rs b/src/lib.rs index 9984e78..9b855d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ use pyo3::prelude::*; +mod linked_list; mod mutex; #[macro_use] @@ -20,6 +21,7 @@ fn _core(py: pyo3::Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/linked_list.rs b/src/linked_list.rs new file mode 100644 index 0000000..58a71cd --- /dev/null +++ b/src/linked_list.rs @@ -0,0 +1,195 @@ +use crate::common::PreHashObject; +use std::ptr::NonNull; + +/// A doubly-linked list implementation with optional head and tail nodes. 
+/// +/// This list maintains references to the first and last nodes, and tracks the total number of elements. +/// Uses `NonNull` pointers for efficient memory management and allows for constant-time +/// insertion and deletion at both ends of the list. +pub struct LinkedList { + pub head: Option>, // front + pub tail: Option>, // back + len: usize, +} + +/// A node in a doubly-linked list, containing a reference to the previous and next nodes, +/// and storing a key-value pair as its element. +/// +/// The node uses `NonNull` pointers for efficient memory management and allows for +/// constant-time insertion and deletion in the linked list. +pub struct Node { + pub prev: Option>, + pub next: Option>, + pub element: (PreHashObject, pyo3::PyObject), +} + +impl LinkedList { + #[inline] + pub fn new() -> Self { + Self { + head: None, + tail: None, + len: 0, + } + } + + pub fn push_back(&mut self, key: PreHashObject, val: pyo3::PyObject) -> NonNull { + unsafe { + let node = NonNull::new_unchecked(Box::into_raw(Box::new(Node { + prev: None, + next: None, + element: (key, val), + }))); + + if let Some(old) = self.tail { + (*old.as_ptr()).next = Some(node); + (*node.as_ptr()).prev = Some(old); + } else { + // means list is empty, so this node is also can be the front of list + debug_assert!(self.head.is_none(), "head is not None"); + self.head = Some(node); + } + + self.tail = Some(node); + self.len += 1; + node + } + } + + pub fn pop_front(&mut self) -> Option<(PreHashObject, pyo3::PyObject)> { + unsafe { + self.head.map(|node| { + let boxed_node = Box::from_raw(node.as_ptr()); + debug_assert!(boxed_node.prev.is_none(), "head.prev is not None"); + + self.head = boxed_node.next; + + match self.head { + None => self.tail = None, + // Not creating new mutable (unique!) references overlapping `element`. 
+ Some(head) => (*head.as_ptr()).prev = None, + } + + debug_assert!(self.len > 0, "self.len is zero"); + self.len -= 1; + boxed_node.element + }) + } + } + + #[inline] + pub fn clear(&mut self) { + while self.pop_front().is_some() {} + } + + pub unsafe fn remove(&mut self, node: NonNull) -> (PreHashObject, pyo3::PyObject) { + let node = Box::from_raw(node.as_ptr()); + let result = node.element; + + match node.next { + Some(next) => (*next.as_ptr()).prev = node.prev, + None => { + // Means this node is our self.tail + self.tail = node.prev; + } + } + + match node.prev { + Some(prev) => (*prev.as_ptr()).next = node.next, + None => { + // Means this node is our self.head + self.head = node.next; + } + } + + self.len -= 1; + result + } + + pub unsafe fn move_back(&mut self, node: NonNull) { + if (*node.as_ptr()).next.is_none() { + // Means this node is our self.tail + return; + } + + // unlink + match (*node.as_ptr()).next { + Some(next) => (*next.as_ptr()).prev = (*node.as_ptr()).prev, + None => std::hint::unreachable_unchecked(), + } + + match (*node.as_ptr()).prev { + Some(prev) => (*prev.as_ptr()).next = (*node.as_ptr()).next, + None => { + // Means this node is our self.head + self.head = (*node.as_ptr()).next; + } + } + + (*node.as_ptr()).next = None; + (*node.as_ptr()).prev = None; + + // push_back again + if let Some(old) = self.tail { + (*old.as_ptr()).next = Some(node); + (*node.as_ptr()).prev = Some(old); + } else { + // means list is empty, so this node is also can be the front of list + debug_assert!(self.head.is_none(), "head is not None"); + self.head = Some(node); + } + + self.tail = Some(node); + } + + #[inline] + pub fn iter(&self) -> Iter { + Iter { + head: self.head, + len: self.len, + } + } +} + +pub struct Iter { + head: Option>, + len: usize, +} + +impl Iterator for Iter { + type Item = NonNull; + + #[inline] + fn next(&mut self) -> Option { + if self.len == 0 { + None + } else { + self.head.inspect(|node| unsafe { + self.len -= 1; + self.head = 
(*node.as_ptr()).next; + }) + } + } +} + +impl Drop for LinkedList { + fn drop(&mut self) { + struct DropGuard<'a>(&'a mut LinkedList); + + impl Drop for DropGuard<'_> { + fn drop(&mut self) { + // Continue the same loop we do below. This only runs when a destructor has + // panicked. If another one panics this will abort. + while self.0.pop_front().is_some() {} + } + } + + // Wrap self so that if a destructor panics, we can try to keep looping + let guard = DropGuard(self); + while guard.0.pop_front().is_some() {} + core::mem::forget(guard); + } +} + +unsafe impl Sync for Iter {} +unsafe impl Send for Iter {} diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs index 6bbad33..667c5f0 100644 --- a/src/policies/fifo.rs +++ b/src/policies/fifo.rs @@ -1,5 +1,3 @@ -//! The FIFO policy, This is inspired by Rust's indexmap with some changes. - use crate::common::Entry; use crate::common::NoLifetimeSliceIter; use crate::common::Observed; @@ -87,11 +85,6 @@ impl FIFOPolicy { (self.table.capacity(), self.entries.capacity()) } - // #[inline] - // pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::PyObject)> { - // unsafe { self.table.iter() } - // } - #[inline] fn decrement_indexes(&mut self, start: usize, end: usize) { if start <= 1 && end == self.entries.len() && self.n_shifts < MAX_N_SHIFT { @@ -239,19 +232,26 @@ impl FIFOPolicy { return Ok(false); } - if self.entries.len() != other.entries.len() { + if self.table.len() != other.table.len() { return Ok(false); } - for index in 0..self.entries.len() { - let (key1, value1) = &self.entries[index]; - let (key2, value2) = &other.entries[index]; - - if key1.hash != key2.hash - || !key1.equal(py, key2)? - || !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? 
- { - return Ok(false); + unsafe { + for index1 in self.table.iter().map(|x| x.as_ref()) { + let (key1, value1) = &self.entries[(*index1) - self.n_shifts]; + + match other.table.try_find(key1.hash, |x| { + key1.equal(py, &other.entries[(*x) - other.n_shifts].0) + })? { + Some(bucket) => { + let (_, value2) = &other.entries[(*bucket.as_ref()) - other.n_shifts]; + + if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? { + return Ok(false); + } + } + None => return Ok(false), + } } } diff --git a/src/policies/lru.rs b/src/policies/lru.rs new file mode 100644 index 0000000..aea0bff --- /dev/null +++ b/src/policies/lru.rs @@ -0,0 +1,339 @@ +use crate::common::Entry; +use crate::common::Observed; +use crate::common::PreHashObject; +use crate::common::TryFindMethods; +use crate::linked_list; + +type NotNullNode = std::ptr::NonNull; + +pub struct LRUPolicy { + table: hashbrown::raw::RawTable, + list: linked_list::LinkedList, + maxsize: std::num::NonZeroUsize, + pub observed: Observed, +} + +pub struct LRUPolicyOccupied<'a> { + instance: &'a mut LRUPolicy, + bucket: hashbrown::raw::Bucket, +} + +pub struct LRUPolicyAbsent<'a> { + instance: &'a mut LRUPolicy, + insert_slot: Option, +} + +impl LRUPolicy { + #[inline] + pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { + let maxsize = non_zero_or!(maxsize, isize::MAX as usize); + capacity = capacity.min(maxsize.get()); + + Ok(Self { + table: new_table!(capacity)?, + list: linked_list::LinkedList::new(), + maxsize, + observed: Observed::new(), + }) + } + + #[inline] + pub fn maxsize(&self) -> usize { + self.maxsize.get() + } + + #[inline] + pub fn len(&self) -> usize { + self.table.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.table.is_empty() + } + + #[inline] + pub fn is_full(&self) -> bool { + self.table.len() == self.maxsize.get() + } + + #[inline] + pub fn capacity(&self) -> usize { + self.table.capacity() + } + + pub fn popitem(&mut self) -> Option<(PreHashObject, 
pyo3::PyObject)> { + let ret = self.list.head?; + + unsafe { + self.table + .remove_entry((*ret.as_ptr()).element.0.hash, |node| { + core::ptr::eq(node.as_ptr(), ret.as_ptr()) + }) + .expect("popitem key not found."); + } + + self.observed.change(); + Some(self.list.pop_front().unwrap()) + } + + #[rustfmt::skip] + pub fn entry( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find(key.hash, |x| unsafe { x.as_ref().element.0.equal(py, key) })? + { + Some(bucket) => { + unsafe { + self.list.move_back(*bucket.as_ptr()); + } + + Ok( + Entry::Occupied(LRUPolicyOccupied { instance: self, bucket }) + ) + } + None => { + Ok( + Entry::Absent(LRUPolicyAbsent { instance: self, insert_slot: None }) + ) + }, + } + } + + #[rustfmt::skip] + pub fn entry_with_slot( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find_or_find_insert_slot( + key.hash, + |x| unsafe { x.as_ref().element.0.equal(py, key) }, + |x| unsafe { x.as_ref().element.0.hash } + )? { + Ok(bucket) => { + unsafe { + self.list.move_back(*bucket.as_ptr()); + } + + Ok( + Entry::Occupied(LRUPolicyOccupied { instance: self, bucket }) + ) + } + Err(slot) => { + Ok( + Entry::Absent(LRUPolicyAbsent { instance: self, insert_slot: Some(slot) }) + ) + }, + } + } + + pub fn lookup( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + let entry = self.entry(py, key)?; + + Ok(entry.map(|x| unsafe { &x.bucket.as_ref().as_ref().element.1 })) + } + + pub fn peek( + &self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + let result = self + .table + .try_find(key.hash, |x| unsafe { x.as_ref().element.0.equal(py, key) })? 
+ .map(|x| unsafe { &x.as_ref().as_ref().element.1 }); + + Ok(result) + } + + #[inline] + pub fn clear(&mut self) { + self.table.clear(); + self.list.clear(); + self.observed.change(); + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.table + .shrink_to(self.table.len(), |x| unsafe { x.as_ref().element.0.hash }); + + self.observed.change(); + } + + pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + if self.maxsize != other.maxsize { + return Ok(false); + } + + if self.table.len() != other.table.len() { + return Ok(false); + } + + unsafe { + for node in self.table.iter().map(|x| x.as_ref()) { + let (key1, value1) = &node.as_ref().element; + + match other + .table + .try_find(key1.hash, |x| key1.equal(py, &x.as_ref().element.0))? + { + Some(bucket) => { + let (_, value2) = &bucket.as_ref().as_ref().element; + + if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? { + return Ok(false); + } + } + None => return Ok(false), + } + } + } + + Ok(true) + } + + pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { + use pyo3::types::{PyAnyMethods, PyDictMethods}; + + if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + for (key, value) in dict.iter() { + let hk = + unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; + + match self.entry_with_slot(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value.unbind())?; + } + Entry::Absent(entry) => { + entry.insert(hk, value.unbind())?; + } + } + } + } else { + for pair in iterable.bind(py).try_iter()? { + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match self.entry_with_slot(py, &hk)? 
{ + Entry::Occupied(mut entry) => { + entry.update(value)?; + } + Entry::Absent(entry) => { + entry.insert(hk, value)?; + } + } + } + } + + Ok(()) + } + + pub fn iter(&self) -> linked_list::Iter { + self.list.iter() + } + + pub fn least_recently_used(&self) -> Option<&(PreHashObject, pyo3::PyObject)> { + self.list.head.map(|x| unsafe { &x.as_ref().element }) + } + + pub fn most_recently_used(&self) -> Option<&(PreHashObject, pyo3::PyObject)> { + self.list.tail.map(|x| unsafe { &x.as_ref().element }) + } + + #[allow(clippy::wrong_self_convention)] + #[inline] + pub fn from_pickle( + &mut self, + py: pyo3::Python<'_>, + state: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + unsafe { + tuple!(check state, size=3)?; + let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); + + let mut new = Self::new(maxsize, capacity)?; + new.extend(py, iterable)?; + + *self = new; + Ok(()) + } + } +} + +impl<'a> LRUPolicyOccupied<'a> { + #[inline] + pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + let item = unsafe { self.bucket.as_mut() }; + unsafe { + self.instance.list.move_back(*item); + } + + // In update we don't need to change this; because this does not change the memory address ranges + // self.instance.observed.change(); + + Ok(unsafe { std::mem::replace(&mut item.as_mut().element.1, value) }) + } + + #[inline] + pub fn remove(self) -> (PreHashObject, pyo3::PyObject) { + // let (PreHashObject { hash, .. 
}, _) = &self.instance.entries[self.index - self.instance.n_shifts]; + let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; + let item = unsafe { self.instance.list.remove(item) }; + + self.instance.observed.change(); + item + } + + #[inline] + pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { + let item = unsafe { self.bucket.as_mut() }; + unsafe { &mut item.as_mut().element } + } +} + +impl LRUPolicyAbsent<'_> { + #[inline] + pub fn insert(self, key: PreHashObject, value: pyo3::PyObject) -> pyo3::PyResult<()> { + if self.instance.table.len() >= self.instance.maxsize.get() { + self.instance.popitem(); + } + + let hash = key.hash; + let node = self.instance.list.push_back(key, value); + + match self.insert_slot { + Some(slot) => unsafe { + self.instance.table.insert_in_slot(hash, slot, node); + }, + None => { + self.instance + .table + .insert(hash, node, |x| unsafe { x.as_ref().element.0.hash }); + } + } + + self.instance.observed.change(); + Ok(()) + } +} + +unsafe impl Send for LRUPolicy {} diff --git a/src/policies/mod.rs b/src/policies/mod.rs index 6a070b4..d666e55 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -1,3 +1,4 @@ pub mod fifo; +pub mod lru; pub mod nopolicy; pub mod random; From 83d318f1ae4a5061f58a7bef43bbda68b5c98c2b Mon Sep 17 00:00:00 2001 From: awolverp Date: Sat, 12 Apr 2025 18:53:30 +0330 Subject: [PATCH 15/37] Refactor some part of LFUCache (not completed) Optimize some operations in LRUCache --- src/bridge/lfucache.rs | 298 +++++++++++++++++++++++++++++++++++++++ src/bridge/mod.rs | 1 + src/common.rs | 16 +-- src/lazyheap.rs | 173 +++++++++++++++++++++++ src/lib.rs | 2 + src/policies/lfu.rs | 313 +++++++++++++++++++++++++++++++++++++++++ src/policies/lru.rs | 21 +-- src/policies/mod.rs | 1 + 8 files changed, 807 insertions(+), 18 deletions(-) create mode 100644 src/bridge/lfucache.rs create mode 100644 src/lazyheap.rs create mode 100644 src/policies/lfu.rs diff --git a/src/bridge/lfucache.rs 
b/src/bridge/lfucache.rs new file mode 100644 index 0000000..f84646d --- /dev/null +++ b/src/bridge/lfucache.rs @@ -0,0 +1,298 @@ +use crate::common::Entry; +use crate::common::PreHashObject; + +#[pyo3::pyclass(module = "cachebox._core", frozen)] +pub struct LFUCache { + raw: crate::mutex::Mutex, +} + +#[pyo3::pymethods] +impl LFUCache { + #[new] + #[pyo3(signature=(maxsize, *, capacity=0))] + fn __new__(maxsize: usize, capacity: usize) -> pyo3::PyResult { + let raw = crate::policies::lfu::LFUPolicy::new(maxsize, capacity)?; + + let self_ = Self { + raw: crate::mutex::Mutex::new(raw), + }; + Ok(self_) + } + + fn _state(&self) -> u16 { + self.raw.lock().observed.get() + } + + fn maxsize(&self) -> usize { + self.raw.lock().maxsize() + } + + fn capacity(&self) -> usize { + self.raw.lock().capacity() + } + + fn __len__(&self) -> usize { + self.raw.lock().len() + } + + fn __sizeof__(&self) -> usize { + let lock = self.raw.lock(); + + lock.capacity() + * (std::mem::size_of::() + std::mem::size_of::()) + } + + fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(_) => Ok(true), + None => Ok(false), + } + } + + fn is_empty(&self) -> bool { + self.raw.lock().is_empty() + } + + fn is_full(&self) -> bool { + self.raw.lock().is_full() + } + + #[pyo3(signature=(key, value, freq=0usize))] + fn insert( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + value: pyo3::PyObject, + freq: usize, + ) -> pyo3::PyResult> { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry_with_slot(py, &key)? 
{ + Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Absent(entry) => { + entry.insert(key, value, freq)?; + Ok(None) + } + } + } + + fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(val) => Ok(val.clone_ref(py)), + None => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn peek(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.peek(py, &key)? { + Some(val) => Ok(val.clone_ref(py)), + None => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python<'_>, + iterable: pyo3::PyObject, + ) -> pyo3::PyResult<()> { + if slf.as_ptr() == iterable.as_ptr() { + return Ok(()); + } + + let mut lock = slf.raw.lock(); + lock.extend(py, iterable) + } + + fn __richcmp__( + slf: pyo3::PyRef<'_, Self>, + other: pyo3::PyObject, + op: pyo3::class::basic::CompareOp, + ) -> pyo3::PyResult { + let other = other.extract::>(slf.py())?; + + match op { + pyo3::class::basic::CompareOp::Eq => { + if slf.as_ptr() == other.as_ptr() { + return Ok(true); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2) + } + pyo3::class::basic::CompareOp::Ne => { + if slf.as_ptr() == other.as_ptr() { + return Ok(false); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2).map(|r| !r) + } + _ => Err(pyo3::PyErr::new::( + "only '==' or '!=' are supported", + )), + } + } + + fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? 
{ + Entry::Occupied(entry) => { + let (_, value, _) = entry.remove(); + Ok(value) + } + Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn popitem(&self) -> pyo3::PyResult<(pyo3::PyObject, pyo3::PyObject)> { + let mut lock = self.raw.lock(); + + match lock.popitem() { + Some((key, val, _)) => Ok((key.obj, val)), + None => Err(pyo3::PyErr::new::(())), + } + } + + fn clear(&self, reuse: bool) { + let mut lock = self.raw.lock(); + lock.clear(); + + if !reuse { + lock.shrink_to_fit(); + } + } + + fn shrink_to_fit(&self) { + let mut lock = self.raw.lock(); + lock.shrink_to_fit(); + } + + #[pyo3(signature=(key, default, freq=0usize))] + fn setdefault( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + freq: usize, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let node = entry.into_value(); + Ok(unsafe { node.as_ref().1.clone_ref(py) }) + } + Entry::Absent(entry) => { + entry.insert(key, default.clone_ref(py), freq)?; + Ok(default) + } + } + } + + // fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + // let lock = slf.raw.lock(); + // let state = lock.observed.get(); + // let iter = lock.iter(); + + // let result = lrucache_items { + // ptr: ObservedIterator::new(slf.as_ptr(), state), + // iter: crate::mutex::Mutex::new(iter), + // }; + + // pyo3::Py::new(slf.py(), result) + // } + + // fn least_recently_used(&self, py: pyo3::Python<'_>) -> Option { + // let lock = self.raw.lock(); + // lock.least_recently_used().map(|x| x.0.obj.clone_ref(py)) + // } + + // fn most_recently_used(&self, py: pyo3::Python<'_>) -> Option { + // let lock = self.raw.lock(); + // lock.most_recently_used().map(|x| x.0.obj.clone_ref(py)) + // } + + // fn __getnewargs__(&self) -> (usize,) { + // (0,) + // } + + // fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + // let lock = self.raw.lock(); 
+ + // let state = unsafe { + // let list = pyo3::ffi::PyList_New(0); + // if list.is_null() { + // return Err(pyo3::PyErr::fetch(py)); + // } + + // for node in lock.iter() { + // let (hk, val) = &(*node.as_ptr()).element; + + // let tp = tuple!( + // py, + // 2, + // 0 => hk.obj.clone_ref(py).as_ptr(), + // 1 => val.clone_ref(py).as_ptr(), + // ); + + // if let Err(x) = tp { + // pyo3::ffi::Py_DECREF(list); + // return Err(x); + // } + + // if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { + // pyo3::ffi::Py_DECREF(list); + // return Err(pyo3::PyErr::fetch(py)); + // } + // } + + // let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + // let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); + + // tuple!( + // py, + // 3, + // 0 => maxsize, + // 1 => list, + // 2 => capacity, + // )? + // }; + + // Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) + // } + + // pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + // let mut lock = self.raw.lock(); + // lock.from_pickle(py, state.as_ptr()) + // } + + // pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + // for node in self.raw.lock().iter() { + // let value = unsafe { node.as_ref() }; + + // visit.call(&value.element.0.obj)?; + // visit.call(&value.element.1)?; + // } + // Ok(()) + // } + + // pub fn __clear__(&self) { + // let mut lock = self.raw.lock(); + // lock.clear() + // } +} diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index 95316b1..005b21f 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -6,3 +6,4 @@ pub mod cache; pub mod fifocache; pub mod lrucache; pub mod rrcache; +pub mod lfucache; diff --git a/src/common.rs b/src/common.rs index 6064e0d..2e0db16 100644 --- a/src/common.rs +++ b/src/common.rs @@ -181,14 +181,14 @@ pub enum Entry { Absent(V), } -impl Entry { - pub fn map(self, f: impl FnOnce(O) -> T) -> Option { - match self { - Entry::Occupied(c) => Some(f(c)), - 
Entry::Absent(_) => None, - } - } -} +// impl Entry { +// pub fn map(self, f: impl FnOnce(O) -> T) -> Option { +// match self { +// Entry::Occupied(c) => Some(f(c)), +// Entry::Absent(_) => None, +// } +// } +// } /// A trait for adding `try_find` and `try_find_entry` methods to [`hashbrown::HashTable`] pub trait TryFindMethods { diff --git a/src/lazyheap.rs b/src/lazyheap.rs new file mode 100644 index 0000000..70d6ac8 --- /dev/null +++ b/src/lazyheap.rs @@ -0,0 +1,173 @@ +use std::ptr::NonNull; + +/// A heap data structure that lazily maintains sorting order. +/// +/// `LazyHeap` allows for efficient insertion of elements without immediately sorting, +/// with the ability to defer sorting until necessary. This can improve performance +/// in scenarios where sorting is not immediately required. +/// +/// ``` +/// let mut heap = LazyHeap::new(); +/// heap.push(5); +/// ``` +pub struct LazyHeap { + data: std::collections::VecDeque>, + is_sorted: bool, +} + +/// An iterator for traversing elements in a `LazyHeap`. +/// +/// This iterator allows sequential access to the elements of a `LazyHeap`, +/// maintaining the current position and total length during iteration. +/// +/// # Safety +/// +/// This iterator uses raw pointers and requires careful management to ensure +/// memory safety and prevent use-after-free or dangling pointer scenarios. 
+pub struct Iter { + slice: *const NonNull, + index: usize, + len: usize, +} + +impl LazyHeap { + #[inline] + pub fn new() -> Self { + Self { + data: std::collections::VecDeque::new(), + is_sorted: true, + } + } + + #[inline] + pub fn queue_sort(&mut self) { + self.is_sorted = false; + } + + #[inline] + pub fn front(&self) -> Option<&NonNull> { + debug_assert!(self.is_sorted, "heap not sorted"); + self.data.front() + } + + pub fn push(&mut self, value: T) -> NonNull { + unsafe { + let node: NonNull = NonNull::new_unchecked(Box::into_raw(Box::new(value))).cast(); + + self.data.push_back(node); + self.is_sorted = false; + + node + } + } + + #[inline] + pub fn sort_by(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) { + if self.is_sorted { + return; + } + + if self.data.len() > 1 { + unsafe { + self.data + .make_contiguous() + .sort_by(|a, b| compare(a.as_ref(), b.as_ref())); + } + } + + self.is_sorted = true; + } + + fn unlink_front(&mut self) -> Option { + let node = self.data.pop_front()?; + let node = unsafe { Box::from_raw(node.as_ptr()) }; + Some(*node) + } + + #[inline] + pub fn pop_front(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { + self.sort_by(compare); + self.unlink_front() + } + + fn unlink_back(&mut self) -> Option { + let node = self.data.pop_back()?; + let node = unsafe { Box::from_raw(node.as_ptr()) }; + Some(*node) + } + + #[inline] + pub fn pop_back(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { + self.sort_by(compare); + self.unlink_back() + } + + // #[inline] + // pub fn get(&self, index: usize) -> Option<&NonNull> { + // self.data.get(index) + // } + + pub fn remove(&mut self, node: NonNull, compare: F) -> T + where + F: Fn(&T, &T) -> std::cmp::Ordering, + { + debug_assert!(!self.data.is_empty()); + + if self.data.len() == 1 { + return self.pop_back(compare).unwrap(); + } + + self.sort_by(compare); + + let index = self.data.iter().position(|x| node == *x).unwrap(); + + let node = unsafe 
{ self.data.remove(index).unwrap_unchecked() }; + let boxed_node = unsafe { Box::from_raw(node.as_ptr()) }; + *boxed_node + } + + #[inline] + pub fn clear(&mut self) { + while self.unlink_back().is_some() {} + self.is_sorted = true; + } + + pub fn shrink_to_fit(&mut self) { + self.data.shrink_to_fit(); + } +} + +impl Drop for LazyHeap { + fn drop(&mut self) { + struct DropGuard<'a, T>(&'a mut LazyHeap); + + impl Drop for DropGuard<'_, T> { + fn drop(&mut self) { + // Continue the same loop we do below. This only runs when a destructor has + // panicked. If another one panics this will abort. + while self.0.unlink_back().is_some() {} + } + } + + // Wrap self so that if a destructor panics, we can try to keep looping + let guard = DropGuard(self); + while guard.0.unlink_back().is_some() {} + core::mem::forget(guard); + } +} + +impl Iterator for Iter { + type Item = NonNull; + + fn next(&mut self) -> Option { + if self.index == self.len { + None + } else { + let value = unsafe { self.slice.add(self.index) }; + self.index += 1; + Some(unsafe { *value }) + } + } +} + +unsafe impl Send for Iter {} diff --git a/src/lib.rs b/src/lib.rs index 9b855d4..3afbb93 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ use pyo3::prelude::*; +mod lazyheap; mod linked_list; mod mutex; @@ -22,6 +23,7 @@ fn _core(py: pyo3::Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/policies/lfu.rs b/src/policies/lfu.rs new file mode 100644 index 0000000..3563b6b --- /dev/null +++ b/src/policies/lfu.rs @@ -0,0 +1,313 @@ +use crate::common::Entry; +use crate::common::Observed; +use crate::common::PreHashObject; +use crate::common::TryFindMethods; +use crate::lazyheap; +use std::ptr::NonNull; + +type TupleValue = (PreHashObject, pyo3::PyObject, usize); + +pub struct LFUPolicy { + table: hashbrown::raw::RawTable>, + heap: lazyheap::LazyHeap, + maxsize: std::num::NonZeroUsize, + pub observed: 
Observed, +} + +pub struct LFUPolicyOccupied<'a> { + instance: &'a mut LFUPolicy, + bucket: hashbrown::raw::Bucket>, +} + +pub struct LFUPolicyAbsent<'a> { + instance: &'a mut LFUPolicy, + insert_slot: Option, +} + +impl LFUPolicy { + #[inline] + pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { + let maxsize = non_zero_or!(maxsize, isize::MAX as usize); + capacity = capacity.min(maxsize.get()); + + Ok(Self { + table: new_table!(capacity)?, + heap: lazyheap::LazyHeap::new(), + maxsize, + observed: Observed::new(), + }) + } + + #[inline] + pub fn maxsize(&self) -> usize { + self.maxsize.get() + } + + #[inline] + pub fn len(&self) -> usize { + self.table.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.table.is_empty() + } + + #[inline] + pub fn is_full(&self) -> bool { + self.table.len() == self.maxsize.get() + } + + #[inline] + pub fn capacity(&self) -> usize { + self.table.capacity() + } + + pub fn popitem(&mut self) -> Option { + self.heap.sort_by(|a, b| a.2.cmp(&b.2)); + let front = self.heap.front()?; + + unsafe { + self.table + .remove_entry(front.as_ref().0.hash, |x| { + std::ptr::eq(x.as_ptr(), front.as_ptr()) + }) + .unwrap(); + } + + self.observed.change(); + Some(self.heap.pop_front(|a, b| a.2.cmp(&b.2)).unwrap()) + } + + #[rustfmt::skip] + pub fn entry( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find(key.hash, |ptr| unsafe { ptr.as_ref().0.equal(py, key) })? + { + Some(bucket) => { + Ok( + Entry::Occupied(LFUPolicyOccupied { instance: self, bucket }) + ) + }, + None => { + Ok( + Entry::Absent(LFUPolicyAbsent { instance: self, insert_slot: None }) + ) + } + } + } + + #[rustfmt::skip] + pub fn entry_with_slot( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find_or_find_insert_slot( + key.hash, + |ptr| unsafe { ptr.as_ref().0.equal(py, key) }, + |ptr| unsafe { ptr.as_ref().0.hash }, + )? 
{ + Ok(bucket) => { + Ok( + Entry::Occupied(LFUPolicyOccupied { instance: self, bucket }) + ) + }, + Err(slot) => { + Ok( + Entry::Absent(LFUPolicyAbsent { instance: self, insert_slot: Some(slot) }) + ) + } + } + } + + pub fn lookup( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.entry(py, key)? { + Entry::Occupied(x) => unsafe { + x.bucket.as_mut().as_mut().2 += 1; + x.instance.heap.queue_sort(); + + Ok(Some(&x.bucket.as_ref().as_ref().1)) + }, + Entry::Absent(_) => Ok(None), + } + } + + pub fn peek( + &self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + let result = self + .table + .try_find(key.hash, |x| unsafe { x.as_ref().0.equal(py, key) })? + .map(|x| unsafe { &x.as_ref().as_ref().1 }); + + Ok(result) + } + + #[inline] + pub fn clear(&mut self) { + self.table.clear(); + self.heap.clear(); + self.observed.change(); + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.table + .shrink_to(self.table.len(), |x| unsafe { x.as_ref().0.hash }); + + self.heap.shrink_to_fit(); + self.observed.change(); + } + + pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + if self.maxsize != other.maxsize { + return Ok(false); + } + + if self.table.len() != other.table.len() { + return Ok(false); + } + + unsafe { + for node in self.table.iter().map(|x| x.as_ref()) { + let (key1, value1, _) = node.as_ref(); + + match other + .table + .try_find(key1.hash, |x| key1.equal(py, &x.as_ref().0))? + { + Some(bucket) => { + let (_, value2, _) = bucket.as_ref().as_ref(); + + if !crate::common::pyobject_equal(py, value1.as_ptr(), value2.as_ptr())? 
{ + return Ok(false); + } + } + None => return Ok(false), + } + } + } + + Ok(true) + } + + pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { + use pyo3::types::{PyAnyMethods, PyDictMethods}; + + if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + for (key, value) in dict.iter() { + let hk = + unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; + + match self.entry_with_slot(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value.unbind())?; + } + Entry::Absent(entry) => { + entry.insert(hk, value.unbind(), 0)?; + } + } + } + } else { + for pair in iterable.bind(py).try_iter()? { + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match self.entry_with_slot(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value)?; + } + Entry::Absent(entry) => { + entry.insert(hk, value, 0)?; + } + } + } + } + + Ok(()) + } +} + +impl LFUPolicyOccupied<'_> { + #[inline] + pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + let item = unsafe { self.bucket.as_mut() }; + unsafe { + item.as_mut().2 += 1; + } + + self.instance.heap.queue_sort(); + + // In update we don't need to change this; because this does not change the memory address ranges + // self.instance.observed.change(); + + Ok(unsafe { std::mem::replace(&mut item.as_mut().1, value) }) + } + + #[inline] + pub fn remove(self) -> TupleValue { + let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; + let item = self.instance.heap.remove(item, |a, b| a.2.cmp(&b.2)); + + self.instance.observed.change(); + item + } + + #[inline] + pub fn into_value(self) -> NonNull { + let item = unsafe { self.bucket.as_mut() }; + *item + } +} + +impl LFUPolicyAbsent<'_> { + #[inline] + pub fn insert( + self, + key: PreHashObject, + value: 
pyo3::PyObject, + freq: usize, + ) -> pyo3::PyResult<()> { + if self.instance.table.len() >= self.instance.maxsize.get() { + self.instance.popitem(); + } + + let hash = key.hash; + let node = self.instance.heap.push((key, value, freq)); + + match self.insert_slot { + Some(slot) => unsafe { + self.instance.table.insert_in_slot(hash, slot, node); + }, + None => { + self.instance + .table + .insert(hash, node, |x| unsafe { x.as_ref().0.hash }); + } + } + + self.instance.observed.change(); + Ok(()) + } +} + +unsafe impl Send for LFUPolicy {} diff --git a/src/policies/lru.rs b/src/policies/lru.rs index aea0bff..aea0793 100644 --- a/src/policies/lru.rs +++ b/src/policies/lru.rs @@ -88,10 +88,6 @@ impl LRUPolicy { .try_find(key.hash, |x| unsafe { x.as_ref().element.0.equal(py, key) })? { Some(bucket) => { - unsafe { - self.list.move_back(*bucket.as_ptr()); - } - Ok( Entry::Occupied(LRUPolicyOccupied { instance: self, bucket }) ) @@ -118,10 +114,6 @@ impl LRUPolicy { |x| unsafe { x.as_ref().element.0.hash } )? { Ok(bucket) => { - unsafe { - self.list.move_back(*bucket.as_ptr()); - } - Ok( Entry::Occupied(LRUPolicyOccupied { instance: self, bucket }) ) @@ -139,9 +131,14 @@ impl LRUPolicy { py: pyo3::Python<'_>, key: &PreHashObject, ) -> pyo3::PyResult> { - let entry = self.entry(py, key)?; + match self.entry(py, key)? 
{ + Entry::Occupied(x) => unsafe { + x.instance.list.move_back(*x.bucket.as_ptr()); - Ok(entry.map(|x| unsafe { &x.bucket.as_ref().as_ref().element.1 })) + Ok(Some(&x.bucket.as_ref().as_ref().element.1)) + }, + Entry::Absent(_) => Ok(None), + } } pub fn peek( @@ -305,6 +302,10 @@ impl<'a> LRUPolicyOccupied<'a> { #[inline] pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { + unsafe { + self.instance.list.move_back(*self.bucket.as_ptr()); + } + let item = unsafe { self.bucket.as_mut() }; unsafe { &mut item.as_mut().element } } diff --git a/src/policies/mod.rs b/src/policies/mod.rs index d666e55..fb89981 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -1,4 +1,5 @@ pub mod fifo; pub mod lru; +pub mod lfu; pub mod nopolicy; pub mod random; From a188b58f7117ee769fdbc5f08403a4dad8f06d29 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 13 Apr 2025 12:23:31 +0330 Subject: [PATCH 16/37] * Refactor and optimize LFUCache * Now the LFUCache using VecDeque instead of Vec * Update docstrings * Add more strictness for loading pickle objects --- python/cachebox/__init__.py | 6 + python/cachebox/_cachebox.py | 286 +++++++++++++++++++++++++++++++++-- python/cachebox/_core.pyi | 9 ++ python/tests/mixin.py | 2 +- python/tests/test_caches.py | 89 +++++++++++ src/bridge/lfucache.rs | 223 ++++++++++++++++----------- src/bridge/mod.rs | 4 +- src/common.rs | 39 ++++- src/lazyheap.rs | 41 +++-- src/policies/fifo.rs | 18 ++- src/policies/lfu.rs | 55 +++++++ src/policies/lru.rs | 17 ++- src/policies/mod.rs | 2 +- src/policies/nopolicy.rs | 25 ++- src/policies/random.rs | 25 ++- 15 files changed, 712 insertions(+), 129 deletions(-) create mode 100644 python/cachebox/_core.pyi diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py index e774c5d..14152c8 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -1,8 +1,14 @@ +from ._core import ( + __author__ as __author__, + __version__ as __version__, +) + from ._cachebox 
import ( Cache as Cache, FIFOCache as FIFOCache, RRCache as RRCache, LRUCache as LRUCache, + LFUCache as LFUCache, BaseCacheImpl as BaseCacheImpl, IteratorView as IteratorView, ) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 2c3c330..fd34a77 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -30,9 +30,12 @@ def _items_to_str(items, length): class BaseCacheImpl(typing.Generic[KT, VT]): """ - This is the base class of all cache classes such as Cache, FIFOCache, ... + Base implementation for cache classes in the cachebox library. + + This abstract base class defines the generic structure for cache implementations, + supporting different key and value types through generic type parameters. + Serves as a foundation for specific cache variants like Cache and FIFOCache. """ - pass @@ -247,8 +250,11 @@ def __iter__(self) -> IteratorView[KT]: return self.keys() def __repr__(self) -> str: - return "{}[{}/{}]({})".format( - type(self).__name__, + cls = type(self) + + return "%s.%s[%d/%d](%s)" % ( + cls.__module__, + cls.__name__, len(self._raw), self._raw.maxsize(), _items_to_str(self._raw.items(), len(self._raw)), @@ -473,8 +479,11 @@ def __iter__(self) -> IteratorView[KT]: return self.keys() def __repr__(self) -> str: - return "{}[{}/{}]({})".format( - type(self).__name__, + cls = type(self) + + return "%s.%s[%d/%d](%s)" % ( + cls.__module__, + cls.__name__, len(self._raw), self._raw.maxsize(), _items_to_str(self._raw.items(), len(self._raw)), @@ -677,8 +686,11 @@ def __iter__(self) -> IteratorView[KT]: return self.keys() def __repr__(self) -> str: - return "{}[{}/{}]({})".format( - type(self).__name__, + cls = type(self) + + return "%s.%s[%d/%d](%s)" % ( + cls.__module__, + cls.__name__, len(self._raw), self._raw.maxsize(), _items_to_str(self._raw.items(), len(self._raw)), @@ -798,6 +810,10 @@ def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Uni return self._raw.setdefault(key, 
default) def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes the least recently used item from the cache and returns it as a (key, value) tuple. + Raises KeyError if the cache is empty. + """ try: return self._raw.popitem() except _core.CoreKeyError: @@ -905,9 +921,259 @@ def __iter__(self) -> IteratorView[KT]: return self.keys() def __repr__(self) -> str: - return "{}[{}/{}]({})".format( - type(self).__name__, + cls = type(self) + + return "%s.%s[%d/%d](%s)" % ( + cls.__module__, + cls.__name__, len(self._raw), self._raw.maxsize(), _items_to_str(self._raw.items(), len(self._raw)), ) + + +class LFUCache(BaseCacheImpl[KT, VT]): + """ + A thread-safe Least Frequently Used (LFU) cache implementation. + + This cache removes elements that have been accessed the least number of times, + regardless of their access time. It provides methods for inserting, retrieving, + and managing cache entries with configurable maximum size and initial capacity. + + Key features: + - Thread-safe cache with LFU eviction policy + - Configurable maximum size and initial capacity + - Supports initialization from dictionaries or iterables + - Provides methods for key-value management similar to dict + """ + + def __init__( + self, + maxsize: int, + iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, + *, + capacity: int = 0, + ) -> None: + """ + Initialize a new Least Frequently Used (LFU) cache. + + Args: + maxsize (int): Maximum size of the cache. A value of zero means unlimited size. + iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. + capacity (int, optional): Initial hash table capacity to minimize reallocations. Defaults to 0. + + The cache uses a thread-safe LFU eviction policy, removing least frequently accessed items when the cache reaches its maximum size. 
+ """ + self._raw = _core.LFUCache(maxsize, capacity=capacity) + + if iterable is not None: + self.update(iterable) + + @property + def maxsize(self) -> int: + return self._raw.maxsize() + + def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" + return self._raw.capacity() + + def __len__(self) -> int: + return len(self._raw) + + def __sizeof__(self): # pragma: no cover + return self._raw.__sizeof__() + + def __contains__(self, key: KT) -> bool: + return key in self._raw + + def __bool__(self) -> bool: + return not self.is_empty() + + def is_empty(self) -> bool: + return self._raw.is_empty() + + def is_full(self) -> bool: + return self._raw.is_full() + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. The key is not updated, though; + """ + return self._raw.insert(key, value) + + def peek(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Searches for a key-value in the cache and returns it (without moving the key to recently used). + """ + try: + return self._raw.peek(key) + except _core.CoreKeyError: + return default + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Equals to `self[key]`, but returns `default` if the cache don't have this key present. + """ + try: + return self._raw.get(key) + except _core.CoreKeyError: + return default + + def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Removes specified key and return the corresponding value. If the key is not found, returns the `default`. 
+ """ + try: + return self._raw.remove(key) + except _core.CoreKeyError: + return default + + def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Return the value for key if key is in the cache, else default. + """ + return self._raw.setdefault(key, default) + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes and returns the least frequently used (LFU) item from the cache. + """ + try: + return self._raw.popitem() + except _core.CoreKeyError: + raise KeyError() from None + + def drain(self, n: int) -> int: # pragma: no cover + """Does the `popitem()` `n` times and returns count of removed items.""" + if n <= 0: + return 0 + + for i in range(n): + try: + self._raw.popitem() + except _core.CoreKeyError: + return i + + return i + + def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: + """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" + if hasattr(iterable, "items"): + iterable = iterable.items() + + self._raw.update(iterable) + + def __setitem__(self, key: KT, value: VT) -> None: + self.insert(key, value) + + def __getitem__(self, key: KT) -> VT: + try: + return self._raw.get(key) + except _core.CoreKeyError: + raise KeyError(key) from None + + def __delitem__(self, key: KT) -> None: + try: + self._raw.remove(key) + except _core.CoreKeyError: + raise KeyError(key) from None + + def __eq__(self, other) -> bool: + if not isinstance(other, LFUCache): + return False # pragma: no cover + + return self._raw == other._raw + + def __ne__(self, other) -> bool: + if not isinstance(other, LFUCache): + return False # pragma: no cover + + return self._raw != other._raw + + def shrink_to_fit(self) -> None: + """Shrinks the cache to fit len(self) elements.""" + self._raw.shrink_to_fit() + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. 
+ + If reuse is True, will not free the memory for reusing in the future. + """ + self._raw.clear(reuse) + + def items(self) -> IteratorView[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: (x[0], x[1])) + + def items_with_frequency(self) -> IteratorView[typing.Tuple[KT, VT, int]]: + """ + Returns an iterable view - containing tuples of `(key, value, frequency)` - of the cache's items along with their access frequency. + + Notes: + - The returned iterator should not be used to modify the cache. + - Frequency represents how many times the item has been accessed. + """ + return IteratorView(self._raw.items(), lambda x: x) + + def keys(self) -> IteratorView[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x[0]) + + def values(self) -> IteratorView[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x[1]) + + def least_frequently_used(self, n: int = 0) -> typing.Optional[KT]: + """ + Returns the key in the cache that has been accessed the least, regardless of time. + + If n is given, returns the nth least frequently used key. + + Notes: + - This method may re-sort the cache which can cause iterators to be stopped. + - Do not use this method while using iterators. 
+ """ + if n < 0: + n = len(self._raw) + n + + if n < 0: + return None + + return self._raw.least_frequently_used(n) + + def __iter__(self) -> IteratorView[KT]: + return self.keys() + + def __repr__(self) -> str: + cls = type(self) + + return "%s.%s[%d/%d](%s)" % ( + cls.__module__, + cls.__name__, + len(self._raw), + self._raw.maxsize(), + # NOTE: we cannot use self._raw.items() here because iterables a tuples of (key, value, frequency) + _items_to_str(self.items(), len(self._raw)), + ) diff --git a/python/cachebox/_core.pyi b/python/cachebox/_core.pyi new file mode 100644 index 0000000..aa98175 --- /dev/null +++ b/python/cachebox/_core.pyi @@ -0,0 +1,9 @@ +__version__: str +__author__: str + +class CoreKeyError(Exception): + """ + An exception when a key is not found in a cache. + This exception is internal to the library core and won't affect you. + """ + ... diff --git a/python/tests/mixin.py b/python/tests/mixin.py index 1c2b8df..c4f4d38 100644 --- a/python/tests/mixin.py +++ b/python/tests/mixin.py @@ -132,7 +132,7 @@ def test___setitem__(self): def test___repr__(self): cache = self.CACHE(100, **self.KWARGS, capacity=2) - assert repr(cache).startswith(self.CACHE.__name__) + assert repr(cache).startswith(self.CACHE.__module__ + "." 
+ self.CACHE.__name__) cache.update({i: i for i in range(100)}) assert str(cache) == repr(cache) diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index 50bfbca..1ea46d6 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -3,6 +3,7 @@ FIFOCache, RRCache, LRUCache, + LFUCache, ) import pytest from .mixin import _TestMixin @@ -159,3 +160,91 @@ def inner(c1, c2): assert list(c1.items()) == list(c2.items()) self._test_pickle(inner) + + +class TestLFUCache(_TestMixin): + CACHE = LFUCache + + def test_policy(self): + obj = self.CACHE(5, {i: i for i in range(5)}) + + for i in range(5): + obj[i] = i + + for i in range(10): + assert 0 == obj[0] + for i in range(7): + assert 1 == obj[1] + for i in range(3): + assert 2 == obj[2] + for i in range(4): + assert 3 == obj[3] + for i in range(6): + assert 4 == obj[4] + + assert (2, 2) == obj.popitem() + assert (3, 3) == obj.popitem() + + for i in range(10): + assert 4 == obj.get(4) + + assert (1, 1) == obj.popitem() + + assert 2 == len(obj) + obj.clear() + + for i in range(5): + obj[i] = i + + assert [0, 1, 2, 3, 4] == list(obj.keys()) + + for i in range(10): + obj[0] += 1 + for i in range(7): + obj[1] += 1 + for i in range(3): + obj[2] += 1 + for i in range(4): + obj[3] += 1 + for i in range(6): + obj[4] += 1 + + obj[5] = 4 + assert [5, 3, 4, 1, 0] == list(obj.keys()) + + def test_items_with_frequency(self): + # no need to test completely items_with_frequency + # because it's tested in test_iterators + obj = LFUCache(10, {1:2, 3:4}) + for key, val, freq in obj.items_with_frequency(): + assert key in obj + assert val == obj[key] + assert isinstance(freq, int) + + def test_least_frequently_used(self): + obj = LFUCache(10) + + for i in range(5): + obj[i] = i * 2 + + for i in range(10): + obj[0] += 1 + for i in range(7): + obj[1] += 1 + for i in range(3): + obj[2] += 1 + for i in range(4): + obj[3] += 1 + for i in range(6): + obj[4] += 1 + + assert obj.least_frequently_used() == 2 + 
assert obj.least_frequently_used(1) == 3 + assert obj.least_frequently_used(4) == 0 + assert obj.least_frequently_used(5) is None + + def test_pickle(self): + def inner(c1, c2): + assert list(c1.items()) == list(c2.items()) + + self._test_pickle(inner) diff --git a/src/bridge/lfucache.rs b/src/bridge/lfucache.rs index f84646d..b7401c6 100644 --- a/src/bridge/lfucache.rs +++ b/src/bridge/lfucache.rs @@ -1,4 +1,5 @@ use crate::common::Entry; +use crate::common::ObservedIterator; use crate::common::PreHashObject; #[pyo3::pyclass(module = "cachebox._core", frozen)] @@ -6,6 +7,13 @@ pub struct LFUCache { raw: crate::mutex::Mutex, } +#[allow(non_camel_case_types)] +#[pyo3::pyclass(module = "cachebox._core")] +pub struct lfucache_items { + pub ptr: ObservedIterator, + pub iter: crate::mutex::Mutex, +} + #[pyo3::pymethods] impl LFUCache { #[new] @@ -204,95 +212,128 @@ impl LFUCache { } } - // fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { - // let lock = slf.raw.lock(); - // let state = lock.observed.get(); - // let iter = lock.iter(); - - // let result = lrucache_items { - // ptr: ObservedIterator::new(slf.as_ptr(), state), - // iter: crate::mutex::Mutex::new(iter), - // }; - - // pyo3::Py::new(slf.py(), result) - // } - - // fn least_recently_used(&self, py: pyo3::Python<'_>) -> Option { - // let lock = self.raw.lock(); - // lock.least_recently_used().map(|x| x.0.obj.clone_ref(py)) - // } - - // fn most_recently_used(&self, py: pyo3::Python<'_>) -> Option { - // let lock = self.raw.lock(); - // lock.most_recently_used().map(|x| x.0.obj.clone_ref(py)) - // } - - // fn __getnewargs__(&self) -> (usize,) { - // (0,) - // } - - // fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - // let lock = self.raw.lock(); - - // let state = unsafe { - // let list = pyo3::ffi::PyList_New(0); - // if list.is_null() { - // return Err(pyo3::PyErr::fetch(py)); - // } - - // for node in lock.iter() { - // let (hk, val) = &(*node.as_ptr()).element; - - // let tp = 
tuple!( - // py, - // 2, - // 0 => hk.obj.clone_ref(py).as_ptr(), - // 1 => val.clone_ref(py).as_ptr(), - // ); - - // if let Err(x) = tp { - // pyo3::ffi::Py_DECREF(list); - // return Err(x); - // } - - // if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - // pyo3::ffi::Py_DECREF(list); - // return Err(pyo3::PyErr::fetch(py)); - // } - // } - - // let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - // let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); - - // tuple!( - // py, - // 3, - // 0 => maxsize, - // 1 => list, - // 2 => capacity, - // )? - // }; - - // Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - // } - - // pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - // let mut lock = self.raw.lock(); - // lock.from_pickle(py, state.as_ptr()) - // } - - // pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { - // for node in self.raw.lock().iter() { - // let value = unsafe { node.as_ref() }; - - // visit.call(&value.element.0.obj)?; - // visit.call(&value.element.1)?; - // } - // Ok(()) - // } - - // pub fn __clear__(&self) { - // let mut lock = self.raw.lock(); - // lock.clear() - // } + fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + let mut lock = slf.raw.lock(); + let state = lock.observed.get(); + let iter = lock.iter(); + + let result = lfucache_items { + ptr: ObservedIterator::new(slf.as_ptr(), state), + iter: crate::mutex::Mutex::new(iter), + }; + + pyo3::Py::new(slf.py(), result) + } + + pub fn least_frequently_used(&self, py: pyo3::Python<'_>, n: usize) -> Option { + let mut lock = self.raw.lock(); + lock.least_frequently_used(n) + .map(|x| unsafe { x.as_ref().0.obj.clone_ref(py) }) + } + + fn __getnewargs__(&self) -> (usize,) { + (0,) + } + + fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let mut lock = self.raw.lock(); + + let state = unsafe { + let list = pyo3::ffi::PyList_New(0); + if 
list.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + for ptr in lock.iter() { + let node = &(*ptr.as_ptr()); + + let frequency = pyo3::ffi::PyLong_FromSize_t(node.2); + if frequency.is_null() { + pyo3::ffi::Py_DECREF(list); + return Err(pyo3::PyErr::fetch(py)); + } + + let tp = tuple!( + py, + 3, + 0 => node.0.obj.clone_ref(py).into_ptr(), + 1 => node.1.clone_ref(py).into_ptr(), + 2 => frequency, + ); + + if let Err(x) = tp { + pyo3::ffi::Py_DECREF(list); + return Err(x); + } + + if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { + pyo3::ffi::Py_DECREF(list); + return Err(pyo3::PyErr::fetch(py)); + } + } + + let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); + + tuple!( + py, + 3, + 0 => maxsize, + 1 => list, + 2 => capacity, + )? + }; + + Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) + } + + pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + let mut lock = self.raw.lock(); + lock.from_pickle(py, state.as_ptr()) + } + + pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + for node in self.raw.lock().iter() { + let value = unsafe { node.as_ref() }; + + visit.call(&value.0.obj)?; + visit.call(&value.1)?; + } + Ok(()) + } + + pub fn __clear__(&self) { + let mut lock = self.raw.lock(); + lock.clear() + } +} + +#[pyo3::pymethods] +impl lfucache_items { + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + #[allow(unused_mut)] + fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult<*mut pyo3::ffi::PyObject> { + let mut iter = slf.iter.lock(); + + slf.ptr.proceed(slf.py())?; + + if let Some(x) = iter.next() { + let (key, val, freq) = unsafe { x.as_ref() }; + + let freq = unsafe { pyo3::ffi::PyLong_FromSize_t(*freq) }; + + tuple!( + slf.py(), + 3, + 0 => key.obj.clone_ref(slf.py()).into_ptr(), + 1 => val.clone_ref(slf.py()).into_ptr(), + 2 => 
freq, + ) + } else { + Err(pyo3::PyErr::new::(())) + } + } } diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index 005b21f..f0e3523 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -1,9 +1,9 @@ use pyo3::create_exception; -create_exception!(_core, CoreKeyError, pyo3::exceptions::PyException); +create_exception!(cachebox._core, CoreKeyError, pyo3::exceptions::PyException); pub mod cache; pub mod fifocache; +pub mod lfucache; pub mod lrucache; pub mod rrcache; -pub mod lfucache; diff --git a/src/common.rs b/src/common.rs index 2e0db16..368bac3 100644 --- a/src/common.rs +++ b/src/common.rs @@ -80,7 +80,7 @@ macro_rules! tuple { } macro_rules! extract_pickle_tuple { - ($py:expr, $state:expr) => {{ + ($py:expr, $state:expr => list) => {{ let maxsize = { let obj = pyo3::ffi::PyTuple_GetItem($state, 0); pyo3::ffi::PyLong_AsSize_t(obj) @@ -93,7 +93,42 @@ macro_rules! extract_pickle_tuple { let iterable = { let obj = pyo3::ffi::PyTuple_GetItem($state, 1); - if pyo3::ffi::PyDict_CheckExact(obj) != 1 && pyo3::ffi::PyList_CheckExact(obj) != 1 { + if pyo3::ffi::PyList_CheckExact(obj) != 1 { + return Err(pyo3::PyErr::new::( + "the iterable object is not an dict or list", + )); + } + + // Tuple returns borrowed reference + pyo3::PyObject::from_borrowed_ptr($py, obj) + }; + + let capacity = { + let obj = pyo3::ffi::PyTuple_GetItem($state, 2); + pyo3::ffi::PyLong_AsSize_t(obj) + }; + + if let Some(e) = pyo3::PyErr::take($py) { + return Err(e); + } + + (maxsize, iterable, capacity) + }}; + + ($py:expr, $state:expr => dict) => {{ + let maxsize = { + let obj = pyo3::ffi::PyTuple_GetItem($state, 0); + pyo3::ffi::PyLong_AsSize_t(obj) + }; + + if let Some(e) = pyo3::PyErr::take($py) { + return Err(e); + } + + let iterable = { + let obj = pyo3::ffi::PyTuple_GetItem($state, 1); + + if pyo3::ffi::PyDict_CheckExact(obj) != 1 { return Err(pyo3::PyErr::new::( "the iterable object is not an dict or list", )); diff --git a/src/lazyheap.rs b/src/lazyheap.rs index 70d6ac8..65e1b4e 
100644 --- a/src/lazyheap.rs +++ b/src/lazyheap.rs @@ -1,3 +1,4 @@ +use crate::common::NoLifetimeSliceIter; use std::ptr::NonNull; /// A heap data structure that lazily maintains sorting order. @@ -5,7 +6,7 @@ use std::ptr::NonNull; /// `LazyHeap` allows for efficient insertion of elements without immediately sorting, /// with the ability to defer sorting until necessary. This can improve performance /// in scenarios where sorting is not immediately required. -/// +/// /// ``` /// let mut heap = LazyHeap::new(); /// heap.push(5); @@ -25,9 +26,8 @@ pub struct LazyHeap { /// This iterator uses raw pointers and requires careful management to ensure /// memory safety and prevent use-after-free or dangling pointer scenarios. pub struct Iter { - slice: *const NonNull, - index: usize, - len: usize, + first: NoLifetimeSliceIter>, + second: NoLifetimeSliceIter>, } impl LazyHeap { @@ -102,10 +102,10 @@ impl LazyHeap { self.unlink_back() } - // #[inline] - // pub fn get(&self, index: usize) -> Option<&NonNull> { - // self.data.get(index) - // } + #[inline] + pub fn get(&self, index: usize) -> Option<&NonNull> { + self.data.get(index) + } pub fn remove(&mut self, node: NonNull, compare: F) -> T where @@ -132,9 +132,22 @@ impl LazyHeap { self.is_sorted = true; } + #[inline] pub fn shrink_to_fit(&mut self) { self.data.shrink_to_fit(); } + + #[inline] + pub fn iter(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Iter { + self.sort_by(compare); + + let (a, b) = self.data.as_slices(); + + Iter { + first: NoLifetimeSliceIter::new(a), + second: NoLifetimeSliceIter::new(b), + } + } } impl Drop for LazyHeap { @@ -160,12 +173,12 @@ impl Iterator for Iter { type Item = NonNull; fn next(&mut self) -> Option { - if self.index == self.len { - None - } else { - let value = unsafe { self.slice.add(self.index) }; - self.index += 1; - Some(unsafe { *value }) + match self.first.next() { + Some(val) => Some(unsafe { *val.as_ptr() }), + None => { + core::mem::swap(&mut self.first, 
&mut self.second); + self.first.next().map(|x| unsafe { *x.as_ptr() }) + } } } } diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs index 667c5f0..5e65a3e 100644 --- a/src/policies/fifo.rs +++ b/src/policies/fifo.rs @@ -317,12 +317,26 @@ impl FIFOPolicy { py: pyo3::Python<'_>, state: *mut pyo3::ffi::PyObject, ) -> pyo3::PyResult<()> { + use pyo3::types::PyAnyMethods; + unsafe { tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); + let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state => list); let mut new = Self::new(maxsize, capacity)?; - new.extend(py, iterable)?; + + for pair in iterable.bind(py).try_iter()? { + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match new.entry_with_slot(py, &hk)? { + Entry::Absent(entry) => { + entry.insert(py, hk, value)?; + } + _ => std::hint::unreachable_unchecked(), + } + } *self = new; Ok(()) diff --git a/src/policies/lfu.rs b/src/policies/lfu.rs index 3563b6b..b7f9a53 100644 --- a/src/policies/lfu.rs +++ b/src/policies/lfu.rs @@ -24,6 +24,8 @@ pub struct LFUPolicyAbsent<'a> { insert_slot: Option, } +pub type LFUIterator = lazyheap::Iter<(PreHashObject, pyo3::Py, usize)>; + impl LFUPolicy { #[inline] pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { @@ -245,6 +247,59 @@ impl LFUPolicy { Ok(()) } + + pub fn iter(&mut self) -> LFUIterator { + self.heap.iter(|a, b| a.2.cmp(&b.2)) + } + + pub fn least_frequently_used(&mut self, n: usize) -> Option> { + self.heap.sort_by(|a, b| a.2.cmp(&b.2)); + let node = self.heap.get(n)?; + + Some(*node) + } + + #[allow(clippy::wrong_self_convention)] + #[inline] + pub fn from_pickle( + &mut self, + py: pyo3::Python<'_>, + state: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + use pyo3::types::PyAnyMethods; + + unsafe { + tuple!(check state, size=3)?; + let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state 
=> list); + + // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro + if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { + return Err(pyo3::PyErr::new::( + "iterable object size is greater than maxsize", + )); + } + + let mut new = Self::new(maxsize, capacity)?; + + for pair in iterable.bind(py).try_iter()? { + let (key, value, freq) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject, usize)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match new.entry_with_slot(py, &hk)? { + Entry::Absent(entry) => { + entry.insert(hk, value, freq)?; + } + _ => std::hint::unreachable_unchecked(), + } + } + + new.heap.sort_by(|a, b| a.2.cmp(&b.2)); + + *self = new; + Ok(()) + } + } } impl LFUPolicyOccupied<'_> { diff --git a/src/policies/lru.rs b/src/policies/lru.rs index aea0793..3d9fdd2 100644 --- a/src/policies/lru.rs +++ b/src/policies/lru.rs @@ -263,12 +263,25 @@ impl LRUPolicy { py: pyo3::Python<'_>, state: *mut pyo3::ffi::PyObject, ) -> pyo3::PyResult<()> { + use pyo3::types::PyAnyMethods; unsafe { tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state); + let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state => list); let mut new = Self::new(maxsize, capacity)?; - new.extend(py, iterable)?; + + for pair in iterable.bind(py).try_iter()? { + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match new.entry_with_slot(py, &hk)? 
{ + Entry::Absent(entry) => { + entry.insert(hk, value)?; + } + _ => std::hint::unreachable_unchecked(), + } + } *self = new; Ok(()) diff --git a/src/policies/mod.rs b/src/policies/mod.rs index fb89981..6956bd1 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -1,5 +1,5 @@ pub mod fifo; -pub mod lru; pub mod lfu; +pub mod lru; pub mod nopolicy; pub mod random; diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 04470d5..6b6a149 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -217,11 +217,32 @@ impl NoPolicy { py: pyo3::Python<'_>, state: *mut pyo3::ffi::PyObject, ) -> pyo3::PyResult<()> { + use pyo3::types::PyDictMethods; + tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = unsafe { extract_pickle_tuple!(py, state) }; + let (maxsize, iterable, capacity) = unsafe { extract_pickle_tuple!(py, state => dict) }; let mut new = Self::new(maxsize, capacity)?; - new.extend(py, iterable)?; + + // SAFETY: we checked that the iterable is a dict in extract_pickle_tuple! macro + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + unsafe { + for (key, value) in dict.iter() { + let hk = PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked(); + + match new.entry_with_slot(py, &hk)? 
{ + Entry::Absent(entry) => { + entry.insert(hk, value.unbind())?; + } + _ => std::hint::unreachable_unchecked(), + } + } + } *self = new; Ok(()) diff --git a/src/policies/random.rs b/src/policies/random.rs index 48e8ae7..7c51073 100644 --- a/src/policies/random.rs +++ b/src/policies/random.rs @@ -232,11 +232,32 @@ impl RandomPolicy { py: pyo3::Python<'_>, state: *mut pyo3::ffi::PyObject, ) -> pyo3::PyResult<()> { + use pyo3::types::PyDictMethods; + tuple!(check state, size=3)?; - let (maxsize, iterable, capacity) = unsafe { extract_pickle_tuple!(py, state) }; + let (maxsize, iterable, capacity) = unsafe { extract_pickle_tuple!(py, state => dict) }; let mut new = Self::new(maxsize, capacity)?; - new.extend(py, iterable)?; + + // SAFETY: we checked that the iterable is a dict in extract_pickle_tuple! macro + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + unsafe { + for (key, value) in dict.iter() { + let hk = PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked(); + + match new.entry_with_slot(py, &hk)? 
{ + Entry::Absent(entry) => { + entry.insert(hk, value.unbind())?; + } + _ => std::hint::unreachable_unchecked(), + } + } + } *self = new; Ok(()) From 35cf2952f46c8eb096e3d79bc18540d14f2967b1 Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 13 Apr 2025 12:26:33 +0330 Subject: [PATCH 17/37] Update test workflow !test --- .github/workflows/python-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 9e5a247..11011df 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -12,7 +12,7 @@ permissions: jobs: test-python: - if: "contains(github.event.head_commit.message, '[run test]')" + if: "contains(github.event.head_commit.message, '!test')" name: test ${{ matrix.python-version }} strategy: fail-fast: false @@ -54,7 +54,7 @@ jobs: HYPOTHESIS_PROFILE: slow test-os: - if: "contains(github.event.head_commit.message, '[run test]')" + if: "contains(github.event.head_commit.message, '!test')" name: test on ${{ matrix.os }} strategy: From 25420e15611bc36cec94284921dd221fcc5057cb Mon Sep 17 00:00:00 2001 From: awolverp Date: Sun, 13 Apr 2025 12:43:48 +0330 Subject: [PATCH 18/37] Add new method `random_key` to RRCache --- python/cachebox/_cachebox.py | 10 ++++++++++ src/bridge/rrcache.rs | 8 ++++++++ src/policies/random.rs | 14 ++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index fd34a77..a45c8ca 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -613,6 +613,16 @@ def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: self._raw.update(iterable) + def random_key(self) -> KT: + """ + Randomly selects and returns a key from the cache. + Raises `KeyError` If the cache is empty. 
+ """ + try: + return self._raw.random_key() + except _core.CoreKeyError: + raise KeyError() from None + def __setitem__(self, key: KT, value: VT) -> None: self.insert(key, value) diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs index 7238625..ebc26c6 100644 --- a/src/bridge/rrcache.rs +++ b/src/bridge/rrcache.rs @@ -203,6 +203,14 @@ impl RRCache { pyo3::Py::new(slf.py(), result) } + fn random_key(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let lock = self.raw.lock(); + match lock.random_key() { + Some(x) => Ok(x.obj.clone_ref(py)), + None => Err(pyo3::PyErr::new::(())), + } + } + fn __getnewargs__(&self) -> (usize,) { (0,) } diff --git a/src/policies/random.rs b/src/policies/random.rs index 7c51073..c6773fb 100644 --- a/src/policies/random.rs +++ b/src/policies/random.rs @@ -262,6 +262,20 @@ impl RandomPolicy { *self = new; Ok(()) } + + #[inline] + pub fn random_key(&self) -> Option<&PreHashObject> { + if self.table.is_empty() { + None + } else { + let nth = fastrand::usize(0..self.table.len()); + + let bucket = unsafe { self.table.iter().nth(nth).unwrap_unchecked() }; + let (key, _) = unsafe { bucket.as_ref() }; + + Some(key) + } + } } impl<'a> RandomPolicyOccupied<'a> { From bbc71d366a6f6f0bee033d1118e44392b65ca32b Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 14 Apr 2025 12:53:19 +0330 Subject: [PATCH 19/37] Refactor a part of TTLCache --- python/cachebox/__init__.py | 1 + python/cachebox/_cachebox.py | 208 ++++++++++++- python/cachebox/_core.pyi | 1 + python/tests/test_caches.py | 2 +- src/bridge/cache.rs | 2 +- src/bridge/fifocache.rs | 2 +- src/bridge/lfucache.rs | 2 +- src/bridge/lrucache.rs | 2 +- src/bridge/mod.rs | 1 + src/bridge/rrcache.rs | 2 +- src/bridge/ttlcache.rs | 370 +++++++++++++++++++++++ src/lib.rs | 2 + src/policies/fifo.rs | 7 +- src/policies/lfu.rs | 7 +- src/policies/mod.rs | 1 + src/policies/ttl.rs | 563 +++++++++++++++++++++++++++++++++++ 16 files changed, 1155 insertions(+), 18 deletions(-) create mode 100644 
src/bridge/ttlcache.rs create mode 100644 src/policies/ttl.rs diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py index 14152c8..9698b5f 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -9,6 +9,7 @@ RRCache as RRCache, LRUCache as LRUCache, LFUCache as LFUCache, + TTLCache as TTLCache, BaseCacheImpl as BaseCacheImpl, IteratorView as IteratorView, ) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index a45c8ca..ac9b226 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -31,11 +31,12 @@ def _items_to_str(items, length): class BaseCacheImpl(typing.Generic[KT, VT]): """ Base implementation for cache classes in the cachebox library. - + This abstract base class defines the generic structure for cache implementations, supporting different key and value types through generic type parameters. Serves as a foundation for specific cache variants like Cache and FIFOCache. """ + pass @@ -1130,7 +1131,7 @@ def items(self) -> IteratorView[typing.Tuple[KT, VT]]: def items_with_frequency(self) -> IteratorView[typing.Tuple[KT, VT, int]]: """ Returns an iterable view - containing tuples of `(key, value, frequency)` - of the cache's items along with their access frequency. - + Notes: - The returned iterator should not be used to modify the cache. - Frequency represents how many times the item has been accessed. @@ -1158,9 +1159,9 @@ def values(self) -> IteratorView[VT]: def least_frequently_used(self, n: int = 0) -> typing.Optional[KT]: """ Returns the key in the cache that has been accessed the least, regardless of time. - + If n is given, returns the nth least frequently used key. - + Notes: - This method may re-sort the cache which can cause iterators to be stopped. - Do not use this method while using iterators. 
@@ -1170,7 +1171,7 @@ def least_frequently_used(self, n: int = 0) -> typing.Optional[KT]: if n < 0: return None - + return self._raw.least_frequently_used(n) def __iter__(self) -> IteratorView[KT]: @@ -1187,3 +1188,200 @@ def __repr__(self) -> str: # NOTE: we cannot use self._raw.items() here because iterables a tuples of (key, value, frequency) _items_to_str(self.items(), len(self._raw)), ) + + +class TTLCache(BaseCacheImpl[KT, VT]): + """ + TTL Cache implementation - Time-To-Live Policy (thread-safe). + + In simple terms, the TTL cache will automatically remove the element in the cache that has expired. + """ + + def __init__( + self, + maxsize: int, + ttl: float, + iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, + *, + capacity: int = 0, + ) -> None: + self._raw = _core.TTLCache(maxsize, ttl, capacity=capacity) + + if iterable is not None: + self.update(iterable) + + @property + def maxsize(self) -> int: + return self._raw.maxsize() + + @property + def ttl(self) -> float: + return self._raw.ttl() + + def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" + return self._raw.capacity() + + def __len__(self) -> int: + return len(self._raw) + + def __sizeof__(self): # pragma: no cover + return self._raw.__sizeof__() + + def __contains__(self, key: KT) -> bool: + return key in self._raw + + def __bool__(self) -> bool: + return not self.is_empty() + + def is_empty(self) -> bool: + return self._raw.is_empty() + + def is_full(self) -> bool: + return self._raw.is_full() + + def insert(self, key: KT, value: VT) -> typing.Optional[VT]: + """ + Equals to `self[key] = value`, but returns a value: + + - If the cache did not have this key present, None is returned. + - If the cache did have this key present, the value is updated, + and the old value is returned. 
The key is not updated, though; + """ + return self._raw.insert(key, value) + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Equals to `self[key]`, but returns `default` if the cache don't have this key present. + """ + try: + return self._raw.get(key).value() + except _core.CoreKeyError: + return default + + def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Removes specified key and return the corresponding value. If the key is not found, returns the `default`. + """ + try: + return self._raw.remove(key).value() + except _core.CoreKeyError: + return default + + def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Inserts key with a value of default if key is not in the cache. + + Return the value for key if key is in the cache, else default. + """ + return self._raw.setdefault(key, default) + + def popitem(self) -> typing.Tuple[KT, VT]: + """Removes the element that has been in the cache the longest.""" + try: + val = self._raw.popitem() + except _core.CoreKeyError: + raise KeyError() from None + else: + return (val.key(), val.value()) + + def drain(self, n: int) -> int: # pragma: no cover + """Does the `popitem()` `n` times and returns count of removed items.""" + if n <= 0: + return 0 + + for i in range(n): + try: + self._raw.popitem() + except _core.CoreKeyError: + return i + + return i + + def update(self, iterable: typing.Union[dict, typing.Iterable[tuple]]) -> None: + """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" + if hasattr(iterable, "items"): + iterable = iterable.items() + + self._raw.update(iterable) + + def __setitem__(self, key: KT, value: VT) -> None: + self.insert(key, value) + + def __getitem__(self, key: KT) -> VT: + try: + return self._raw.get(key).value() + except _core.CoreKeyError: + raise KeyError(key) from None + + def __delitem__(self, key: KT) -> None: 
+ try: + self._raw.remove(key) + except _core.CoreKeyError: + raise KeyError(key) from None + + def __eq__(self, other) -> bool: + if not isinstance(other, TTLCache): + return False # pragma: no cover + + return self._raw == other._raw + + def __ne__(self, other) -> bool: + if not isinstance(other, TTLCache): + return False # pragma: no cover + + return self._raw != other._raw + + def shrink_to_fit(self) -> None: + """Shrinks the cache to fit len(self) elements.""" + self._raw.shrink_to_fit() + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. + + If reuse is True, will not free the memory for reusing in the future. + """ + self._raw.clear(reuse) + + def items(self) -> IteratorView[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: (x.key(), x.value())) + + def keys(self) -> IteratorView[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x.key()) + + def values(self) -> IteratorView[VT]: + """ + Returns an iterable object of the cache's values. + + Notes: + - You should not make any changes in cache while using this iterable object. 
+ """ + return IteratorView(self._raw.items(), lambda x: x.value()) + + def __iter__(self) -> IteratorView[KT]: + return self.keys() + + def __repr__(self) -> str: + cls = type(self) + + return "%s.%s[%d/%d, ttl=%f](%s)" % ( + cls.__module__, + cls.__name__, + len(self._raw), + self._raw.maxsize(), + self._raw.ttl(), + _items_to_str(self.items(), len(self._raw)), + ) diff --git a/python/cachebox/_core.pyi b/python/cachebox/_core.pyi index aa98175..059faf8 100644 --- a/python/cachebox/_core.pyi +++ b/python/cachebox/_core.pyi @@ -6,4 +6,5 @@ class CoreKeyError(Exception): An exception when a key is not found in a cache. This exception is internal to the library core and won't affect you. """ + ... diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index 1ea46d6..080c93d 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -215,7 +215,7 @@ def test_policy(self): def test_items_with_frequency(self): # no need to test completely items_with_frequency # because it's tested in test_iterators - obj = LFUCache(10, {1:2, 3:4}) + obj = LFUCache(10, {1: 2, 3: 4}) for key, val, freq in obj.items_with_frequency(): assert key in obj assert val == obj[key] diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs index 227f1e3..1b59aaa 100644 --- a/src/bridge/cache.rs +++ b/src/bridge/cache.rs @@ -2,7 +2,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen)] +#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] pub struct Cache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs index 0877aaf..8ee688f 100644 --- a/src/bridge/fifocache.rs +++ b/src/bridge/fifocache.rs @@ -2,7 +2,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen)] +#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] 
pub struct FIFOCache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/lfucache.rs b/src/bridge/lfucache.rs index b7401c6..0894d54 100644 --- a/src/bridge/lfucache.rs +++ b/src/bridge/lfucache.rs @@ -2,7 +2,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen)] +#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] pub struct LFUCache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/lrucache.rs b/src/bridge/lrucache.rs index a857eba..05f625f 100644 --- a/src/bridge/lrucache.rs +++ b/src/bridge/lrucache.rs @@ -2,7 +2,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen)] +#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] pub struct LRUCache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index f0e3523..2e37db4 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -7,3 +7,4 @@ pub mod fifocache; pub mod lfucache; pub mod lrucache; pub mod rrcache; +pub mod ttlcache; diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs index ebc26c6..e8b51b5 100644 --- a/src/bridge/rrcache.rs +++ b/src/bridge/rrcache.rs @@ -3,7 +3,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen)] +#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] pub struct RRCache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs new file mode 100644 index 0000000..703095d --- /dev/null +++ b/src/bridge/ttlcache.rs @@ -0,0 +1,370 @@ +use crate::common::Entry; +use crate::common::ObservedIterator; +use crate::common::PreHashObject; + +#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] +pub struct TTLCache { + raw: crate::mutex::Mutex, +} + +#[pyo3::pyclass(module = "cachebox._core", 
frozen)] +pub struct TTLPair { + key: pyo3::PyObject, + value: pyo3::PyObject, + duration: std::time::Duration, +} + +#[allow(non_camel_case_types)] +#[pyo3::pyclass(module = "cachebox._core")] +pub struct ttlcache_items { + pub ptr: ObservedIterator, + pub iter: crate::mutex::Mutex, + pub now: std::time::SystemTime, +} + +#[pyo3::pymethods] +impl TTLCache { + #[new] + #[pyo3(signature=(maxsize, ttl, *, capacity=0))] + fn __new__(maxsize: usize, ttl: f64, capacity: usize) -> pyo3::PyResult { + let raw = crate::policies::ttl::TTLPolicy::new(maxsize, capacity, ttl)?; + + let self_ = Self { + raw: crate::mutex::Mutex::new(raw), + }; + Ok(self_) + } + + fn _state(&self) -> u16 { + self.raw.lock().observed.get() + } + + fn maxsize(&self) -> usize { + self.raw.lock().maxsize() + } + + fn ttl(&self) -> f64 { + self.raw.lock().ttl().as_secs_f64() + } + + fn capacity(&self) -> usize { + self.raw.lock().capacity().0 + } + + fn __len__(&self) -> usize { + self.raw.lock().real_len() + } + + fn __sizeof__(&self) -> usize { + let lock = self.raw.lock(); + let capacity = lock.capacity(); + + capacity.0 * std::mem::size_of::() + + capacity.1 + * (std::mem::size_of::() + + std::mem::size_of::()) + } + + fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(_) => Ok(true), + None => Ok(false), + } + } + + fn is_empty(&self) -> bool { + self.raw.lock().is_empty() + } + + fn is_full(&self) -> bool { + self.raw.lock().is_full() + } + + fn insert( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + value: pyo3::PyObject, + ) -> pyo3::PyResult> { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry_with_slot(py, &key)? 
{ + Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Absent(entry) => { + entry.insert(py, key, value)?; + Ok(None) + } + } + } + + fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(val) => Ok(TTLPair::clone_from_pair(py, val)), + None => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python<'_>, + iterable: pyo3::PyObject, + ) -> pyo3::PyResult<()> { + if slf.as_ptr() == iterable.as_ptr() { + return Ok(()); + } + + let mut lock = slf.raw.lock(); + lock.extend(py, iterable) + } + + fn __richcmp__( + slf: pyo3::PyRef<'_, Self>, + other: pyo3::PyObject, + op: pyo3::class::basic::CompareOp, + ) -> pyo3::PyResult { + let other = other.extract::>(slf.py())?; + + match op { + pyo3::class::basic::CompareOp::Eq => { + if slf.as_ptr() == other.as_ptr() { + return Ok(true); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2) + } + pyo3::class::basic::CompareOp::Ne => { + if slf.as_ptr() == other.as_ptr() { + return Ok(false); + } + + let t1 = slf.raw.lock(); + let t2 = other.raw.lock(); + t1.equal(slf.py(), &t2).map(|r| !r) + } + _ => Err(pyo3::PyErr::new::( + "only '==' or '!=' are supported", + )), + } + } + + fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let val = entry.remove(); + Ok(TTLPair::from(val)) + } + Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn popitem(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let mut lock = self.raw.lock(); + + match lock.popitem(py)? 
{ + Some(val) => Ok(TTLPair::from(val)), + None => Err(pyo3::PyErr::new::(())), + } + } + + fn clear(&self, py: pyo3::Python<'_>, reuse: bool) { + let mut lock = self.raw.lock(); + lock.clear(); + + if !reuse { + lock.shrink_to_fit(py); + } + } + + fn shrink_to_fit(&self, py: pyo3::Python<'_>) { + let mut lock = self.raw.lock(); + lock.shrink_to_fit(py); + } + + fn setdefault( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let val = entry.into_value(); + Ok(val.value.clone_ref(py)) + } + Entry::Absent(entry) => { + entry.insert(py, key, default.clone_ref(py))?; + Ok(default) + } + } + } + + fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + let mut lock = slf.raw.lock(); + let state = lock.observed.get(); + let iter = lock.iter(slf.py()); + + let result = ttlcache_items { + ptr: ObservedIterator::new(slf.as_ptr(), state), + iter: crate::mutex::Mutex::new(iter), + now: std::time::SystemTime::now(), + }; + + pyo3::Py::new(slf.py(), result) + } + + // fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option { + // let lock = self.raw.lock(); + + // lock.get_index(index).map(|(key, _)| key.obj.clone_ref(py)) + // } + + fn __getnewargs__(&self) -> (usize,) { + (0,) + } + + // fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + // let lock = self.raw.lock(); + + // let state = unsafe { + // let list = pyo3::ffi::PyList_New(0); + // if list.is_null() { + // return Err(pyo3::PyErr::fetch(py)); + // } + + // for (hk, val) in lock.entries_iter() { + // let tp = tuple!( + // py, + // 2, + // 0 => hk.obj.clone_ref(py).as_ptr(), + // 1 => val.clone_ref(py).as_ptr(), + // ); + + // if let Err(x) = tp { + // pyo3::ffi::Py_DECREF(list); + // return Err(x); + // } + + // if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { + // 
pyo3::ffi::Py_DECREF(list); + // return Err(pyo3::PyErr::fetch(py)); + // } + // } + + // let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + // let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity().0); + + // tuple!( + // py, + // 3, + // 0 => maxsize, + // 1 => list, + // 2 => capacity, + // )? + // }; + + // Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) + // } + + // pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + // let mut lock = self.raw.lock(); + // lock.from_pickle(py, state.as_ptr()) + // } + + pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + for value in self.raw.lock().entries_iter() { + visit.call(&value.key.obj)?; + visit.call(&value.value)?; + } + Ok(()) + } + + pub fn __clear__(&self) { + let mut lock = self.raw.lock(); + lock.clear() + } +} + +impl TTLPair { + fn clone_from_pair(py: pyo3::Python<'_>, pair: &crate::policies::ttl::TimeToLivePair) -> Self { + TTLPair { + key: pair.key.obj.clone_ref(py), + value: pair.value.clone_ref(py), + duration: pair.duration(), + } + } +} + +impl From for TTLPair { + fn from(value: crate::policies::ttl::TimeToLivePair) -> Self { + let duration = value.duration(); + + TTLPair { + key: value.key.obj, + value: value.value, + duration, + } + } +} + +#[pyo3::pymethods] +impl TTLPair { + fn key(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyObject { + slf.key.clone_ref(slf.py()) + } + + fn value(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyObject { + slf.value.clone_ref(slf.py()) + } + + fn duration(slf: pyo3::PyRef<'_, Self>) -> f64 { + slf.duration.as_secs_f64() + } +} + +#[pyo3::pymethods] +impl ttlcache_items { + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + #[allow(unused_mut)] + fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult { + let mut iter = slf.iter.lock(); + + slf.ptr.proceed(slf.py())?; + + let mut element: std::ptr::NonNull; + loop { + element = { + if let 
Some(x) = iter.next() { + x + } else { + return Err(pyo3::PyErr::new::(())); + } + }; + + if unsafe { element.as_ref().expire_at } > slf.now { + break; + } + } + + Ok(TTLPair::clone_from_pair(slf.py(), unsafe { + element.as_ref() + })) + } +} diff --git a/src/lib.rs b/src/lib.rs index 3afbb93..e3057e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,8 @@ fn _core(py: pyo3::Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs index 5e65a3e..c7a54d2 100644 --- a/src/policies/fifo.rs +++ b/src/policies/fifo.rs @@ -6,7 +6,7 @@ use crate::common::TryFindMethods; use std::collections::VecDeque; -const MAX_N_SHIFT: usize = usize::MAX - (isize::MAX as usize); +pub const MAX_N_SHIFT: usize = usize::MAX - (isize::MAX as usize); pub struct FIFOPolicy { /// We set [Vec] objects indexes in hashtable to make search O(1). hashtable is unordered, @@ -301,7 +301,7 @@ impl FIFOPolicy { Ok(()) } - #[inline(always)] + #[inline] pub fn iter(&self) -> FIFOIterator { let (a, b) = self.entries.as_slices(); @@ -324,7 +324,7 @@ impl FIFOPolicy { let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state => list); let mut new = Self::new(maxsize, capacity)?; - + for pair in iterable.bind(py).try_iter()? { let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; @@ -364,7 +364,6 @@ impl<'a> FIFOPolicyOccupied<'a> { #[inline] pub fn remove(self) -> (PreHashObject, pyo3::PyObject) { - // let (PreHashObject { hash, .. 
}, _) = &self.instance.entries[self.index - self.instance.n_shifts]; let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; index -= self.instance.n_shifts; diff --git a/src/policies/lfu.rs b/src/policies/lfu.rs index b7f9a53..4d25137 100644 --- a/src/policies/lfu.rs +++ b/src/policies/lfu.rs @@ -267,7 +267,7 @@ impl LFUPolicy { state: *mut pyo3::ffi::PyObject, ) -> pyo3::PyResult<()> { use pyo3::types::PyAnyMethods; - + unsafe { tuple!(check state, size=3)?; let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state => list); @@ -282,7 +282,8 @@ impl LFUPolicy { let mut new = Self::new(maxsize, capacity)?; for pair in iterable.bind(py).try_iter()? { - let (key, value, freq) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject, usize)>()?; + let (key, value, freq) = + pair?.extract::<(pyo3::PyObject, pyo3::PyObject, usize)>()?; let hk = PreHashObject::from_pyobject(py, key)?; @@ -293,7 +294,7 @@ impl LFUPolicy { _ => std::hint::unreachable_unchecked(), } } - + new.heap.sort_by(|a, b| a.2.cmp(&b.2)); *self = new; diff --git a/src/policies/mod.rs b/src/policies/mod.rs index 6956bd1..4140d2d 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -3,3 +3,4 @@ pub mod lfu; pub mod lru; pub mod nopolicy; pub mod random; +pub mod ttl; diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs new file mode 100644 index 0000000..b8c1f84 --- /dev/null +++ b/src/policies/ttl.rs @@ -0,0 +1,563 @@ +use super::fifo::MAX_N_SHIFT; +use crate::common::Entry; +use crate::common::NoLifetimeSliceIter; +use crate::common::Observed; +use crate::common::PreHashObject; +use crate::common::TryFindMethods; + +use std::collections::VecDeque; + +pub struct TTLPolicy { + // See FIFOPolicy to find out fields + table: hashbrown::raw::RawTable, + entries: VecDeque, + maxsize: core::num::NonZeroUsize, + ttl: std::time::Duration, + n_shifts: usize, + pub observed: Observed, +} + +/// A pair representing a key-value entry with a time-to-live (TTL) expiration. 
+pub struct TimeToLivePair { + pub key: PreHashObject, + pub value: pyo3::PyObject, + pub expire_at: std::time::SystemTime, +} + +pub struct TTLPolicyOccupied<'a> { + instance: &'a mut TTLPolicy, + bucket: hashbrown::raw::Bucket, +} + +/// Represents the possible situations when a key is absent from the TTL policy's data structure. +/// +/// This enum helps track different scenarios during key insertion. +enum AbsentSituation { + /// A valid insertion slot is available + Slot(hashbrown::raw::InsertSlot), + + /// An expired entry's bucket is found + Expired(hashbrown::raw::Bucket), + + /// No suitable slot or expired entry is found + None, +} + +pub struct TTLPolicyAbsent<'a> { + instance: &'a mut TTLPolicy, + situation: AbsentSituation, +} + +pub struct TTLIterator { + first: NoLifetimeSliceIter, + second: NoLifetimeSliceIter, +} + +impl TTLPolicy { + #[inline] + pub fn new(maxsize: usize, mut capacity: usize, secs: f64) -> pyo3::PyResult { + let maxsize = non_zero_or!(maxsize, isize::MAX as usize); + capacity = capacity.min(maxsize.get()); + + Ok(Self { + table: new_table!(capacity)?, + entries: VecDeque::new(), + maxsize, + ttl: std::time::Duration::from_secs_f64(secs), + n_shifts: 0, + observed: Observed::new(), + }) + } + + #[inline] + pub fn maxsize(&self) -> usize { + self.maxsize.get() + } + + #[inline] + pub fn ttl(&self) -> std::time::Duration { + self.ttl + } + + #[inline] + pub fn real_len(&self) -> usize { + let mut c = 0usize; + + for item in &self.entries { + if item.expire_at > std::time::SystemTime::now() { + break; + } + + c += 1; + } + + self.table.len() - c + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.real_len() == 0 + } + + #[inline] + pub fn is_full(&self) -> bool { + self.real_len() == self.maxsize.get() + } + + #[inline] + pub fn capacity(&self) -> (usize, usize) { + (self.table.capacity(), self.entries.capacity()) + } + + #[inline] + fn decrement_indexes(&mut self, start: usize, end: usize) { + if start <= 1 && end == 
self.entries.len() && self.n_shifts < MAX_N_SHIFT { + self.n_shifts += 1; + return; + } + + if (end - start) > self.table.buckets() / 2 { + unsafe { + for bucket in self.table.iter() { + let i = bucket.as_mut(); + if start <= (*i) - self.n_shifts && (*i) - self.n_shifts < end { + *i -= 1; + } + } + } + } else { + let shifted = self.entries.range(start..end); + for (i, entry) in (start..end).zip(shifted) { + let old = self + .table + .get_mut(entry.key.hash, |x| (*x) - self.n_shifts == i) + .expect("index not found"); + + *old -= 1; + } + } + } + + #[inline] + pub fn expire(&mut self, py: pyo3::Python<'_>) { + while !self.entries.is_empty() { + if self.entries[0].expire_at > std::time::SystemTime::now() { + break; + } + + unsafe { + self.popitem(py).unwrap_unchecked(); + } + } + } + + #[inline] + pub fn popitem(&mut self, py: pyo3::Python<'_>) -> pyo3::PyResult> { + let ret = self.entries.front(); + if ret.is_none() { + return Ok(None); + } + + let ret = unsafe { ret.unwrap_unchecked() }; + + match self.table.try_find(ret.key.hash, |x| { + self.entries[(*x) - self.n_shifts].key.equal(py, &ret.key) + })? { + Some(bucket) => { + unsafe { self.table.remove(bucket) }; + } + None => unreachable!("popitem key not found in table"), + } + + let ret = unsafe { self.entries.pop_front().unwrap_unchecked() }; + + self.observed.change(); + + self.decrement_indexes(1, self.entries.len()); + Ok(Some(ret)) + } + + #[rustfmt::skip] + pub fn entry( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find(key.hash, |x| self.entries[(*x) - self.n_shifts].key.equal(py, key))? 
+ { + Some(bucket) => { + let pair = &self.entries[unsafe { *bucket.as_ptr() } - self.n_shifts]; + + if pair.expire_at > std::time::SystemTime::now() { + Ok(Entry::Occupied(TTLPolicyOccupied { instance: self, bucket })) + } else { + Ok(Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) + } + } + None => { + Ok( + Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::None }) + ) + }, + } + } + + #[rustfmt::skip] + pub fn entry_with_slot( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self.table.try_find_or_find_insert_slot( + key.hash, + |x| self.entries[(*x) - self.n_shifts].key.equal(py, key), + |x| self.entries[(*x) - self.n_shifts].key.hash, + )? { + Ok(bucket) => { + let pair = &self.entries[unsafe { *bucket.as_ptr() } - self.n_shifts]; + + if pair.expire_at > std::time::SystemTime::now() { + Ok(Entry::Occupied(TTLPolicyOccupied { instance: self, bucket })) + } else { + Ok(Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) + } + }, + Err(insert_slot) => { + Ok( + Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Slot(insert_slot) }) + ) + }, + } + } + + pub fn lookup( + &self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find(key.hash, |x| { + self.entries[(*x) - self.n_shifts].key.equal(py, key) + })? 
+ .map(|bucket| unsafe { bucket.as_ref() }) + { + Some(index) => { + let pair = &self.entries[(*index) - self.n_shifts]; + + if pair.expire_at > std::time::SystemTime::now() { + Ok(Some(pair)) + } else { + Ok(None) + } + } + None => Ok(None), + } + } + + #[inline] + pub fn clear(&mut self) { + self.table.clear(); + self.entries.clear(); + self.n_shifts = 0; + self.observed.change(); + } + + #[inline] + pub fn shrink_to_fit(&mut self, py: pyo3::Python<'_>) { + self.expire(py); + + self.table.shrink_to(self.table.len(), |x| { + self.entries[(*x) - self.n_shifts].key.hash + }); + self.entries.shrink_to_fit(); + self.observed.change(); + } + + pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { + use pyo3::types::{PyAnyMethods, PyDictMethods}; + + if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + for (key, value) in dict.iter() { + let hk = + unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; + + match self.entry_with_slot(py, &hk)? { + Entry::Occupied(mut entry) => { + entry.update(value.unbind())?; + } + Entry::Absent(entry) => { + entry.insert(py, hk, value.unbind())?; + } + } + } + } else { + for pair in iterable.bind(py).try_iter()? { + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match self.entry_with_slot(py, &hk)? 
{ + Entry::Occupied(mut entry) => { + entry.update(value)?; + } + Entry::Absent(entry) => { + entry.insert(py, hk, value)?; + } + } + } + } + + Ok(()) + } + + #[inline] + pub fn entries_iter(&self) -> std::collections::vec_deque::Iter<'_, TimeToLivePair> { + self.entries.iter() + } + + pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { + if self.maxsize != other.maxsize { + return Ok(false); + } + + if self.real_len() != other.real_len() { + return Ok(false); + } + + let now = std::time::SystemTime::now(); + + unsafe { + for index1 in self.table.iter().map(|x| x.as_ref()) { + let pair1 = &self.entries[(*index1) - self.n_shifts]; + + if pair1.expire_at < now { + continue; + } + + match other.table.try_find(pair1.key.hash, |x| { + pair1 + .key + .equal(py, &other.entries[(*x) - other.n_shifts].key) + })? { + Some(bucket) => { + let pair2 = &other.entries[(*bucket.as_ref()) - other.n_shifts]; + + if pair1.expire_at < now { + return Ok(false); + } + + if !crate::common::pyobject_equal( + py, + pair1.value.as_ptr(), + pair2.value.as_ptr(), + )? 
{ + return Ok(false); + } + } + None => return Ok(false), + } + } + } + + Ok(true) + } + + #[inline] + pub fn iter(&mut self, py: pyo3::Python<'_>) -> TTLIterator { + self.expire(py); + + let (a, b) = self.entries.as_slices(); + + TTLIterator { + first: NoLifetimeSliceIter::new(a), + second: NoLifetimeSliceIter::new(b), + } + } +} + +impl TimeToLivePair { + #[inline] + pub fn new( + key: PreHashObject, + value: pyo3::PyObject, + expire_at: std::time::SystemTime, + ) -> Self { + Self { + key, + value, + expire_at, + } + } + + #[inline] + pub fn duration(&self) -> std::time::Duration { + self.expire_at + .duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + } +} + +impl<'a> TTLPolicyOccupied<'a> { + #[inline] + pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + // We have to move the value to the end of the vector + let mut item = self + .instance + .entries + .remove(unsafe { *self.bucket.as_ptr() } - self.instance.n_shifts) + .unwrap(); + + self.instance.decrement_indexes( + unsafe { *self.bucket.as_ptr() } + 1 - self.instance.n_shifts, + self.instance.entries.len(), + ); + + unsafe { + *self.bucket.as_mut() = self.instance.entries.len() + self.instance.n_shifts; + } + + item.expire_at = std::time::SystemTime::now() + self.instance.ttl; + let old_value = std::mem::replace(&mut item.value, value); + + self.instance.entries.push_back(item); + + // In contrast to all algorithms, in this algorithm we need to change the observed value + // because we moved an element + self.instance.observed.change(); + + Ok(old_value) + } + + #[inline] + pub fn remove(self) -> TimeToLivePair { + // let (PreHashObject { hash, .. 
}, _) = &self.instance.entries[self.index - self.instance.n_shifts]; + let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; + index -= self.instance.n_shifts; + + self.instance + .decrement_indexes(index + 1, self.instance.entries.len()); + + let m = self.instance.entries.remove(index).unwrap(); + + self.instance.observed.change(); + m + } + + #[inline] + pub fn into_value(self) -> &'a mut TimeToLivePair { + let index = unsafe { self.bucket.as_ref() }; + &mut self.instance.entries[index - self.instance.n_shifts] + } +} + +impl TTLPolicyAbsent<'_> { + #[inline] + pub fn insert( + self, + py: pyo3::Python<'_>, + key: PreHashObject, + value: pyo3::PyObject, + ) -> pyo3::PyResult<()> { + let expire_at = std::time::SystemTime::now() + self.instance.ttl; + + match self.situation { + AbsentSituation::Expired(bucket) => { + // This means the key is available but expired + // So we have to move the value to the end of the vector + // and update the bucket ( like TTLPolicyOccupied::update ) + let mut item = self + .instance + .entries + .remove(unsafe { *bucket.as_ptr() } - self.instance.n_shifts) + .unwrap(); + + self.instance.decrement_indexes( + unsafe { *bucket.as_ptr() } + 1 - self.instance.n_shifts, + self.instance.entries.len(), + ); + + unsafe { + *bucket.as_mut() = self.instance.entries.len() + self.instance.n_shifts; + } + + // Actually we don't need to update the key in this situation + item.key = key; + item.value = value; + item.expire_at = expire_at; + + self.instance.entries.push_back(item); + } + AbsentSituation::Slot(slot) => unsafe { + // This means the key is not available and we have insert_slot + // for inserting it + + self.instance.expire(py); // Remove expired pairs to make room for the new pair + + if self.instance.table.len() >= self.instance.maxsize.get() { + self.instance.popitem(py)?; + } + + self.instance.table.insert_in_slot( + key.hash, + slot, + self.instance.entries.len() + self.instance.n_shifts, + ); + + self.instance + 
.entries + .push_back(TimeToLivePair::new(key, value, expire_at)); + }, + AbsentSituation::None => { + // This is same as AbsentSituation::Slot but we don't have any slot + + self.instance.expire(py); // Remove expired pairs to make room for the new pair + + if self.instance.table.len() >= self.instance.maxsize.get() { + self.instance.popitem(py)?; + } + + self.instance.table.insert( + key.hash, + self.instance.entries.len() + self.instance.n_shifts, + |index| { + self.instance.entries[(*index) - self.instance.n_shifts] + .key + .hash + }, + ); + + self.instance + .entries + .push_back(TimeToLivePair::new(key, value, expire_at)); + } + } + + self.instance.observed.change(); + Ok(()) + } +} + +impl Iterator for TTLIterator { + type Item = std::ptr::NonNull; + + fn next(&mut self) -> Option { + match self.first.next() { + Some(val) => Some(val), + None => { + core::mem::swap(&mut self.first, &mut self.second); + self.first.next() + } + } + } +} + +unsafe impl Send for TTLIterator {} From 325c201cdb43ca44aa0650e57c5a68d63f3a7752 Mon Sep 17 00:00:00 2001 From: awolverp Date: Mon, 14 Apr 2025 12:56:11 +0330 Subject: [PATCH 20/37] Support timedelta as ttl in TTLCache --- python/cachebox/_cachebox.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index ac9b226..c93675b 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -1,4 +1,5 @@ from . 
import _core +from datetime import timedelta import typing @@ -1200,11 +1201,17 @@ class TTLCache(BaseCacheImpl[KT, VT]): def __init__( self, maxsize: int, - ttl: float, + ttl: typing.Union[float, timedelta], iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, *, capacity: int = 0, ) -> None: + if isinstance(ttl, timedelta): + ttl = ttl.total_seconds() + + if ttl <= 0: + raise ValueError("ttl must be a positive number and non-zero") + self._raw = _core.TTLCache(maxsize, ttl, capacity=capacity) if iterable is not None: From 72822dcc902b1c26a1e6c406765f0e34ae38d739 Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 15 Apr 2025 17:19:20 +0330 Subject: [PATCH 21/37] Fix and optimize TTLCache --- python/cachebox/_cachebox.py | 273 +++++++++++++++++++++++++++++++---- python/tests/mixin.py | 10 +- python/tests/test_caches.py | 154 ++++++++++++++++++++ src/bridge/ttlcache.rs | 12 +- src/policies/ttl.rs | 65 +++++---- 5 files changed, 445 insertions(+), 69 deletions(-) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index c93675b..5b3f5ad 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -140,7 +140,19 @@ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: return self._raw.insert(key, value) def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """Equals to `self[key]`, but returns `default` if the cache don't have this key present.""" + """ + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + The value associated with the key, or the default value if the key is not found. 
+ """ try: return self._raw.get(key) except _core.CoreKeyError: @@ -330,17 +342,36 @@ def is_full(self) -> bool: def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair into the cache, returning the previous value if the key existed. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Equivalent to `self[key] = value`, but with additional return value semantics: + + - If the key was not previously in the cache, returns None. + - If the key was already present, updates the value and returns the old value. + The key itself is not modified. + + Args: + key: The key to insert. + value: The value to associate with the key. + + Returns: + The previous value associated with the key, or None if the key was not present. """ return self._raw.insert(key, value) def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: - """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. + """ " + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + The value associated with the key, or the default value if the key is not found. """ try: return self._raw.get(key) @@ -554,17 +585,36 @@ def is_full(self) -> bool: def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair into the cache, returning the previous value if the key existed. 
- - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Equivalent to `self[key] = value`, but with additional return value semantics: + + - If the key was not previously in the cache, returns None. + - If the key was already present, updates the value and returns the old value. + The key itself is not modified. + + Args: + key: The key to insert. + value: The value to associate with the key. + + Returns: + The previous value associated with the key, or None if the key was not present. """ return self._raw.insert(key, value) def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + The value associated with the key, or the default value if the key is not found. """ try: return self._raw.get(key) @@ -778,11 +828,20 @@ def is_full(self) -> bool: def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair into the cache, returning the previous value if the key existed. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Equivalent to `self[key] = value`, but with additional return value semantics: + + - If the key was not previously in the cache, returns None. 
+ - If the key was already present, updates the value and returns the old value. + The key itself is not modified. + + Args: + key: The key to insert. + value: The value to associate with the key. + + Returns: + The previous value associated with the key, or None if the key was not present. """ return self._raw.insert(key, value) @@ -797,7 +856,17 @@ def peek(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + The value associated with the key, or the default value if the key is not found. """ try: return self._raw.get(key) @@ -1009,11 +1078,20 @@ def is_full(self) -> bool: def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair into the cache, returning the previous value if the key existed. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Equivalent to `self[key] = value`, but with additional return value semantics: + + - If the key was not previously in the cache, returns None. + - If the key was already present, updates the value and returns the old value. + The key itself is not modified. + + Args: + key: The key to insert. + value: The value to associate with the key. 
+ + Returns: + The previous value associated with the key, or None if the key was not present. """ return self._raw.insert(key, value) @@ -1028,7 +1106,17 @@ def peek(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + The value associated with the key, or the default value if the key is not found. """ try: return self._raw.get(key) @@ -1193,9 +1281,10 @@ def __repr__(self) -> str: class TTLCache(BaseCacheImpl[KT, VT]): """ - TTL Cache implementation - Time-To-Live Policy (thread-safe). + A thread-safe Time-To-Live (TTL) cache implementation with configurable maximum size and expiration. - In simple terms, the TTL cache will automatically remove the element in the cache that has expired. + This cache automatically removes elements that have expired based on their time-to-live setting. + Supports various operations like insertion, retrieval, and iteration with O(1) complexity. """ def __init__( @@ -1206,6 +1295,18 @@ def __init__( *, capacity: int = 0, ) -> None: + """ + Initialize a new TTL cache instance. + + Args: + maxsize: Maximum number of elements the cache can hold. + ttl: Time-to-live for cache entries, either as seconds or a timedelta. + iterable: Optional initial items to populate the cache, can be a dict or iterable of tuples. + capacity: Optional initial capacity for the underlying cache storage. Defaults to 0. 
+ + Raises: + ValueError: If the time-to-live (ttl) is not a positive number. + """ if isinstance(ttl, timedelta): ttl = ttl.total_seconds() @@ -1249,23 +1350,66 @@ def is_full(self) -> bool: def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ - Equals to `self[key] = value`, but returns a value: + Inserts a key-value pair into the cache, returning the previous value if the key existed. - - If the cache did not have this key present, None is returned. - - If the cache did have this key present, the value is updated, - and the old value is returned. The key is not updated, though; + Equivalent to `self[key] = value`, but with additional return value semantics: + + - If the key was not previously in the cache, returns None. + - If the key was already present, updates the value and returns the old value. + The key itself is not modified. + + Args: + key: The key to insert. + value: The value to associate with the key. + + Returns: + The previous value associated with the key, or None if the key was not present. """ return self._raw.insert(key, value) def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """ - Equals to `self[key]`, but returns `default` if the cache don't have this key present. + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + The value associated with the key, or the default value if the key is not found. 
""" try: return self._raw.get(key).value() except _core.CoreKeyError: return default + def get_with_expire( + self, key: KT, default: typing.Optional[DT] = None + ) -> typing.Tuple[typing.Union[VT, DT], float]: + """ + Retrieves the value and expiration duration for a given key from the cache. + + Returns a tuple containing the value associated with the key and its duration. + If the key is not found, returns the default value and 0.0 duration. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + A tuple of (value, duration), where value is the cached value or default, + and duration is the time-to-live for the key (or 0.0 if not found). + """ + try: + pair = self._raw.get(key) + except _core.CoreKeyError: + return default, 0.0 + else: + return (pair.value(), pair.duration()) + def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """ Removes specified key and return the corresponding value. If the key is not found, returns the `default`. @@ -1275,6 +1419,29 @@ def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, except _core.CoreKeyError: return default + def pop_with_expire( + self, key: KT, default: typing.Optional[DT] = None + ) -> typing.Tuple[typing.Union[VT, DT], float]: + """ + Removes the specified key from the cache and returns its value and expiration duration. + + If the key is not found, returns the default value and 0.0 duration. + + Args: + key: The key to remove from the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + A tuple of (value, duration), where value is the cached value or default, + and duration is the time-to-live for the key (or 0.0 if not found). 
+ """ + try: + pair = self._raw.remove(key) + except _core.CoreKeyError: + return default, 0.0 + else: + return (pair.value(), pair.duration()) + def setdefault(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: """ Inserts key with a value of default if key is not in the cache. @@ -1292,6 +1459,25 @@ def popitem(self) -> typing.Tuple[KT, VT]: else: return (val.key(), val.value()) + def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: + """ + Removes and returns the element that has been in the cache the longest, along with its key and expiration duration. + + If the cache is empty, raises a KeyError. + + Returns: + A tuple of (key, value, duration), where: + - key is the key of the removed item + - value is the value of the removed item + - duration is the time-to-live for the removed item + """ + try: + val = self._raw.popitem() + except _core.CoreKeyError: + raise KeyError() from None + else: + return (val.key(), val.value(), val.duration()) + def drain(self, n: int) -> int: # pragma: no cover """Does the `popitem()` `n` times and returns count of removed items.""" if n <= 0: @@ -1351,6 +1537,15 @@ def clear(self, *, reuse: bool = False) -> None: """ self._raw.clear(reuse) + def items_with_expire(self) -> IteratorView[typing.Tuple[KT, VT, float]]: + """ + Returns an iterable object of the cache's items (key-value pairs along with their expiration duration). + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: (x.key(), x.value(), x.duration())) + def items(self) -> IteratorView[typing.Tuple[KT, VT]]: """ Returns an iterable object of the cache's items (key-value pairs). 
@@ -1378,6 +1573,26 @@ def values(self) -> IteratorView[VT]: """ return IteratorView(self._raw.items(), lambda x: x.value()) + def first(self, n: int = 0) -> typing.Optional[KT]: + """ + Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). + + By using `n` parameter, you can browse order index by index. + """ + if n < 0: + n = len(self._raw) + n + + if n < 0: + return None + + return self._raw.get_index(n) + + def last(self) -> typing.Optional[KT]: + """ + Returns the last key in cache. Equals to `self.first(-1)`. + """ + return self._raw.get_index(len(self._raw) - 1) + def __iter__(self) -> IteratorView[KT]: return self.keys() diff --git a/python/tests/mixin.py b/python/tests/mixin.py index c4f4d38..aec8f55 100644 --- a/python/tests/mixin.py +++ b/python/tests/mixin.py @@ -1,4 +1,4 @@ -from cachebox import BaseCacheImpl +from cachebox import BaseCacheImpl, TTLCache import dataclasses import pytest import typing @@ -313,8 +313,12 @@ def test_iterators(self): for key, value in obj.items(): assert obj[key] == value - for key, value in obj.items(): - obj[key] = value * 2 + try: + for key, value in obj.items(): + obj[key] = value * 2 + except RuntimeError: + if not isinstance(obj, TTLCache): + raise with pytest.raises(RuntimeError): for key, value in obj.items(): diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index 080c93d..08096bc 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -4,9 +4,11 @@ RRCache, LRUCache, LFUCache, + TTLCache, ) import pytest from .mixin import _TestMixin +import time class TestCache(_TestMixin): @@ -248,3 +250,155 @@ def inner(c1, c2): assert list(c1.items()) == list(c2.items()) self._test_pickle(inner) + + +class TestTTLCache(_TestMixin): + CACHE = TTLCache + KWARGS = {"ttl": 10} + + def test_policy(self): + obj = self.CACHE(2, 0.5) + assert obj.ttl == 0.5 + + obj.insert(0, 1) + time.sleep(0.8) + + with pytest.raises(KeyError): + obj[0] + + obj = 
self.CACHE(2, 20) + + obj.insert(0, 0) + obj.insert(1, 1) + obj.insert(2, 2) + + assert 0 not in obj + assert (1, 1) == obj.popitem() + + def test_update_with_ttl(self): + obj = self.CACHE(2, 0.5) + + # obj.update({1: 1, 2: 2, 3: 3}) + obj.update((i + 1, i + 1) for i in range(3)) + + with pytest.raises(KeyError): + obj[1] + + time.sleep(0.8) + + with pytest.raises(KeyError): + obj[2] + + with pytest.raises(KeyError): + obj[3] + + def test_policy_ttl_no_care(self): + cache = TTLCache(5, 10) + + cache[0] = 0 + cache[1] = 1 + cache[2] = 2 + + assert cache[0] == 0 + assert cache[1] == 1 + + assert cache.popitem() == (0, 0) + + cache[3] = 3 + + assert cache.popitem() == (1, 1) + assert cache.popitem() == (2, 2) + assert cache.popitem() == (3, 3) + + with pytest.raises(KeyError): + cache.popitem() + + for i in range(5): + cache[i] = i + + for i in range(5): + assert i in cache + + cache[10] = 10 + + assert 0 not in cache + assert 10 in cache + + assert cache.popitem() == (1, 1) + + del cache[2] + del cache[3] + del cache[4] + + assert cache.popitem() == (10, 10) + + def test_pickle(self): + def inner(c1, c2): + assert list(c1.items()) == list(c2.items()) + + self._test_pickle(inner) + + def test_first_last(self): + obj = self.CACHE(5, **self.KWARGS, capacity=5) + + for i in range(5): + obj[i] = i * 2 + + assert obj.first() == 0 + assert obj.last() == 4 + + obj[10] = 20 + + assert obj.first() == 1 + assert obj.last() == 10 + + def test_get_with_expire(self): + obj = TTLCache(2, 10) + + obj.insert(1, 1) + time.sleep(0.1) + value, dur = obj.get_with_expire(1) + assert 1 == value + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + value, dur = obj.get_with_expire("no-exists") + assert value is None + assert 0 == dur + + value, dur = obj.get_with_expire("no-exists", "value") + assert "value" == value + assert 0 == dur + + def test_pop_with_expire(self): + obj = TTLCache(2, 10) + + obj.insert(1, 1) + time.sleep(0.1) + value, dur = obj.pop_with_expire(1) + assert 1 
== value + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + value, dur = obj.pop_with_expire("no-exists") + assert value is None + assert 0 == dur + + value, dur = obj.pop_with_expire("no-exists", "value") + assert "value" == value + assert 0 == dur + + def test_popitem_with_expire(self): + obj = TTLCache(2, 10) + + obj.insert(1, 1) + obj.insert(2, 2) + time.sleep(0.1) + key, value, dur = obj.popitem_with_expire() + assert (1, 1) == (key, value) + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + key, value, dur = obj.popitem_with_expire() + assert (2, 2) == (key, value) + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + with pytest.raises(KeyError): + obj.popitem_with_expire() diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs index 703095d..8f2cacc 100644 --- a/src/bridge/ttlcache.rs +++ b/src/bridge/ttlcache.rs @@ -227,14 +227,14 @@ impl TTLCache { pyo3::Py::new(slf.py(), result) } - // fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option { - // let lock = self.raw.lock(); + fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option { + let lock = self.raw.lock(); - // lock.get_index(index).map(|(key, _)| key.obj.clone_ref(py)) - // } + lock.get_index(index).map(|pair| pair.key.obj.clone_ref(py)) + } - fn __getnewargs__(&self) -> (usize,) { - (0,) + fn __getnewargs__(&self) -> (usize, f64) { + (0, 0.0f64) } // fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs index b8c1f84..e8d0028 100644 --- a/src/policies/ttl.rs +++ b/src/policies/ttl.rs @@ -382,6 +382,11 @@ impl TTLPolicy { second: NoLifetimeSliceIter::new(b), } } + + #[inline(always)] + pub fn get_index(&self, n: usize) -> Option<&TimeToLivePair> { + self.entries.get(n) + } } impl TimeToLivePair { @@ -410,28 +415,27 @@ impl<'a> TTLPolicyOccupied<'a> { #[inline] pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { // We have to move the value to the end of 
the vector - let mut item = self - .instance - .entries - .remove(unsafe { *self.bucket.as_ptr() } - self.instance.n_shifts) - .unwrap(); + let (mut index, slot) = unsafe { self.instance.table.remove(self.bucket.clone()) }; + index -= self.instance.n_shifts; - self.instance.decrement_indexes( - unsafe { *self.bucket.as_ptr() } + 1 - self.instance.n_shifts, - self.instance.entries.len(), - ); + self.instance + .decrement_indexes(index + 1, self.instance.entries.len()); - unsafe { - *self.bucket.as_mut() = self.instance.entries.len() + self.instance.n_shifts; - } + let mut item = self.instance.entries.remove(index).unwrap(); item.expire_at = std::time::SystemTime::now() + self.instance.ttl; let old_value = std::mem::replace(&mut item.value, value); - self.instance.entries.push_back(item); + unsafe { + self.instance.table.insert_in_slot( + item.key.hash, + slot, + self.instance.entries.len() + self.instance.n_shifts, + ); + + self.instance.entries.push_back(item); + } - // In contrast to all algorithms, in this algorithm we need to change the observed value - // because we moved an element self.instance.observed.change(); Ok(old_value) @@ -474,27 +478,26 @@ impl TTLPolicyAbsent<'_> { // This means the key is available but expired // So we have to move the value to the end of the vector // and update the bucket ( like TTLPolicyOccupied::update ) - let mut item = self - .instance - .entries - .remove(unsafe { *bucket.as_ptr() } - self.instance.n_shifts) - .unwrap(); + let (mut index, slot) = unsafe { self.instance.table.remove(bucket) }; + index -= self.instance.n_shifts; - self.instance.decrement_indexes( - unsafe { *bucket.as_ptr() } + 1 - self.instance.n_shifts, - self.instance.entries.len(), - ); + self.instance + .decrement_indexes(index + 1, self.instance.entries.len()); - unsafe { - *bucket.as_mut() = self.instance.entries.len() + self.instance.n_shifts; - } + let mut item = self.instance.entries.remove(index).unwrap(); - // Actually we don't need to update the 
key in this situation - item.key = key; + item.expire_at = std::time::SystemTime::now() + self.instance.ttl; item.value = value; - item.expire_at = expire_at; - self.instance.entries.push_back(item); + unsafe { + self.instance.table.insert_in_slot( + item.key.hash, + slot, + self.instance.entries.len() + self.instance.n_shifts, + ); + + self.instance.entries.push_back(item); + } } AbsentSituation::Slot(slot) => unsafe { // This means the key is not available and we have insert_slot From d716b0f723abc60562f612cac03f812cf276d8dd Mon Sep 17 00:00:00 2001 From: awolverp Date: Tue, 15 Apr 2025 17:34:40 +0330 Subject: [PATCH 22/37] Refactor pickle of TTLCache --- src/bridge/ttlcache.rs | 99 ++++++++++++++++++----------------- src/policies/ttl.rs | 114 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 47 deletions(-) diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs index 8f2cacc..1fd668e 100644 --- a/src/bridge/ttlcache.rs +++ b/src/bridge/ttlcache.rs @@ -237,53 +237,58 @@ impl TTLCache { (0, 0.0f64) } - // fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { - // let lock = self.raw.lock(); - - // let state = unsafe { - // let list = pyo3::ffi::PyList_New(0); - // if list.is_null() { - // return Err(pyo3::PyErr::fetch(py)); - // } - - // for (hk, val) in lock.entries_iter() { - // let tp = tuple!( - // py, - // 2, - // 0 => hk.obj.clone_ref(py).as_ptr(), - // 1 => val.clone_ref(py).as_ptr(), - // ); - - // if let Err(x) = tp { - // pyo3::ffi::Py_DECREF(list); - // return Err(x); - // } - - // if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { - // pyo3::ffi::Py_DECREF(list); - // return Err(pyo3::PyErr::fetch(py)); - // } - // } - - // let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); - // let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity().0); - - // tuple!( - // py, - // 3, - // 0 => maxsize, - // 1 => list, - // 2 => capacity, - // )? 
- // }; - - // Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) - // } - - // pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { - // let mut lock = self.raw.lock(); - // lock.from_pickle(py, state.as_ptr()) - // } + fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let lock = self.raw.lock(); + + let state = unsafe { + let list = pyo3::ffi::PyList_New(0); + if list.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + for element in lock.entries_iter() { + let tp = tuple!( + py, + 3, + 0 => element.key.obj.clone_ref(py).as_ptr(), + 1 => element.value.clone_ref(py).as_ptr(), + 2 => pyo3::ffi::PyFloat_FromDouble( + element.expire_at.duration_since(std::time::UNIX_EPOCH).unwrap_unchecked().as_secs_f64() + ), + ); + + if let Err(x) = tp { + pyo3::ffi::Py_DECREF(list); + return Err(x); + } + + if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { + pyo3::ffi::Py_DECREF(list); + return Err(pyo3::PyErr::fetch(py)); + } + } + + let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity().0); + let ttl = pyo3::ffi::PyFloat_FromDouble(lock.ttl().as_secs_f64()); + + tuple!( + py, + 4, + 0 => maxsize, + 1 => list, + 2 => capacity, + 3 => ttl, + )? 
+ }; + + Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) + } + + pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + let mut lock = self.raw.lock(); + lock.from_pickle(py, state.as_ptr()) + } pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { for value in self.raw.lock().entries_iter() { diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs index e8d0028..1506637 100644 --- a/src/policies/ttl.rs +++ b/src/policies/ttl.rs @@ -387,6 +387,83 @@ impl TTLPolicy { pub fn get_index(&self, n: usize) -> Option<&TimeToLivePair> { self.entries.get(n) } + + #[allow(clippy::wrong_self_convention)] + pub fn from_pickle( + &mut self, + py: pyo3::Python<'_>, + state: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + use pyo3::types::PyAnyMethods; + + unsafe { + tuple!(check state, size=4)?; + let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state => list); + + // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro + if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { + return Err(pyo3::PyErr::new::( + "the iterable object size is more than maxsize!", + )); + } + + let ttl = { + let obj = pyo3::ffi::PyTuple_GetItem(state, 3); + pyo3::ffi::PyFloat_AsDouble(obj) + }; + + let mut new = Self::new(maxsize, capacity, ttl)?; + + for pair in iterable.bind(py).try_iter()? { + let (key, value, timestamp) = + pair?.extract::<(pyo3::PyObject, pyo3::PyObject, f64)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match new.entry_with_slot(py, &hk)? 
{ + Entry::Absent(entry) => { + entry.pickle_insert( + hk, + value, + std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(timestamp), + )?; + } + _ => std::hint::unreachable_unchecked(), + } + } + + new.expire(py); + new.shrink_to_fit(py); + + *self = new; + Ok(()) + } + + // use pyo3::types::PyAnyMethods; + + // unsafe { + // tuple!(check state, size=3)?; + // let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state => list); + + // let mut new = Self::new(maxsize, capacity)?; + + // for pair in iterable.bind(py).try_iter()? { + // let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + // let hk = PreHashObject::from_pyobject(py, key)?; + + // match new.entry_with_slot(py, &hk)? { + // Entry::Absent(entry) => { + // entry.insert(py, hk, value)?; + // } + // _ => std::hint::unreachable_unchecked(), + // } + // } + + // *self = new; + // Ok(()) + // } + } } impl TimeToLivePair { @@ -464,6 +541,43 @@ impl<'a> TTLPolicyOccupied<'a> { } impl TTLPolicyAbsent<'_> { + #[inline] + unsafe fn pickle_insert( + self, + key: PreHashObject, + value: pyo3::PyObject, + expire_at: std::time::SystemTime, + ) -> pyo3::PyResult<()> { + match self.situation { + AbsentSituation::Expired(_) => { + return Err(pyo3::PyErr::new::( + "pickle object is suspicious!", + )) + } + AbsentSituation::Slot(slot) => unsafe { + // This means the key is not available and we have insert_slot + // for inserting it + + // We don't need to check maxsize, we sure `len(iterable) <= maxsize` in loading pickle + + self.instance.table.insert_in_slot( + key.hash, + slot, + self.instance.entries.len() + self.instance.n_shifts, + ); + + self.instance + .entries + .push_back(TimeToLivePair::new(key, value, expire_at)); + }, + AbsentSituation::None => unreachable!("this should never happen"), + } + + // We don't need change observed value here + // self.instance.observed.change(); + Ok(()) + } + + #[inline] + pub fn insert( + self, From e8510d88a25b5d0adcddd1b9ff7e194fb4e4ea23 Mon 
Sep 17 00:00:00 2001 From: awolverp Date: Wed, 16 Apr 2025 15:41:25 +0330 Subject: [PATCH 23/37] Update TTLCache --- python/cachebox/_cachebox.py | 8 ++++---- src/bridge/ttlcache.rs | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 5b3f5ad..f9251db 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -1457,7 +1457,7 @@ def popitem(self) -> typing.Tuple[KT, VT]: except _core.CoreKeyError: raise KeyError() from None else: - return (val.key(), val.value()) + return val.pack2() def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: """ @@ -1476,7 +1476,7 @@ def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: except _core.CoreKeyError: raise KeyError() from None else: - return (val.key(), val.value(), val.duration()) + return val.pack3() def drain(self, n: int) -> int: # pragma: no cover """Does the `popitem()` `n` times and returns count of removed items.""" @@ -1544,7 +1544,7 @@ def items_with_expire(self) -> IteratorView[typing.Tuple[KT, VT, float]]: Notes: - You should not make any changes in cache while using this iterable object. """ - return IteratorView(self._raw.items(), lambda x: (x.key(), x.value(), x.duration())) + return IteratorView(self._raw.items(), lambda x: x.pack3()) def items(self) -> IteratorView[typing.Tuple[KT, VT]]: """ @@ -1553,7 +1553,7 @@ def items(self) -> IteratorView[typing.Tuple[KT, VT]]: Notes: - You should not make any changes in cache while using this iterable object. 
""" - return IteratorView(self._raw.items(), lambda x: (x.key(), x.value())) + return IteratorView(self._raw.items(), lambda x: x.pack2()) def keys(self) -> IteratorView[KT]: """ diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs index 1fd668e..7b968ef 100644 --- a/src/bridge/ttlcache.rs +++ b/src/bridge/ttlcache.rs @@ -339,6 +339,14 @@ impl TTLPair { fn duration(slf: pyo3::PyRef<'_, Self>) -> f64 { slf.duration.as_secs_f64() } + + fn pack2(slf: pyo3::PyRef<'_, Self>) -> (pyo3::PyObject, pyo3::PyObject) { + (slf.key.clone_ref(slf.py()), slf.value.clone_ref(slf.py())) + } + + fn pack3(slf: pyo3::PyRef<'_, Self>) -> (pyo3::PyObject, pyo3::PyObject, f64) { + (slf.key.clone_ref(slf.py()), slf.value.clone_ref(slf.py()), slf.duration.as_secs_f64()) + } } #[pyo3::pymethods] From b983b54f147e9f64b4f8cbf1d6904654d541f399 Mon Sep 17 00:00:00 2001 From: awolverp Date: Wed, 16 Apr 2025 16:06:00 +0330 Subject: [PATCH 24/37] Clean code --- python/tests/mixin.py | 2 +- src/bridge/ttlcache.rs | 24 ++++--- src/common.rs | 146 ++++++++++++++++++++++++----------------- src/policies/ttl.rs | 57 +++++----------- 4 files changed, 117 insertions(+), 112 deletions(-) diff --git a/python/tests/mixin.py b/python/tests/mixin.py index aec8f55..4c6fafb 100644 --- a/python/tests/mixin.py +++ b/python/tests/mixin.py @@ -396,7 +396,7 @@ def _test_pickle(self, check_order: typing.Callable): c1[9] c2 = pickle.loads(pickle.dumps(c1)) - assert c1 == c2 + assert c1 == c2, f"{c1} - {c2}" assert c1.capacity() == c2.capacity() check_order(c1, c2) diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs index 7b968ef..f89a51b 100644 --- a/src/bridge/ttlcache.rs +++ b/src/bridge/ttlcache.rs @@ -1,6 +1,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; +use crate::common::TimeToLivePair; #[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] pub struct TTLCache { @@ -253,7 +254,8 @@ impl TTLCache { 0 => 
element.key.obj.clone_ref(py).as_ptr(), 1 => element.value.clone_ref(py).as_ptr(), 2 => pyo3::ffi::PyFloat_FromDouble( - element.expire_at.duration_since(std::time::UNIX_EPOCH).unwrap_unchecked().as_secs_f64() + element.expire_at.unwrap_unchecked() + .duration_since(std::time::UNIX_EPOCH).unwrap_unchecked().as_secs_f64() ), ); @@ -305,18 +307,18 @@ impl TTLCache { } impl TTLPair { - fn clone_from_pair(py: pyo3::Python<'_>, pair: &crate::policies::ttl::TimeToLivePair) -> Self { + fn clone_from_pair(py: pyo3::Python<'_>, pair: &TimeToLivePair) -> Self { TTLPair { key: pair.key.obj.clone_ref(py), value: pair.value.clone_ref(py), - duration: pair.duration(), + duration: unsafe { pair.duration().unwrap_unchecked() }, } } } -impl From for TTLPair { - fn from(value: crate::policies::ttl::TimeToLivePair) -> Self { - let duration = value.duration(); +impl From for TTLPair { + fn from(value: TimeToLivePair) -> Self { + let duration = unsafe { value.duration().unwrap_unchecked() }; TTLPair { key: value.key.obj, @@ -345,7 +347,11 @@ impl TTLPair { } fn pack3(slf: pyo3::PyRef<'_, Self>) -> (pyo3::PyObject, pyo3::PyObject, f64) { - (slf.key.clone_ref(slf.py()), slf.value.clone_ref(slf.py()), slf.duration.as_secs_f64()) + ( + slf.key.clone_ref(slf.py()), + slf.value.clone_ref(slf.py()), + slf.duration.as_secs_f64(), + ) } } @@ -361,7 +367,7 @@ impl ttlcache_items { slf.ptr.proceed(slf.py())?; - let mut element: std::ptr::NonNull; + let mut element: std::ptr::NonNull; loop { element = { if let Some(x) = iter.next() { @@ -371,7 +377,7 @@ impl ttlcache_items { } }; - if unsafe { element.as_ref().expire_at } > slf.now { + if unsafe { !element.as_ref().is_expired(slf.now) } { break; } } diff --git a/src/common.rs b/src/common.rs index 368bac3..74608c0 100644 --- a/src/common.rs +++ b/src/common.rs @@ -1,32 +1,6 @@ -#[inline] -pub fn pyobject_equal( - py: pyo3::Python<'_>, - arg1: *mut pyo3::ffi::PyObject, - arg2: *mut pyo3::ffi::PyObject, -) -> pyo3::PyResult { - unsafe { - if 
std::ptr::eq(arg1, arg2) { - return Ok(true); - } - - let boolean = pyo3::ffi::PyObject_RichCompareBool(arg1, arg2, pyo3::ffi::Py_EQ); - - if boolean < 0 { - Err(pyo3::PyErr::take(py).unwrap_unchecked()) - } else { - Ok(boolean == 1) - } - } -} - -#[rustfmt::skip] macro_rules! non_zero_or { ($num:expr, $_else:expr) => { - unsafe { - core::num::NonZeroUsize::new_unchecked( - if $num == 0 { $_else } else { $num } - ) - } + unsafe { core::num::NonZeroUsize::new_unchecked(if $num == 0 { $_else } else { $num }) } }; } @@ -151,6 +125,27 @@ macro_rules! extract_pickle_tuple { }}; } +#[inline] +pub fn pyobject_equal( + py: pyo3::Python<'_>, + arg1: *mut pyo3::ffi::PyObject, + arg2: *mut pyo3::ffi::PyObject, +) -> pyo3::PyResult { + unsafe { + if std::ptr::eq(arg1, arg2) { + return Ok(true); + } + + let boolean = pyo3::ffi::PyObject_RichCompareBool(arg1, arg2, pyo3::ffi::Py_EQ); + + if boolean < 0 { + Err(pyo3::PyErr::take(py).unwrap_unchecked()) + } else { + Ok(boolean == 1) + } + } +} + /// Converts an isize value to a u64 value, mapping negative values to the upper half of the u64 range. /// /// This function ensures a bijective mapping between isize and u64, preserving the order of values @@ -175,6 +170,38 @@ pub struct PreHashObject { pub hash: u64, } +/// A view into a single entry in a table, which may either be absent or occupied. +/// +/// This is common in policies and will be used by `entry(...)` methods of them. +pub enum Entry { + Occupied(O), + Absent(V), +} + +/// Observe caches' changes +#[derive(Debug)] +pub struct Observed(u16); + +/// Checks the [`Observed`] on iterators +#[derive(Debug)] +pub struct ObservedIterator { + pub ptr: core::ptr::NonNull, + pub statepoint: u16, +} + +pub struct NoLifetimeSliceIter { + pub pointer: std::ptr::NonNull, + pub index: usize, + pub len: usize, +} + +/// A pair representing a key-value entry with a time-to-live (TTL) expiration. 
+pub struct TimeToLivePair { + pub key: PreHashObject, + pub value: pyo3::PyObject, + pub expire_at: Option, +} + impl PreHashObject { /// Creates a new [`PreHashObject`] pub fn new(obj: pyo3::PyObject, hash: u64) -> Self { @@ -208,23 +235,6 @@ impl std::fmt::Debug for PreHashObject { } } -/// A view into a single entry in a table, which may either be absent or occupied. -/// -/// This is common in policies and will be used by `entry(...)` methods of them. -pub enum Entry { - Occupied(O), - Absent(V), -} - -// impl Entry { -// pub fn map(self, f: impl FnOnce(O) -> T) -> Option { -// match self { -// Entry::Occupied(c) => Some(f(c)), -// Entry::Absent(_) => None, -// } -// } -// } - /// A trait for adding `try_find` and `try_find_entry` methods to [`hashbrown::HashTable`] pub trait TryFindMethods { /// Searches for an element in the table. @@ -299,10 +309,6 @@ impl TryFindMethods for hashbrown::raw::RawTable { } } -/// Observe caches' changes -#[derive(Debug)] -pub struct Observed(u16); - impl Observed { pub fn new() -> Self { Self(0) @@ -362,13 +368,6 @@ unsafe fn _get_state(py: pyo3::Python<'_>, ptr: *mut pyo3::ffi::PyObject) -> pyo Ok(c as u16) } -/// Checks the [`Observed`] on iterators -#[derive(Debug)] -pub struct ObservedIterator { - pub ptr: core::ptr::NonNull, - pub statepoint: u16, -} - impl ObservedIterator { pub fn new(ptr: *mut pyo3::ffi::PyObject, state: u16) -> Self { unsafe { @@ -405,12 +404,6 @@ impl Drop for ObservedIterator { unsafe impl Send for ObservedIterator {} unsafe impl Sync for ObservedIterator {} -pub struct NoLifetimeSliceIter { - pub pointer: std::ptr::NonNull, - pub index: usize, - pub len: usize, -} - impl NoLifetimeSliceIter { #[inline] pub fn new(slice: &[T]) -> Self { @@ -437,3 +430,34 @@ impl Iterator for NoLifetimeSliceIter { } } } + +impl TimeToLivePair { + #[inline] + pub fn new( + key: PreHashObject, + value: pyo3::PyObject, + expire_at: Option, + ) -> Self { + Self { + key, + value, + expire_at, + } + } + + #[inline] + pub 
fn duration(&self) -> Option { + self.expire_at.map(|x| { + x.duration_since(std::time::SystemTime::now()) + .unwrap_or_default() + }) + } + + #[inline(always)] + pub fn is_expired(&self, now: std::time::SystemTime) -> bool { + match self.expire_at { + Some(x) => x < now, + None => false, + } + } +} diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs index 1506637..8e15a39 100644 --- a/src/policies/ttl.rs +++ b/src/policies/ttl.rs @@ -3,6 +3,7 @@ use crate::common::Entry; use crate::common::NoLifetimeSliceIter; use crate::common::Observed; use crate::common::PreHashObject; +use crate::common::TimeToLivePair; use crate::common::TryFindMethods; use std::collections::VecDeque; @@ -17,13 +18,6 @@ pub struct TTLPolicy { pub observed: Observed, } -/// A pair representing a key-value entry with a time-to-live (TTL) expiration. -pub struct TimeToLivePair { - pub key: PreHashObject, - pub value: pyo3::PyObject, - pub expire_at: std::time::SystemTime, -} - pub struct TTLPolicyOccupied<'a> { instance: &'a mut TTLPolicy, bucket: hashbrown::raw::Bucket, @@ -81,10 +75,11 @@ impl TTLPolicy { #[inline] pub fn real_len(&self) -> usize { + let now = std::time::SystemTime::now(); let mut c = 0usize; for item in &self.entries { - if item.expire_at > std::time::SystemTime::now() { + if !item.is_expired(now) { break; } @@ -140,8 +135,10 @@ impl TTLPolicy { #[inline] pub fn expire(&mut self, py: pyo3::Python<'_>) { + let now = std::time::SystemTime::now(); + while !self.entries.is_empty() { - if self.entries[0].expire_at > std::time::SystemTime::now() { + if !self.entries[0].is_expired(now) { break; } @@ -190,7 +187,7 @@ impl TTLPolicy { Some(bucket) => { let pair = &self.entries[unsafe { *bucket.as_ptr() } - self.n_shifts]; - if pair.expire_at > std::time::SystemTime::now() { + if !pair.is_expired(std::time::SystemTime::now()) { Ok(Entry::Occupied(TTLPolicyOccupied { instance: self, bucket })) } else { Ok(Entry::Absent(TTLPolicyAbsent { instance: self, situation: 
AbsentSituation::Expired(bucket) })) @@ -218,7 +215,7 @@ impl TTLPolicy { Ok(bucket) => { let pair = &self.entries[unsafe { *bucket.as_ptr() } - self.n_shifts]; - if pair.expire_at > std::time::SystemTime::now() { + if !pair.is_expired(std::time::SystemTime::now()) { Ok(Entry::Occupied(TTLPolicyOccupied { instance: self, bucket })) } else { Ok(Entry::Absent(TTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) @@ -247,7 +244,7 @@ impl TTLPolicy { Some(index) => { let pair = &self.entries[(*index) - self.n_shifts]; - if pair.expire_at > std::time::SystemTime::now() { + if !pair.is_expired(std::time::SystemTime::now()) { Ok(Some(pair)) } else { Ok(None) @@ -339,7 +336,7 @@ impl TTLPolicy { for index1 in self.table.iter().map(|x| x.as_ref()) { let pair1 = &self.entries[(*index1) - self.n_shifts]; - if pair1.expire_at < now { + if pair1.is_expired(now) { continue; } @@ -351,7 +348,7 @@ impl TTLPolicy { Some(bucket) => { let pair2 = &other.entries[(*bucket.as_ref()) - other.n_shifts]; - if pair1.expire_at < now { + if pair2.is_expired(now) { return Ok(false); } @@ -466,28 +463,6 @@ impl TTLPolicy { } } -impl TimeToLivePair { - #[inline] - pub fn new( - key: PreHashObject, - value: pyo3::PyObject, - expire_at: std::time::SystemTime, - ) -> Self { - Self { - key, - value, - expire_at, - } - } - - #[inline] - pub fn duration(&self) -> std::time::Duration { - self.expire_at - .duration_since(std::time::SystemTime::now()) - .unwrap_or_default() - } -} - impl<'a> TTLPolicyOccupied<'a> { #[inline] pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { @@ -500,7 +475,7 @@ impl<'a> TTLPolicyOccupied<'a> { let mut item = self.instance.entries.remove(index).unwrap(); - item.expire_at = std::time::SystemTime::now() + self.instance.ttl; + item.expire_at = Some(std::time::SystemTime::now() + self.instance.ttl); let old_value = std::mem::replace(&mut item.value, value); unsafe { @@ -568,7 +543,7 @@ impl TTLPolicyAbsent<'_> { self.instance 
.entries - .push_back(TimeToLivePair::new(key, value, expire_at)); + .push_back(TimeToLivePair::new(key, value, Some(expire_at))); }, AbsentSituation::None => unreachable!("this should never happen"), } @@ -600,7 +575,7 @@ impl TTLPolicyAbsent<'_> { let mut item = self.instance.entries.remove(index).unwrap(); - item.expire_at = std::time::SystemTime::now() + self.instance.ttl; + item.expire_at = Some(expire_at); item.value = value; unsafe { @@ -631,7 +606,7 @@ impl TTLPolicyAbsent<'_> { self.instance .entries - .push_back(TimeToLivePair::new(key, value, expire_at)); + .push_back(TimeToLivePair::new(key, value, Some(expire_at))); }, AbsentSituation::None => { // This is same as AbsentSituation::Slot but we don't have any slot @@ -654,7 +629,7 @@ impl TTLPolicyAbsent<'_> { self.instance .entries - .push_back(TimeToLivePair::new(key, value, expire_at)); + .push_back(TimeToLivePair::new(key, value, Some(expire_at))); } } From 325902cf46f0f322389f4a00d96fb08b28c68a19 Mon Sep 17 00:00:00 2001 From: awolverp Date: Wed, 16 Apr 2025 17:56:09 +0330 Subject: [PATCH 25/37] Optimize TTLCache and add a new method `expire()` --- python/cachebox/_cachebox.py | 9 +++++++++ src/common.rs | 14 ++++++++++++++ src/policies/ttl.rs | 21 ++++----------------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index f9251db..defc0d0 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -1593,6 +1593,15 @@ def last(self) -> typing.Optional[KT]: """ return self._raw.get_index(len(self._raw) - 1) + def expire(self) -> None: + """ + Manually removes expired key-value pairs from memory and releases their memory. + + Notes: + - This operation is typically automatic and does not require manual invocation. 
+ """ + self._raw.expire() + def __iter__(self) -> IteratorView[KT]: return self.keys() diff --git a/src/common.rs b/src/common.rs index 74608c0..4852afb 100644 --- a/src/common.rs +++ b/src/common.rs @@ -202,6 +202,20 @@ pub struct TimeToLivePair { pub expire_at: Option, } +/// Represents the possible situations when a key is absent in VTTL or TTL policy's data structure. +/// +/// This enum helps track different scenarios during key insertion. +pub enum AbsentSituation { + /// A valid insertion slot is available + Slot(hashbrown::raw::InsertSlot), + + /// An expired entry's bucket is found + Expired(hashbrown::raw::Bucket), + + /// No suitable slot or expired entry is found + None, +} + impl PreHashObject { /// Creates a new [`PreHashObject`] pub fn new(obj: pyo3::PyObject, hash: u64) -> Self { diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs index 8e15a39..d1d4c01 100644 --- a/src/policies/ttl.rs +++ b/src/policies/ttl.rs @@ -5,6 +5,7 @@ use crate::common::Observed; use crate::common::PreHashObject; use crate::common::TimeToLivePair; use crate::common::TryFindMethods; +use crate::common::AbsentSituation; use std::collections::VecDeque; @@ -23,23 +24,9 @@ pub struct TTLPolicyOccupied<'a> { bucket: hashbrown::raw::Bucket, } -/// Represents the possible situations when a key is absent from the TTL policy's data structure. -/// -/// This enum helps track different scenarios during key insertion. 
-enum AbsentSituation { - /// A valid insertion slot is available - Slot(hashbrown::raw::InsertSlot), - - /// An expired entry's bucket is found - Expired(hashbrown::raw::Bucket), - - /// No suitable slot or expired entry is found - None, -} - pub struct TTLPolicyAbsent<'a> { instance: &'a mut TTLPolicy, - situation: AbsentSituation, + situation: AbsentSituation, } pub struct TTLIterator { @@ -137,8 +124,8 @@ impl TTLPolicy { pub fn expire(&mut self, py: pyo3::Python<'_>) { let now = std::time::SystemTime::now(); - while !self.entries.is_empty() { - if !self.entries[0].is_expired(now) { + while let Some(e) = self.entries.front() { + if !e.is_expired(now) { break; } From 914f75484d8bc6270fc459f26fb70c56f9a5f675 Mon Sep 17 00:00:00 2001 From: awolverp Date: Wed, 16 Apr 2025 18:27:55 +0330 Subject: [PATCH 26/37] * Write vttl policy base - not tested * Remove subclass flag of core classes * Set __slots__ for cache classes --- python/cachebox/_cachebox.py | 14 +- src/bridge/cache.rs | 2 +- src/bridge/fifocache.rs | 2 +- src/bridge/lfucache.rs | 2 +- src/bridge/lrucache.rs | 2 +- src/bridge/rrcache.rs | 2 +- src/bridge/ttlcache.rs | 9 +- src/policies/lfu.rs | 1 + src/policies/mod.rs | 1 + src/policies/ttl.rs | 3 +- src/policies/vttl.rs | 377 +++++++++++++++++++++++++++++++++++ 11 files changed, 405 insertions(+), 10 deletions(-) create mode 100644 src/policies/vttl.rs diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index defc0d0..fd8bbee 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -77,6 +77,8 @@ class Cache(BaseCacheImpl[KT, VT]): and provides dictionary-like access with additional cache-specific operations. 
""" + __slots__ = ("_raw",) + def __init__( self, maxsize: int, @@ -293,6 +295,8 @@ class FIFOCache(BaseCacheImpl[KT, VT]): - Allows optional initial data population """ + __slots__ = ("_raw",) + def __init__( self, maxsize: int, @@ -533,6 +537,8 @@ class RRCache(BaseCacheImpl[KT, VT]): Supports operations like insertion, retrieval, deletion, and iteration with O(1) complexity. """ + __slots__ = ("_raw",) + def __init__( self, maxsize: int, @@ -776,6 +782,8 @@ class LRUCache(BaseCacheImpl[KT, VT]): - Supports initialization from dictionaries or iterables """ + __slots__ = ("_raw",) + def __init__( self, maxsize: int, @@ -1028,6 +1036,8 @@ class LFUCache(BaseCacheImpl[KT, VT]): - Provides methods for key-value management similar to dict """ + __slots__ = ("_raw",) + def __init__( self, maxsize: int, @@ -1287,6 +1297,8 @@ class TTLCache(BaseCacheImpl[KT, VT]): Supports various operations like insertion, retrieval, and iteration with O(1) complexity. """ + __slots__ = ("_raw",) + def __init__( self, maxsize: int, @@ -1596,7 +1608,7 @@ def last(self) -> typing.Optional[KT]: def expire(self) -> None: """ Manually removes expired key-value pairs from memory and releases their memory. - + Notes: - This operation is typically automatic and does not require manual invocation. 
""" diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs index 1b59aaa..227f1e3 100644 --- a/src/bridge/cache.rs +++ b/src/bridge/cache.rs @@ -2,7 +2,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] +#[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct Cache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs index 8ee688f..0877aaf 100644 --- a/src/bridge/fifocache.rs +++ b/src/bridge/fifocache.rs @@ -2,7 +2,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] +#[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct FIFOCache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/lfucache.rs b/src/bridge/lfucache.rs index 0894d54..b7401c6 100644 --- a/src/bridge/lfucache.rs +++ b/src/bridge/lfucache.rs @@ -2,7 +2,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] +#[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct LFUCache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/lrucache.rs b/src/bridge/lrucache.rs index 05f625f..a857eba 100644 --- a/src/bridge/lrucache.rs +++ b/src/bridge/lrucache.rs @@ -2,7 +2,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] +#[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct LRUCache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs index e8b51b5..ebc26c6 100644 --- a/src/bridge/rrcache.rs +++ b/src/bridge/rrcache.rs @@ -3,7 +3,7 @@ use crate::common::Entry; use crate::common::ObservedIterator; use crate::common::PreHashObject; -#[pyo3::pyclass(module = 
"cachebox._core", frozen, subclass)] +#[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct RRCache { raw: crate::mutex::Mutex, } diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs index f89a51b..ed64a15 100644 --- a/src/bridge/ttlcache.rs +++ b/src/bridge/ttlcache.rs @@ -3,7 +3,7 @@ use crate::common::ObservedIterator; use crate::common::PreHashObject; use crate::common::TimeToLivePair; -#[pyo3::pyclass(module = "cachebox._core", frozen, subclass)] +#[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct TTLCache { raw: crate::mutex::Mutex, } @@ -230,10 +230,15 @@ impl TTLCache { fn get_index(&self, py: pyo3::Python<'_>, index: usize) -> Option { let lock = self.raw.lock(); - lock.get_index(index).map(|pair| pair.key.obj.clone_ref(py)) } + fn expire(&self, py: pyo3::Python<'_>) { + let mut lock = self.raw.lock(); + lock.expire(py); + lock.shrink_to_fit(py); + } + fn __getnewargs__(&self) -> (usize, f64) { (0, 0.0f64) } diff --git a/src/policies/lfu.rs b/src/policies/lfu.rs index 4d25137..bd989d1 100644 --- a/src/policies/lfu.rs +++ b/src/policies/lfu.rs @@ -248,6 +248,7 @@ impl LFUPolicy { Ok(()) } + #[inline] pub fn iter(&mut self) -> LFUIterator { self.heap.iter(|a, b| a.2.cmp(&b.2)) } diff --git a/src/policies/mod.rs b/src/policies/mod.rs index 4140d2d..2945250 100644 --- a/src/policies/mod.rs +++ b/src/policies/mod.rs @@ -4,3 +4,4 @@ pub mod lru; pub mod nopolicy; pub mod random; pub mod ttl; +pub mod vttl; diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs index d1d4c01..d2aaae5 100644 --- a/src/policies/ttl.rs +++ b/src/policies/ttl.rs @@ -1,11 +1,11 @@ use super::fifo::MAX_N_SHIFT; +use crate::common::AbsentSituation; use crate::common::Entry; use crate::common::NoLifetimeSliceIter; use crate::common::Observed; use crate::common::PreHashObject; use crate::common::TimeToLivePair; use crate::common::TryFindMethods; -use crate::common::AbsentSituation; use std::collections::VecDeque; @@ -482,7 +482,6 @@ impl<'a> 
TTLPolicyOccupied<'a> { #[inline] pub fn remove(self) -> TimeToLivePair { - // let (PreHashObject { hash, .. }, _) = &self.instance.entries[self.index - self.instance.n_shifts]; let (mut index, _) = unsafe { self.instance.table.remove(self.bucket) }; index -= self.instance.n_shifts; diff --git a/src/policies/vttl.rs b/src/policies/vttl.rs new file mode 100644 index 0000000..ae12ac3 --- /dev/null +++ b/src/policies/vttl.rs @@ -0,0 +1,377 @@ +use crate::common::AbsentSituation; +use crate::common::Entry; +use crate::common::Observed; +use crate::common::PreHashObject; +use crate::common::TimeToLivePair; +use crate::common::TryFindMethods; +use crate::lazyheap; + +use std::ptr::NonNull; + +macro_rules! compare_fn { + () => { + |a, b| { + if a.expire_at.is_none() && b.expire_at.is_none() { + return std::cmp::Ordering::Equal; + } else if b.expire_at.is_none() { + return std::cmp::Ordering::Less; + } else if a.expire_at.is_none() { + return std::cmp::Ordering::Greater; + } + + a.expire_at.cmp(&b.expire_at) + } + }; +} + +pub struct VTTLPolicy { + table: hashbrown::raw::RawTable>, + heap: lazyheap::LazyHeap, + maxsize: std::num::NonZeroUsize, + pub observed: Observed, +} + +pub struct VTTLPolicyOccupied<'a> { + instance: &'a mut VTTLPolicy, + bucket: hashbrown::raw::Bucket>, +} + +pub struct VTTLPolicyAbsent<'a> { + instance: &'a mut VTTLPolicy, + situation: AbsentSituation>, +} + +pub type VTTLIterator = lazyheap::Iter; + +impl VTTLPolicy { + #[inline] + pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { + let maxsize = non_zero_or!(maxsize, isize::MAX as usize); + capacity = capacity.min(maxsize.get()); + + Ok(Self { + table: new_table!(capacity)?, + heap: lazyheap::LazyHeap::new(), + maxsize, + observed: Observed::new(), + }) + } + + #[inline] + pub fn maxsize(&self) -> usize { + self.maxsize.get() + } + + #[inline] + pub fn real_len(&mut self) -> usize { + self.expire(); + self.table.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + 
self.table.is_empty() + } + + #[inline] + pub fn is_full(&self) -> bool { + self.table.len() == self.maxsize.get() + } + + #[inline] + pub fn capacity(&self) -> usize { + self.table.capacity() + } + + #[inline] + pub fn expire(&mut self) { + self.heap.sort_by(compare_fn!()); + + let now = std::time::SystemTime::now(); + + while let Some(x) = self.heap.front() { + if unsafe { !x.as_ref().is_expired(now) } { + break; + } + + unsafe { + self.table + .remove_entry(x.as_ref().key.hash, |x| { + std::ptr::eq(x.as_ptr(), x.as_ptr()) + }) + .unwrap(); + } + + self.heap.pop_front(compare_fn!()); + self.observed.change(); + } + } + + pub fn popitem(&mut self) -> Option { + self.heap.sort_by(compare_fn!()); + + let front = self.heap.front()?; + + unsafe { + self.table + .remove_entry(front.as_ref().key.hash, |x| { + std::ptr::eq(x.as_ptr(), front.as_ptr()) + }) + .unwrap(); + } + + self.observed.change(); + Some(self.heap.pop_front(compare_fn!()).unwrap()) + } + + #[rustfmt::skip] + pub fn entry( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find(key.hash, |ptr| unsafe { ptr.as_ref().key.equal(py, key) })? + { + Some(bucket) => unsafe { + let pair = bucket.as_ref(); + + if !pair.as_ref().is_expired(std::time::SystemTime::now()) { + Ok(Entry::Occupied(VTTLPolicyOccupied { instance: self, bucket })) + } else { + Ok(Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) + } + } + None => { + Ok( + Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::None }) + ) + }, + } + } + + #[rustfmt::skip] + pub fn entry_with_slot( + &mut self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find_or_find_insert_slot( + key.hash, + |ptr| unsafe { ptr.as_ref().key.equal(py, key) }, + |ptr| unsafe { ptr.as_ref().key.hash }, + )? 
{ + Ok(bucket) => unsafe { + let pair = bucket.as_ref(); + + if !pair.as_ref().is_expired(std::time::SystemTime::now()) { + Ok(Entry::Occupied(VTTLPolicyOccupied { instance: self, bucket })) + } else { + Ok(Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Expired(bucket) })) + } + } + Err(slot) => { + Ok( + Entry::Absent(VTTLPolicyAbsent { instance: self, situation: AbsentSituation::Slot(slot) }) + ) + }, + } + } + + pub fn lookup( + &self, + py: pyo3::Python<'_>, + key: &PreHashObject, + ) -> pyo3::PyResult> { + match self + .table + .try_find(key.hash, |ptr| unsafe { ptr.as_ref().key.equal(py, key) })? + .map(|bucket| unsafe { bucket.as_ref() }) + { + Some(pair) => unsafe { + if !pair.as_ref().is_expired(std::time::SystemTime::now()) { + Ok(Some(pair.as_ref())) + } else { + Ok(None) + } + }, + None => Ok(None), + } + } + + #[inline] + pub fn clear(&mut self) { + self.table.clear(); + self.heap.clear(); + self.observed.change(); + } + + #[inline] + pub fn shrink_to_fit(&mut self) { + self.table + .shrink_to(self.table.len(), |x| unsafe { x.as_ref().key.hash }); + + self.heap.shrink_to_fit(); + self.observed.change(); + } + + #[inline] + pub fn iter(&mut self) -> VTTLIterator { + self.heap.iter(compare_fn!()) + } + + pub fn equal(&mut self, py: pyo3::Python<'_>, other: &mut Self) -> pyo3::PyResult { + if self.maxsize != other.maxsize { + return Ok(false); + } + + if self.real_len() != other.real_len() { + return Ok(false); + } + + unsafe { + for node in self.table.iter().map(|x| x.as_ref()) { + let pair1 = node.as_ref(); + + // NOTE: there's no need to check if the pair is expired + // because we already expired all expired pairs by using real_len method + + match other + .table + .try_find(pair1.key.hash, |x| pair1.key.equal(py, &x.as_ref().key))? + { + Some(bucket) => { + let pair2 = bucket.as_ref().as_ref(); + + if !crate::common::pyobject_equal( + py, + pair1.value.as_ptr(), + pair2.value.as_ptr(), + )? 
{ + return Ok(false); + } + } + None => return Ok(false), + } + } + } + + Ok(true) + } +} + +impl VTTLPolicyOccupied<'_> { + #[inline] + pub fn update( + &mut self, + value: pyo3::PyObject, + ttl: Option, + ) -> pyo3::PyResult { + let item = unsafe { self.bucket.as_mut() }; + + unsafe { + item.as_mut().expire_at = + ttl.map(|x| std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x)); + } + self.instance.heap.queue_sort(); + + // In update we don't need to change this; because this does not change the memory address ranges + // self.instance.observed.change(); + + Ok(unsafe { std::mem::replace(&mut item.as_mut().value, value) }) + } + + #[inline] + pub fn remove(self) -> TimeToLivePair { + let (item, _) = unsafe { self.instance.table.remove(self.bucket) }; + let item = self.instance.heap.remove(item, compare_fn!()); + + self.instance.observed.change(); + item + } + + #[inline] + pub fn into_value(self) -> NonNull { + let item = unsafe { self.bucket.as_mut() }; + *item + } +} + +impl VTTLPolicyAbsent<'_> { + #[inline] + pub fn insert( + self, + key: PreHashObject, + value: pyo3::PyObject, + ttl: Option, + ) -> pyo3::PyResult<()> { + let expire_at = + ttl.map(|x| std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x)); + + match self.situation { + AbsentSituation::Expired(bucket) => { + // This means the key is available but expired + // So we have to update the values of the old key + // and queue the heap's sort + let item = unsafe { bucket.as_mut() }; + + unsafe { + item.as_mut().expire_at = ttl.map(|x| { + std::time::SystemTime::now() + std::time::Duration::from_secs_f64(x) + }); + item.as_mut().value = value; + } + + self.instance.heap.queue_sort(); + + // Like VTTLPolicyOccupied::update, Here we don't need to change this + // self.instance.observed.change(); + } + AbsentSituation::Slot(slot) => { + self.instance.expire(); // Remove expired pairs to make room for the new pair + + if self.instance.table.len() >= 
self.instance.maxsize.get() { + self.instance.popitem(); + } + + let hash = key.hash; + let node = self + .instance + .heap + .push(TimeToLivePair::new(key, value, expire_at)); + + unsafe { + self.instance.table.insert_in_slot(hash, slot, node); + } + + self.instance.observed.change(); + } + AbsentSituation::None => { + self.instance.expire(); // Remove expired pairs to make room for the new pair + + if self.instance.table.len() >= self.instance.maxsize.get() { + self.instance.popitem(); + } + + let hash = key.hash; + let node = self + .instance + .heap + .push(TimeToLivePair::new(key, value, expire_at)); + + self.instance + .table + .insert(hash, node, |x| unsafe { x.as_ref().key.hash }); + + self.instance.observed.change(); + } + } + + Ok(()) + } +} + +unsafe impl Send for VTTLPolicy {} From 0e836a290f589d46eb22347a6c5c79f00fda6a94 Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 17 Apr 2025 13:10:46 +0330 Subject: [PATCH 27/37] * Refactor and test the VTTLCache * Do some runtime optimizations * Add some new methods to VTTLCache: `expire`, `items_with_expire`, `items` * Optimize update methods --- Cargo.toml | 2 +- python/cachebox/__init__.py | 1 + python/cachebox/_cachebox.py | 378 ++++++++++++++++++++++++++++++++++- python/tests/test_caches.py | 145 ++++++++++++++ src/bridge/cache.rs | 2 +- src/bridge/fifocache.rs | 2 +- src/bridge/lfucache.rs | 2 +- src/bridge/lrucache.rs | 2 +- src/bridge/mod.rs | 57 ++++++ src/bridge/rrcache.rs | 2 +- src/bridge/ttlcache.rs | 74 +------ src/bridge/vttlcache.rs | 343 +++++++++++++++++++++++++++++++ src/common.rs | 16 +- src/lazyheap.rs | 11 +- src/lib.rs | 3 +- src/linked_list.rs | 7 +- src/policies/fifo.rs | 20 +- src/policies/lfu.rs | 20 +- src/policies/lru.rs | 19 +- src/policies/nopolicy.rs | 18 +- src/policies/random.rs | 19 +- src/policies/ttl.rs | 51 +---- src/policies/vttl.rs | 152 ++++++++++++-- 23 files changed, 1145 insertions(+), 201 deletions(-) create mode 100644 src/bridge/vttlcache.rs diff --git a/Cargo.toml 
b/Cargo.toml index 65f4659..481c13a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ crate-type = ["cdylib"] codegen-units = 1 debug = false incremental = false -lto = "fat" +lto = true panic = "abort" strip = "symbols" diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py index 9698b5f..cc980f9 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -10,6 +10,7 @@ LRUCache as LRUCache, LFUCache as LFUCache, TTLCache as TTLCache, + VTTLCache as VTTLCache, BaseCacheImpl as BaseCacheImpl, IteratorView as IteratorView, ) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index fd8bbee..e792c46 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -1,5 +1,5 @@ from . import _core -from datetime import timedelta +from datetime import timedelta, datetime import typing @@ -1628,3 +1628,379 @@ def __repr__(self) -> str: self._raw.ttl(), _items_to_str(self.items(), len(self._raw)), ) + + +class VTTLCache(BaseCacheImpl[KT, VT]): + """ + A thread-safe, time-to-live (TTL) cache implementation with per-key expiration policy. + + This cache allows storing key-value pairs with optional expiration times. When an item expires, + it is automatically removed from the cache. The cache supports a maximum size and provides + various methods for inserting, retrieving, and managing cached items. + + Key features: + - Per-key time-to-live (TTL) support + - Configurable maximum cache size + - Thread-safe operations + - Automatic expiration of items + + Supports dictionary-like operations such as get, insert, update, and iteration. + """ + + __slots__ = ("_raw",) + + def __init__( + self, + maxsize: int, + iterable: typing.Union[typing.Union[dict, typing.Iterable[tuple]], None] = None, + ttl: typing.Union[float, timedelta, datetime, None] = None, # This is not a global TTL! + *, + capacity: int = 0, + ) -> None: + """ + Initialize a new VTTLCache instance. 
+ + Args: + maxsize (int): Maximum size of the cache. Zero indicates unlimited size. + iterable (dict or Iterable[tuple], optional): Initial data to populate the cache. + ttl (float or timedelta or datetime, optional): Time-to-live duration for `iterable` items. + capacity (int, optional): Preallocated capacity for the cache to minimize reallocations. + + Raises: + ValueError: If provided TTL is zero or negative. + """ + self._raw = _core.VTTLCache(maxsize, capacity=capacity) + + if iterable is not None: + self.update(iterable, ttl) + + @property + def maxsize(self) -> int: + return self._raw.maxsize() + + def capacity(self) -> int: + """Returns the number of elements the map can hold without reallocating.""" + return self._raw.capacity() + + def __len__(self) -> int: + return len(self._raw) + + def __sizeof__(self): # pragma: no cover + return self._raw.__sizeof__() + + def __contains__(self, key: KT) -> bool: + return key in self._raw + + def __bool__(self) -> bool: + return not self.is_empty() + + def is_empty(self) -> bool: + return self._raw.is_empty() + + def is_full(self) -> bool: + return self._raw.is_full() + + def insert( + self, key: KT, value: VT, ttl: typing.Union[float, timedelta, datetime, None] = None + ) -> typing.Optional[VT]: + """ + Insert a key-value pair into the cache with an optional time-to-live (TTL). + Returns the previous value associated with the key, if it existed. + + Args: + key (KT): The key to insert. + value (VT): The value to associate with the key. + ttl (float or timedelta or datetime, optional): Time-to-live duration for the item. + If a timedelta or datetime is provided, it will be converted to seconds. + + Raises: + ValueError: If the provided TTL is zero or negative. 
+ """ + if ttl is not None: + if isinstance(ttl, timedelta): + ttl = ttl.total_seconds() + + if isinstance(ttl, datetime): + ttl = (ttl - datetime.now()).total_seconds() + + if ttl <= 0: + raise ValueError("ttl must be positive and non-zero") + + return self._raw.insert(key, value, ttl) + + def get(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Retrieves the value for a given key from the cache. + + Returns the value associated with the key if present, otherwise returns the specified default value. + Equivalent to `self[key]`, but provides a fallback default if the key is not found. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + The value associated with the key, or the default value if the key is not found. + """ + try: + return self._raw.get(key).value() + except _core.CoreKeyError: + return default + + def get_with_expire( + self, key: KT, default: typing.Optional[DT] = None + ) -> typing.Tuple[typing.Union[VT, DT], float]: + """ + Retrieves the value and expiration duration for a given key from the cache. + + Returns a tuple containing the value associated with the key and its duration. + If the key is not found, returns the default value and 0.0 duration. + + Args: + key: The key to look up in the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + A tuple of (value, duration), where value is the cached value or default, + and duration is the time-to-live for the key (or 0.0 if not found). + """ + try: + pair = self._raw.get(key) + except _core.CoreKeyError: + return default, 0.0 + else: + return (pair.value(), pair.duration()) + + def pop(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + """ + Removes specified key and return the corresponding value. If the key is not found, returns the `default`. 
+ """ + try: + return self._raw.remove(key).value() + except _core.CoreKeyError: + return default + + def pop_with_expire( + self, key: KT, default: typing.Optional[DT] = None + ) -> typing.Tuple[typing.Union[VT, DT], float]: + """ + Removes the specified key from the cache and returns its value and expiration duration. + + If the key is not found, returns the default value and 0.0 duration. + + Args: + key: The key to remove from the cache. + default: The value to return if the key is not present in the cache. Defaults to None. + + Returns: + A tuple of (value, duration), where value is the cached value or default, + and duration is the time-to-live for the key (or 0.0 if not found). + """ + try: + pair = self._raw.remove(key) + except _core.CoreKeyError: + return default, 0.0 + else: + return (pair.value(), pair.duration()) + + def setdefault( + self, + key: KT, + default: typing.Optional[DT] = None, + ttl: typing.Union[float, timedelta, datetime, None] = None, + ) -> typing.Union[VT, DT]: + """ + Inserts a key-value pair into the cache with an optional time-to-live (TTL). + + If the key is not in the cache, it will be inserted with the default value. + If the key already exists, its current value is returned. + + Args: + key: The key to insert or retrieve from the cache. + default: The value to insert if the key is not present. Defaults to None. + ttl: Optional time-to-live for the key. Can be a float (seconds), timedelta, or datetime. + If not specified, the key will not expire. + + Returns: + The value associated with the key, either existing or the default value. + + Raises: + ValueError: If the provided TTL is not a positive value. 
+ """ + if ttl is not None: + if isinstance(ttl, timedelta): + ttl = ttl.total_seconds() + + if isinstance(ttl, datetime): + ttl = (ttl - datetime.now()).total_seconds() + + if ttl <= 0: + raise ValueError("ttl must be positive and non-zero") + + return self._raw.setdefault(key, default, ttl) + + def popitem(self) -> typing.Tuple[KT, VT]: + """ + Removes and returns the key-value pair that is closest to expiration. + + Returns: + A tuple containing the key and value of the removed item. + + Raises: + KeyError: If the cache is empty. + """ + try: + val = self._raw.popitem() + except _core.CoreKeyError: + raise KeyError() from None + else: + return val.pack2() + + def popitem_with_expire(self) -> typing.Tuple[KT, VT, float]: + """ + Removes and returns the key-value pair that is closest to expiration, along with its expiration duration. + + Returns: + A tuple containing the key, value, and expiration duration of the removed item. + + Raises: + KeyError: If the cache is empty. + """ + try: + val = self._raw.popitem() + except _core.CoreKeyError: + raise KeyError() from None + else: + return val.pack3() + + def drain(self, n: int) -> int: # pragma: no cover + """Does the `popitem()` `n` times and returns count of removed items.""" + if n <= 0: + return 0 + + for i in range(n): + try: + self._raw.popitem() + except _core.CoreKeyError: + return i + + return i + + def update( + self, + iterable: typing.Union[dict, typing.Iterable[tuple]], + ttl: typing.Union[float, timedelta, datetime, None] = None, + ) -> None: + """Updates the cache with elements from a dictionary or an iterable object of key/value pairs.""" + if hasattr(iterable, "items"): + iterable = iterable.items() + + if ttl is not None: + if isinstance(ttl, timedelta): + ttl = ttl.total_seconds() + + if isinstance(ttl, datetime): + ttl = (ttl - datetime.now()).total_seconds() + + if ttl <= 0: + raise ValueError("ttl must be positive and non-zero") + + self._raw.update(iterable, ttl) + + def __setitem__(self, key: 
KT, value: VT) -> None: + self.insert(key, value, None) + + def __getitem__(self, key: KT) -> VT: + try: + return self._raw.get(key).value() + except _core.CoreKeyError: + raise KeyError(key) from None + + def __delitem__(self, key: KT) -> None: + try: + self._raw.remove(key) + except _core.CoreKeyError: + raise KeyError(key) from None + + def __eq__(self, other) -> bool: + if not isinstance(other, VTTLCache): + return False # pragma: no cover + + return self._raw == other._raw + + def __ne__(self, other) -> bool: + if not isinstance(other, VTTLCache): + return False # pragma: no cover + + return self._raw != other._raw + + def shrink_to_fit(self) -> None: + """Shrinks the cache to fit len(self) elements.""" + self._raw.shrink_to_fit() + + def clear(self, *, reuse: bool = False) -> None: + """ + Removes all items from cache. + + If reuse is True, will not free the memory for reusing in the future. + """ + self._raw.clear(reuse) + + def items_with_expire(self) -> IteratorView[typing.Tuple[KT, VT, float]]: + """ + Returns an iterable object of the cache's items (key-value pairs along with their expiration duration). + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x.pack3()) + + def items(self) -> IteratorView[typing.Tuple[KT, VT]]: + """ + Returns an iterable object of the cache's items (key-value pairs). + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x.pack2()) + + def keys(self) -> IteratorView[KT]: + """ + Returns an iterable object of the cache's keys. + + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x.key()) + + def values(self) -> IteratorView[VT]: + """ + Returns an iterable object of the cache's values. 
+ + Notes: + - You should not make any changes in cache while using this iterable object. + """ + return IteratorView(self._raw.items(), lambda x: x.value()) + + def expire(self) -> None: + """ + Manually removes expired key-value pairs from memory and releases their memory. + + Notes: + - This operation is typically automatic and does not require manual invocation. + """ + self._raw.expire() + + def __iter__(self) -> IteratorView[KT]: + return self.keys() + + def __repr__(self) -> str: + cls = type(self) + + return "%s.%s[%d/%d](%s)" % ( + cls.__module__, + cls.__name__, + len(self._raw), + self._raw.maxsize(), + _items_to_str(self.items(), len(self._raw)), + ) diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index 08096bc..135712e 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -5,6 +5,7 @@ LRUCache, LFUCache, TTLCache, + VTTLCache, ) import pytest from .mixin import _TestMixin @@ -402,3 +403,147 @@ def test_popitem_with_expire(self): with pytest.raises(KeyError): obj.popitem_with_expire() + + +class TestVTTLCache(_TestMixin): + CACHE = VTTLCache + + def test_policy(self): + obj = VTTLCache(2) + + obj.insert(0, 1, 0.5) + time.sleep(0.501) + + with pytest.raises(KeyError): + obj[0] + + obj.insert("name", "nick", 0.3) + obj.insert("age", 18, None) + time.sleep(0.301) + + with pytest.raises(KeyError): + obj["name"] + + del obj["age"] + + obj.insert(0, 0, 70) + obj.insert(1, 1, 60) + obj.insert(2, 2, 90) + + assert 1 not in obj + assert (0, 0) == obj.popitem() + + def test_update_with_ttl(self): + obj = VTTLCache(3) + + obj.update({1: 1, 2: 2, 3: 3}, 0.5) + time.sleep(0.501) + + with pytest.raises(KeyError): + obj[1] + + with pytest.raises(KeyError): + obj[2] + + with pytest.raises(KeyError): + obj[3] + + def test_get_with_expire(self): + obj = VTTLCache(2) + + obj.insert(1, 1, 10) + time.sleep(0.1) + value, dur = obj.get_with_expire(1) + assert 1 == value + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur 
+ + value, dur = obj.get_with_expire("no-exists") + assert value is None + assert 0 == dur + + value, dur = obj.get_with_expire("no-exists", "value") + assert "value" == value + assert 0 == dur + + def test_pop_with_expire(self): + obj = VTTLCache(2) + + obj.insert(1, 1, 10) + time.sleep(0.1) + value, dur = obj.pop_with_expire(1) + assert 1 == value + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + value, dur = obj.pop_with_expire("no-exists") + assert value is None + assert 0 == dur + + value, dur = obj.pop_with_expire("no-exists", "value") + assert "value" == value + assert 0 == dur + + def test_popitem_with_expire(self): + obj = VTTLCache(2) + + obj.insert(1, 1, 10) + obj.insert(2, 2, 6) + time.sleep(0.1) + key, value, dur = obj.popitem_with_expire() + assert (2, 2) == (key, value) + assert 6 > dur > 5, "6 > dur > 5 failed [dur: %f]" % dur + + key, value, dur = obj.popitem_with_expire() + assert (1, 1) == (key, value) + assert 10 > dur > 9, "10 > dur > 9 failed [dur: %f]" % dur + + with pytest.raises(KeyError): + obj.popitem_with_expire() + + def test_pickle(self): + def inner(c1, c2): + assert list(c1.items()) == list(c2.items()) + + import pickle + import tempfile + + c1 = self.CACHE(maxsize=0, **self.KWARGS) + c2 = pickle.loads(pickle.dumps(c1)) + assert c1 == c2 + assert c1.capacity() == c2.capacity() + + c1 = self.CACHE(maxsize=100, **self.KWARGS) + + for i in range(10): + c1.insert(i, i * 2, i + 2) + + c2 = pickle.loads(pickle.dumps(c1)) + assert c1 == c2 + assert c1.capacity() == c2.capacity() + inner(c1, c2) + + with tempfile.TemporaryFile("w+b") as fd: + c1 = self.CACHE(maxsize=100, **self.KWARGS) + c1.update({i: i for i in range(10)}) + + for i in range(10): + c1.insert(i, i * 2, i + 2) + + pickle.dump(c1, fd) + fd.seek(0) + c2 = pickle.load(fd) + assert c1 == c2 + assert c1.capacity() == c2.capacity() + inner(c1, c2) + + c1 = self.CACHE(maxsize=100, **self.KWARGS) + + for i in range(10): + c1.insert(i, i * 2, i + 0.5) + + 
time.sleep(0.51) + + c2 = pickle.loads(pickle.dumps(c1)) + + assert len(c2) == len(c1) + assert c1.capacity() == c2.capacity() + inner(c1, c2) diff --git a/src/bridge/cache.rs b/src/bridge/cache.rs index 227f1e3..a066567 100644 --- a/src/bridge/cache.rs +++ b/src/bridge/cache.rs @@ -77,7 +77,7 @@ impl Cache { let mut lock = self.raw.lock(); match lock.entry_with_slot(py, &key)? { - Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Occupied(entry) => Ok(Some(entry.update(value)?)), Entry::Absent(entry) => { entry.insert(key, value)?; Ok(None) diff --git a/src/bridge/fifocache.rs b/src/bridge/fifocache.rs index 0877aaf..8ac8b0d 100644 --- a/src/bridge/fifocache.rs +++ b/src/bridge/fifocache.rs @@ -81,7 +81,7 @@ impl FIFOCache { let mut lock = self.raw.lock(); match lock.entry_with_slot(py, &key)? { - Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Occupied(entry) => Ok(Some(entry.update(value)?)), Entry::Absent(entry) => { entry.insert(py, key, value)?; Ok(None) diff --git a/src/bridge/lfucache.rs b/src/bridge/lfucache.rs index b7401c6..eed90ec 100644 --- a/src/bridge/lfucache.rs +++ b/src/bridge/lfucache.rs @@ -80,7 +80,7 @@ impl LFUCache { let mut lock = self.raw.lock(); match lock.entry_with_slot(py, &key)? { - Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Occupied(entry) => Ok(Some(entry.update(value)?)), Entry::Absent(entry) => { entry.insert(key, value, freq)?; Ok(None) diff --git a/src/bridge/lrucache.rs b/src/bridge/lrucache.rs index a857eba..70c7020 100644 --- a/src/bridge/lrucache.rs +++ b/src/bridge/lrucache.rs @@ -78,7 +78,7 @@ impl LRUCache { let mut lock = self.raw.lock(); match lock.entry_with_slot(py, &key)? 
{ - Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Occupied(entry) => Ok(Some(entry.update(value)?)), Entry::Absent(entry) => { entry.insert(key, value)?; Ok(None) diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index 2e37db4..3a39df7 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -2,9 +2,66 @@ use pyo3::create_exception; create_exception!(cachebox._core, CoreKeyError, pyo3::exceptions::PyException); +#[pyo3::pyclass(module = "cachebox._core", frozen)] +pub struct TTLPair { + key: pyo3::PyObject, + value: pyo3::PyObject, + duration: std::time::Duration, +} + +impl TTLPair { + fn clone_from_pair(py: pyo3::Python<'_>, pair: &crate::common::TimeToLivePair) -> Self { + TTLPair { + key: pair.key.obj.clone_ref(py), + value: pair.value.clone_ref(py), + duration: pair.duration().unwrap_or_default(), + } + } +} + +impl From for TTLPair { + fn from(value: crate::common::TimeToLivePair) -> Self { + let duration = value.duration().unwrap_or_default(); + + TTLPair { + key: value.key.obj, + value: value.value, + duration, + } + } +} + +#[pyo3::pymethods] +impl TTLPair { + fn key(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyObject { + slf.key.clone_ref(slf.py()) + } + + fn value(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyObject { + slf.value.clone_ref(slf.py()) + } + + fn duration(slf: pyo3::PyRef<'_, Self>) -> f64 { + slf.duration.as_secs_f64() + } + + fn pack2(slf: pyo3::PyRef<'_, Self>) -> (pyo3::PyObject, pyo3::PyObject) { + (slf.key.clone_ref(slf.py()), slf.value.clone_ref(slf.py())) + } + + fn pack3(slf: pyo3::PyRef<'_, Self>) -> (pyo3::PyObject, pyo3::PyObject, f64) { + ( + slf.key.clone_ref(slf.py()), + slf.value.clone_ref(slf.py()), + slf.duration.as_secs_f64(), + ) + } +} + pub mod cache; pub mod fifocache; pub mod lfucache; pub mod lrucache; pub mod rrcache; pub mod ttlcache; +pub mod vttlcache; diff --git a/src/bridge/rrcache.rs b/src/bridge/rrcache.rs index ebc26c6..2c7f6f7 100644 --- a/src/bridge/rrcache.rs +++ b/src/bridge/rrcache.rs @@ 
-71,7 +71,7 @@ impl RRCache { let mut lock = self.raw.lock(); match lock.entry_with_slot(py, &key)? { - Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Occupied(entry) => Ok(Some(entry.update(value)?)), Entry::Absent(entry) => { entry.insert(key, value)?; Ok(None) diff --git a/src/bridge/ttlcache.rs b/src/bridge/ttlcache.rs index ed64a15..e4e6b5c 100644 --- a/src/bridge/ttlcache.rs +++ b/src/bridge/ttlcache.rs @@ -8,13 +8,6 @@ pub struct TTLCache { raw: crate::mutex::Mutex, } -#[pyo3::pyclass(module = "cachebox._core", frozen)] -pub struct TTLPair { - key: pyo3::PyObject, - value: pyo3::PyObject, - duration: std::time::Duration, -} - #[allow(non_camel_case_types)] #[pyo3::pyclass(module = "cachebox._core")] pub struct ttlcache_items { @@ -94,7 +87,7 @@ impl TTLCache { let mut lock = self.raw.lock(); match lock.entry_with_slot(py, &key)? { - Entry::Occupied(mut entry) => Ok(Some(entry.update(value)?)), + Entry::Occupied(entry) => Ok(Some(entry.update(value)?)), Entry::Absent(entry) => { entry.insert(py, key, value)?; Ok(None) @@ -102,12 +95,12 @@ impl TTLCache { } } - fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { let key = PreHashObject::from_pyobject(py, key)?; let lock = self.raw.lock(); match lock.lookup(py, &key)? { - Some(val) => Ok(TTLPair::clone_from_pair(py, val)), + Some(val) => Ok(super::TTLPair::clone_from_pair(py, val)), None => Err(pyo3::PyErr::new::(key.obj)), } } @@ -157,24 +150,24 @@ impl TTLCache { } } - fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + fn remove(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { let key = PreHashObject::from_pyobject(py, key)?; let mut lock = self.raw.lock(); match lock.entry(py, &key)? 
{ Entry::Occupied(entry) => { let val = entry.remove(); - Ok(TTLPair::from(val)) + Ok(super::TTLPair::from(val)) } Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), } } - fn popitem(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + fn popitem(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { let mut lock = self.raw.lock(); match lock.popitem(py)? { - Some(val) => Ok(TTLPair::from(val)), + Some(val) => Ok(super::TTLPair::from(val)), None => Err(pyo3::PyErr::new::(())), } } @@ -311,55 +304,6 @@ impl TTLCache { } } -impl TTLPair { - fn clone_from_pair(py: pyo3::Python<'_>, pair: &TimeToLivePair) -> Self { - TTLPair { - key: pair.key.obj.clone_ref(py), - value: pair.value.clone_ref(py), - duration: unsafe { pair.duration().unwrap_unchecked() }, - } - } -} - -impl From for TTLPair { - fn from(value: TimeToLivePair) -> Self { - let duration = unsafe { value.duration().unwrap_unchecked() }; - - TTLPair { - key: value.key.obj, - value: value.value, - duration, - } - } -} - -#[pyo3::pymethods] -impl TTLPair { - fn key(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyObject { - slf.key.clone_ref(slf.py()) - } - - fn value(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyObject { - slf.value.clone_ref(slf.py()) - } - - fn duration(slf: pyo3::PyRef<'_, Self>) -> f64 { - slf.duration.as_secs_f64() - } - - fn pack2(slf: pyo3::PyRef<'_, Self>) -> (pyo3::PyObject, pyo3::PyObject) { - (slf.key.clone_ref(slf.py()), slf.value.clone_ref(slf.py())) - } - - fn pack3(slf: pyo3::PyRef<'_, Self>) -> (pyo3::PyObject, pyo3::PyObject, f64) { - ( - slf.key.clone_ref(slf.py()), - slf.value.clone_ref(slf.py()), - slf.duration.as_secs_f64(), - ) - } -} - #[pyo3::pymethods] impl ttlcache_items { fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { @@ -367,7 +311,7 @@ impl ttlcache_items { } #[allow(unused_mut)] - fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult { + fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult { let mut iter = slf.iter.lock(); 
slf.ptr.proceed(slf.py())?; @@ -387,7 +331,7 @@ impl ttlcache_items { } } - Ok(TTLPair::clone_from_pair(slf.py(), unsafe { + Ok(super::TTLPair::clone_from_pair(slf.py(), unsafe { element.as_ref() })) } diff --git a/src/bridge/vttlcache.rs b/src/bridge/vttlcache.rs new file mode 100644 index 0000000..00bf778 --- /dev/null +++ b/src/bridge/vttlcache.rs @@ -0,0 +1,343 @@ +use crate::common::Entry; +use crate::common::ObservedIterator; +use crate::common::PreHashObject; +use crate::common::TimeToLivePair; + +#[pyo3::pyclass(module = "cachebox._core", frozen)] +pub struct VTTLCache { + raw: crate::mutex::Mutex, +} + +#[allow(non_camel_case_types)] +#[pyo3::pyclass(module = "cachebox._core")] +pub struct vttlcache_items { + pub ptr: ObservedIterator, + pub iter: crate::mutex::Mutex, + pub now: std::time::SystemTime, +} + +#[pyo3::pymethods] +impl VTTLCache { + #[new] + #[pyo3(signature=(maxsize, *, capacity=0))] + fn __new__(maxsize: usize, capacity: usize) -> pyo3::PyResult { + let raw = crate::policies::vttl::VTTLPolicy::new(maxsize, capacity)?; + + let self_ = Self { + raw: crate::mutex::Mutex::new(raw), + }; + Ok(self_) + } + + fn _state(&self) -> u16 { + self.raw.lock().observed.get() + } + + fn maxsize(&self) -> usize { + self.raw.lock().maxsize() + } + + fn capacity(&self) -> usize { + self.raw.lock().capacity() + } + + fn __len__(&self) -> usize { + self.raw.lock().real_len() + } + + fn __sizeof__(&self) -> usize { + let lock = self.raw.lock(); + + lock.capacity() + * (std::mem::size_of::() + std::mem::size_of::()) + } + + fn __contains__(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? 
{ + Some(_) => Ok(true), + None => Ok(false), + } + } + + fn is_empty(&self) -> bool { + self.raw.lock().is_empty() + } + + fn is_full(&self) -> bool { + self.raw.lock().is_full() + } + + #[pyo3(signature=(key, value, ttl=None))] + fn insert( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + value: pyo3::PyObject, + ttl: Option, + ) -> pyo3::PyResult> { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry_with_slot(py, &key)? { + Entry::Occupied(entry) => Ok(Some(entry.update(value, ttl)?)), + Entry::Absent(entry) => { + entry.insert(key, value, ttl)?; + Ok(None) + } + } + } + + fn get(&self, py: pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let lock = self.raw.lock(); + + match lock.lookup(py, &key)? { + Some(val) => Ok(super::TTLPair::clone_from_pair(py, val)), + None => Err(pyo3::PyErr::new::(key.obj)), + } + } + + #[pyo3(signature=(iterable, ttl=None))] + fn update( + slf: pyo3::PyRef<'_, Self>, + py: pyo3::Python<'_>, + iterable: pyo3::PyObject, + ttl: Option, + ) -> pyo3::PyResult<()> { + if slf.as_ptr() == iterable.as_ptr() { + return Ok(()); + } + + let mut lock = slf.raw.lock(); + lock.extend(py, iterable, ttl) + } + + fn __richcmp__( + slf: pyo3::PyRef<'_, Self>, + other: pyo3::PyObject, + op: pyo3::class::basic::CompareOp, + ) -> pyo3::PyResult { + let other = other.extract::>(slf.py())?; + + match op { + pyo3::class::basic::CompareOp::Eq => { + if slf.as_ptr() == other.as_ptr() { + return Ok(true); + } + + let mut t1 = slf.raw.lock(); + let mut t2 = other.raw.lock(); + t1.equal(slf.py(), &mut t2) + } + pyo3::class::basic::CompareOp::Ne => { + if slf.as_ptr() == other.as_ptr() { + return Ok(false); + } + + let mut t1 = slf.raw.lock(); + let mut t2 = other.raw.lock(); + t1.equal(slf.py(), &mut t2).map(|r| !r) + } + _ => Err(pyo3::PyErr::new::( + "only '==' or '!=' are supported", + )), + } + } + + fn remove(&self, py: 
pyo3::Python<'_>, key: pyo3::PyObject) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? { + Entry::Occupied(entry) => { + let val = entry.remove(); + Ok(super::TTLPair::from(val)) + } + Entry::Absent(_) => Err(pyo3::PyErr::new::(key.obj)), + } + } + + fn popitem(&self) -> pyo3::PyResult { + let mut lock = self.raw.lock(); + + match lock.popitem() { + Some(val) => Ok(super::TTLPair::from(val)), + None => Err(pyo3::PyErr::new::(())), + } + } + + fn clear(&self, reuse: bool) { + let mut lock = self.raw.lock(); + lock.clear(); + + if !reuse { + lock.shrink_to_fit(); + } + } + + fn shrink_to_fit(&self) { + let mut lock = self.raw.lock(); + lock.shrink_to_fit(); + } + + #[pyo3(signature=(key, default, ttl=None))] + fn setdefault( + &self, + py: pyo3::Python<'_>, + key: pyo3::PyObject, + default: pyo3::PyObject, + ttl: Option, + ) -> pyo3::PyResult { + let key = PreHashObject::from_pyobject(py, key)?; + let mut lock = self.raw.lock(); + + match lock.entry(py, &key)? 
{ + Entry::Occupied(entry) => unsafe { + let val = entry.into_value(); + Ok(val.as_ref().value.clone_ref(py)) + }, + Entry::Absent(entry) => { + entry.insert(key, default.clone_ref(py), ttl)?; + Ok(default) + } + } + } + + fn items(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyResult> { + let mut lock = slf.raw.lock(); + let state = lock.observed.get(); + let iter = lock.iter(); + + let result = vttlcache_items { + ptr: ObservedIterator::new(slf.as_ptr(), state), + iter: crate::mutex::Mutex::new(iter), + now: std::time::SystemTime::now(), + }; + + pyo3::Py::new(slf.py(), result) + } + + fn expire(&self) { + let mut lock = self.raw.lock(); + lock.expire(); + lock.shrink_to_fit(); + } + + fn __getnewargs__(&self) -> (usize,) { + (0,) + } + + fn __getstate__(&self, py: pyo3::Python<'_>) -> pyo3::PyResult { + let mut lock = self.raw.lock(); + lock.expire(); + + let state = unsafe { + let list = pyo3::ffi::PyList_New(0); + if list.is_null() { + return Err(pyo3::PyErr::fetch(py)); + } + + for ptr in lock.iter() { + let node = ptr.as_ref(); + + let ttlobject = pyo3::ffi::PyLong_FromDouble(node.expire_at.map_or(0.0, |x| { + x.duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs_f64() + })); + + if ttlobject.is_null() { + pyo3::ffi::Py_DECREF(list); + return Err(pyo3::PyErr::fetch(py)); + } + + let tp = tuple!( + py, + 3, + 0 => node.key.obj.clone_ref(py).as_ptr(), + 1 => node.value.clone_ref(py).as_ptr(), + 2 => ttlobject, + ); + + if let Err(x) = tp { + pyo3::ffi::Py_DECREF(list); + return Err(x); + } + + if pyo3::ffi::PyList_Append(list, tp.unwrap_unchecked()) == -1 { + pyo3::ffi::Py_DECREF(list); + return Err(pyo3::PyErr::fetch(py)); + } + } + + let maxsize = pyo3::ffi::PyLong_FromSize_t(lock.maxsize()); + let capacity = pyo3::ffi::PyLong_FromSize_t(lock.capacity()); + + tuple!( + py, + 3, + 0 => maxsize, + 1 => list, + 2 => capacity, + )? 
+ }; + + Ok(unsafe { pyo3::Py::from_owned_ptr(py, state) }) + } + + pub fn __setstate__(&self, py: pyo3::Python<'_>, state: pyo3::PyObject) -> pyo3::PyResult<()> { + let mut lock = self.raw.lock(); + lock.from_pickle(py, state.as_ptr()) + } + + pub fn __traverse__(&self, visit: pyo3::PyVisit<'_>) -> Result<(), pyo3::PyTraverseError> { + for node in self.raw.lock().iter() { + let value = unsafe { node.as_ref() }; + + visit.call(&value.key.obj)?; + visit.call(&value.value)?; + } + Ok(()) + } + + pub fn __clear__(&self) { + let mut lock = self.raw.lock(); + lock.clear() + } +} + +#[pyo3::pymethods] +impl vttlcache_items { + fn __iter__(slf: pyo3::PyRef<'_, Self>) -> pyo3::PyRef<'_, Self> { + slf + } + + #[allow(unused_mut)] + fn __next__(mut slf: pyo3::PyRefMut<'_, Self>) -> pyo3::PyResult { + let mut iter = slf.iter.lock(); + + slf.ptr.proceed(slf.py())?; + + let mut element: std::ptr::NonNull; + loop { + element = { + if let Some(x) = iter.next() { + x + } else { + return Err(pyo3::PyErr::new::(())); + } + }; + + if unsafe { !element.as_ref().is_expired(slf.now) } { + break; + } + } + + Ok(super::TTLPair::clone_from_pair(slf.py(), unsafe { + element.as_ref() + })) + } +} diff --git a/src/common.rs b/src/common.rs index 4852afb..58d7326 100644 --- a/src/common.rs +++ b/src/common.rs @@ -218,11 +218,13 @@ pub enum AbsentSituation { impl PreHashObject { /// Creates a new [`PreHashObject`] + #[inline] pub fn new(obj: pyo3::PyObject, hash: u64) -> Self { Self { obj, hash } } /// Calculates the hash of `object` and creates a new [`PreHashObject`] + #[inline] pub fn from_pyobject(py: pyo3::Python<'_>, object: pyo3::PyObject) -> pyo3::PyResult { unsafe { let py_hash = pyo3::ffi::PyObject_Hash(object.as_ptr()); @@ -238,6 +240,7 @@ impl PreHashObject { } /// Check equality of two objects by using [`pyo3::ffi::PyObject_RichCompareBool`] + #[inline] pub fn equal(&self, py: pyo3::Python<'_>, other: &Self) -> pyo3::PyResult { pyobject_equal(py, self.obj.as_ptr(), 
other.obj.as_ptr()) } @@ -267,7 +270,7 @@ pub trait TryFindMethods { } impl TryFindMethods for hashbrown::raw::RawTable { - #[inline(always)] + #[inline] fn try_find( &self, hash: u64, @@ -292,7 +295,7 @@ impl TryFindMethods for hashbrown::raw::RawTable { } } - #[inline(always)] + #[inline] fn try_find_or_find_insert_slot( &mut self, hash: u64, @@ -324,10 +327,12 @@ impl TryFindMethods for hashbrown::raw::RawTable { } impl Observed { + #[cold] pub fn new() -> Self { Self(0) } + #[inline(always)] pub fn change(&mut self) { if self.0 == u16::MAX { self.0 = 0; @@ -341,6 +346,7 @@ impl Observed { } } +#[inline] unsafe fn _get_state(py: pyo3::Python<'_>, ptr: *mut pyo3::ffi::PyObject) -> pyo3::PyResult { unsafe fn inner( py: pyo3::Python<'_>, @@ -394,6 +400,7 @@ impl ObservedIterator { } } + #[inline] pub fn proceed(&self, py: pyo3::Python<'_>) -> pyo3::PyResult<()> { let state = unsafe { _get_state(py, self.ptr.as_ptr())? }; @@ -419,7 +426,6 @@ unsafe impl Send for ObservedIterator {} unsafe impl Sync for ObservedIterator {} impl NoLifetimeSliceIter { - #[inline] pub fn new(slice: &[T]) -> Self { let pointer: std::ptr::NonNull = std::ptr::NonNull::from(slice).cast(); @@ -434,6 +440,7 @@ impl NoLifetimeSliceIter { impl Iterator for NoLifetimeSliceIter { type Item = std::ptr::NonNull; + #[inline] fn next(&mut self) -> Option { if self.index >= self.len { None @@ -459,7 +466,6 @@ impl TimeToLivePair { } } - #[inline] pub fn duration(&self) -> Option { self.expire_at.map(|x| { x.duration_since(std::time::SystemTime::now()) @@ -467,7 +473,7 @@ impl TimeToLivePair { }) } - #[inline(always)] + #[inline] pub fn is_expired(&self, now: std::time::SystemTime) -> bool { match self.expire_at { Some(x) => x < now, diff --git a/src/lazyheap.rs b/src/lazyheap.rs index 65e1b4e..f1c5743 100644 --- a/src/lazyheap.rs +++ b/src/lazyheap.rs @@ -31,7 +31,6 @@ pub struct Iter { } impl LazyHeap { - #[inline] pub fn new() -> Self { Self { data: std::collections::VecDeque::new(), @@ -50,6 +49,7 
@@ impl LazyHeap { self.data.front() } + #[inline] pub fn push(&mut self, value: T) -> NonNull { unsafe { let node: NonNull = NonNull::new_unchecked(Box::into_raw(Box::new(value))).cast(); @@ -78,6 +78,7 @@ impl LazyHeap { self.is_sorted = true; } + #[inline] fn unlink_front(&mut self) -> Option { let node = self.data.pop_front()?; let node = unsafe { Box::from_raw(node.as_ptr()) }; @@ -90,23 +91,23 @@ impl LazyHeap { self.unlink_front() } + #[inline] fn unlink_back(&mut self) -> Option { let node = self.data.pop_back()?; let node = unsafe { Box::from_raw(node.as_ptr()) }; Some(*node) } - #[inline] pub fn pop_back(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Option { self.sort_by(compare); self.unlink_back() } - #[inline] pub fn get(&self, index: usize) -> Option<&NonNull> { self.data.get(index) } + #[inline] pub fn remove(&mut self, node: NonNull, compare: F) -> T where F: Fn(&T, &T) -> std::cmp::Ordering, @@ -126,18 +127,15 @@ impl LazyHeap { *boxed_node } - #[inline] pub fn clear(&mut self) { while self.unlink_back().is_some() {} self.is_sorted = true; } - #[inline] pub fn shrink_to_fit(&mut self) { self.data.shrink_to_fit(); } - #[inline] pub fn iter(&mut self, compare: impl Fn(&T, &T) -> std::cmp::Ordering) -> Iter { self.sort_by(compare); @@ -172,6 +170,7 @@ impl Drop for LazyHeap { impl Iterator for Iter { type Item = NonNull; + #[inline] fn next(&mut self) -> Option { match self.first.next() { Some(val) => Some(unsafe { *val.as_ptr() }), diff --git a/src/lib.rs b/src/lib.rs index e3057e1..7f6296d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,7 +25,8 @@ fn _core(py: pyo3::Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/linked_list.rs b/src/linked_list.rs index 58a71cd..5cbd819 100644 --- a/src/linked_list.rs +++ b/src/linked_list.rs @@ -24,7 +24,6 @@ pub struct Node { } impl LinkedList { - 
#[inline] pub fn new() -> Self { Self { head: None, @@ -33,6 +32,7 @@ impl LinkedList { } } + #[inline] pub fn push_back(&mut self, key: PreHashObject, val: pyo3::PyObject) -> NonNull { unsafe { let node = NonNull::new_unchecked(Box::into_raw(Box::new(Node { @@ -56,6 +56,7 @@ impl LinkedList { } } + #[inline] pub fn pop_front(&mut self) -> Option<(PreHashObject, pyo3::PyObject)> { unsafe { self.head.map(|node| { @@ -77,11 +78,11 @@ impl LinkedList { } } - #[inline] pub fn clear(&mut self) { while self.pop_front().is_some() {} } + #[inline] pub unsafe fn remove(&mut self, node: NonNull) -> (PreHashObject, pyo3::PyObject) { let node = Box::from_raw(node.as_ptr()); let result = node.element; @@ -106,6 +107,7 @@ impl LinkedList { result } + #[inline] pub unsafe fn move_back(&mut self, node: NonNull) { if (*node.as_ptr()).next.is_none() { // Means this node is our self.tail @@ -142,7 +144,6 @@ impl LinkedList { self.tail = Some(node); } - #[inline] pub fn iter(&self) -> Iter { Iter { head: self.head, diff --git a/src/policies/fifo.rs b/src/policies/fifo.rs index c7a54d2..c3ddd34 100644 --- a/src/policies/fifo.rs +++ b/src/policies/fifo.rs @@ -46,7 +46,6 @@ pub struct FIFOIterator { } impl FIFOPolicy { - #[inline] pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { let maxsize = non_zero_or!(maxsize, isize::MAX as usize); capacity = capacity.min(maxsize.get()); @@ -60,7 +59,6 @@ impl FIFOPolicy { }) } - #[inline] pub fn maxsize(&self) -> usize { self.maxsize.get() } @@ -75,12 +73,10 @@ impl FIFOPolicy { self.table.is_empty() } - #[inline] pub fn is_full(&self) -> bool { self.table.len() == self.maxsize.get() } - #[inline] pub fn capacity(&self) -> (usize, usize) { (self.table.capacity(), self.entries.capacity()) } @@ -143,6 +139,7 @@ impl FIFOPolicy { Ok(Some(ret)) } + #[inline] #[rustfmt::skip] pub fn entry( &mut self, @@ -166,6 +163,7 @@ impl FIFOPolicy { } } + #[inline] #[rustfmt::skip] pub fn entry_with_slot( &mut self, @@ -186,6 +184,7 @@ impl 
FIFOPolicy { } } + #[inline] pub fn lookup( &self, py: pyo3::Python<'_>, @@ -203,7 +202,6 @@ impl FIFOPolicy { } } - #[inline] pub fn clear(&mut self) { self.table.clear(); self.entries.clear(); @@ -211,7 +209,6 @@ impl FIFOPolicy { self.observed.change(); } - #[inline] pub fn shrink_to_fit(&mut self) { self.table.shrink_to(self.table.len(), |x| { self.entries[(*x) - self.n_shifts].0.hash @@ -220,7 +217,6 @@ impl FIFOPolicy { self.observed.change(); } - #[inline] pub fn entries_iter( &self, ) -> std::collections::vec_deque::Iter<'_, (PreHashObject, pyo3::PyObject)> { @@ -258,6 +254,7 @@ impl FIFOPolicy { Ok(true) } + #[inline] pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { use pyo3::types::{PyAnyMethods, PyDictMethods}; @@ -273,7 +270,7 @@ impl FIFOPolicy { unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; match self.entry_with_slot(py, &hk)? { - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value.unbind())?; } Entry::Absent(entry) => { @@ -288,7 +285,7 @@ impl FIFOPolicy { let hk = PreHashObject::from_pyobject(py, key)?; match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value)?; } Entry::Absent(entry) => { @@ -301,7 +298,6 @@ impl FIFOPolicy { Ok(()) } - #[inline] pub fn iter(&self) -> FIFOIterator { let (a, b) = self.entries.as_slices(); @@ -351,7 +347,7 @@ impl FIFOPolicy { impl<'a> FIFOPolicyOccupied<'a> { #[inline] - pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + pub fn update(self, value: pyo3::PyObject) -> pyo3::PyResult { let index = unsafe { self.bucket.as_ref() }; let item = &mut self.instance.entries[index - self.instance.n_shifts]; let old_value = std::mem::replace(&mut item.1, value); @@ -376,7 +372,6 @@ impl<'a> FIFOPolicyOccupied<'a> { m } - #[inline] pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { let index = unsafe { self.bucket.as_ref() }; &mut self.instance.entries[index - self.instance.n_shifts] @@ -426,6 +421,7 @@ impl FIFOPolicyAbsent<'_> { impl Iterator for FIFOIterator { type Item = std::ptr::NonNull<(PreHashObject, pyo3::PyObject)>; + #[inline] fn next(&mut self) -> Option { match self.first.next() { Some(val) => Some(val), diff --git a/src/policies/lfu.rs b/src/policies/lfu.rs index bd989d1..a03b2a7 100644 --- a/src/policies/lfu.rs +++ b/src/policies/lfu.rs @@ -27,7 +27,6 @@ pub struct LFUPolicyAbsent<'a> { pub type LFUIterator = lazyheap::Iter<(PreHashObject, pyo3::Py, usize)>; impl LFUPolicy { - #[inline] pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { let maxsize = non_zero_or!(maxsize, isize::MAX as usize); capacity = capacity.min(maxsize.get()); @@ -40,7 +39,6 @@ impl LFUPolicy { }) } - #[inline] pub fn maxsize(&self) -> usize { self.maxsize.get() } @@ -55,16 +53,15 @@ impl LFUPolicy { self.table.is_empty() } - #[inline] pub fn is_full(&self) -> bool { self.table.len() == self.maxsize.get() } - #[inline] pub fn capacity(&self) -> usize { self.table.capacity() } + #[inline] pub fn popitem(&mut self) -> Option { self.heap.sort_by(|a, b| a.2.cmp(&b.2)); let front 
= self.heap.front()?; @@ -81,6 +78,7 @@ impl LFUPolicy { Some(self.heap.pop_front(|a, b| a.2.cmp(&b.2)).unwrap()) } + #[inline] #[rustfmt::skip] pub fn entry( &mut self, @@ -104,6 +102,7 @@ impl LFUPolicy { } } + #[inline] #[rustfmt::skip] pub fn entry_with_slot( &mut self, @@ -128,6 +127,7 @@ impl LFUPolicy { } } + #[inline] pub fn lookup( &mut self, py: pyo3::Python<'_>, @@ -157,14 +157,12 @@ impl LFUPolicy { Ok(result) } - #[inline] pub fn clear(&mut self) { self.table.clear(); self.heap.clear(); self.observed.change(); } - #[inline] pub fn shrink_to_fit(&mut self) { self.table .shrink_to(self.table.len(), |x| unsafe { x.as_ref().0.hash }); @@ -205,6 +203,7 @@ impl LFUPolicy { Ok(true) } + #[inline] pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { use pyo3::types::{PyAnyMethods, PyDictMethods}; @@ -220,7 +219,7 @@ impl LFUPolicy { unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; match self.entry_with_slot(py, &hk)? { - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value.unbind())?; } Entry::Absent(entry) => { @@ -235,7 +234,7 @@ impl LFUPolicy { let hk = PreHashObject::from_pyobject(py, key)?; match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value)?; } Entry::Absent(entry) => { @@ -248,7 +247,6 @@ impl LFUPolicy { Ok(()) } - #[inline] pub fn iter(&mut self) -> LFUIterator { self.heap.iter(|a, b| a.2.cmp(&b.2)) } @@ -261,7 +259,6 @@ impl LFUPolicy { } #[allow(clippy::wrong_self_convention)] - #[inline] pub fn from_pickle( &mut self, py: pyo3::Python<'_>, @@ -306,7 +303,7 @@ impl LFUPolicy { impl LFUPolicyOccupied<'_> { #[inline] - pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + pub fn update(self, value: pyo3::PyObject) -> pyo3::PyResult { let item = unsafe { self.bucket.as_mut() }; unsafe { item.as_mut().2 += 1; @@ -329,7 +326,6 @@ impl LFUPolicyOccupied<'_> { item } - #[inline] pub fn into_value(self) -> NonNull { let item = unsafe { self.bucket.as_mut() }; *item diff --git a/src/policies/lru.rs b/src/policies/lru.rs index 3d9fdd2..fe02a84 100644 --- a/src/policies/lru.rs +++ b/src/policies/lru.rs @@ -24,7 +24,6 @@ pub struct LRUPolicyAbsent<'a> { } impl LRUPolicy { - #[inline] pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { let maxsize = non_zero_or!(maxsize, isize::MAX as usize); capacity = capacity.min(maxsize.get()); @@ -37,7 +36,6 @@ impl LRUPolicy { }) } - #[inline] pub fn maxsize(&self) -> usize { self.maxsize.get() } @@ -52,16 +50,15 @@ impl LRUPolicy { self.table.is_empty() } - #[inline] pub fn is_full(&self) -> bool { self.table.len() == self.maxsize.get() } - #[inline] pub fn capacity(&self) -> usize { self.table.capacity() } + #[inline] pub fn popitem(&mut self) -> Option<(PreHashObject, pyo3::PyObject)> { let ret = self.list.head?; @@ -77,6 +74,7 @@ impl LRUPolicy { Some(self.list.pop_front().unwrap()) } + #[inline] #[rustfmt::skip] pub fn entry( &mut self, @@ -100,6 +98,7 @@ impl LRUPolicy { } } + #[inline] #[rustfmt::skip] pub fn entry_with_slot( &mut self, @@ -126,6 +125,7 @@ impl LRUPolicy { } } + #[inline] pub fn lookup( &mut self, py: pyo3::Python<'_>, @@ -154,14 
+154,12 @@ impl LRUPolicy { Ok(result) } - #[inline] pub fn clear(&mut self) { self.table.clear(); self.list.clear(); self.observed.change(); } - #[inline] pub fn shrink_to_fit(&mut self) { self.table .shrink_to(self.table.len(), |x| unsafe { x.as_ref().element.0.hash }); @@ -201,6 +199,7 @@ impl LRUPolicy { Ok(true) } + #[inline] pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { use pyo3::types::{PyAnyMethods, PyDictMethods}; @@ -216,7 +215,7 @@ impl LRUPolicy { unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; match self.entry_with_slot(py, &hk)? { - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value.unbind())?; } Entry::Absent(entry) => { @@ -231,7 +230,7 @@ impl LRUPolicy { let hk = PreHashObject::from_pyobject(py, key)?; match self.entry_with_slot(py, &hk)? { - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value)?; } Entry::Absent(entry) => { @@ -257,7 +256,6 @@ impl LRUPolicy { } #[allow(clippy::wrong_self_convention)] - #[inline] pub fn from_pickle( &mut self, py: pyo3::Python<'_>, @@ -291,7 +289,7 @@ impl LRUPolicy { impl<'a> LRUPolicyOccupied<'a> { #[inline] - pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + pub fn update(self, value: pyo3::PyObject) -> pyo3::PyResult { let item = unsafe { self.bucket.as_mut() }; unsafe { self.instance.list.move_back(*item); @@ -313,7 +311,6 @@ impl<'a> LRUPolicyOccupied<'a> { item } - #[inline] pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { unsafe { self.instance.list.move_back(*self.bucket.as_ptr()); diff --git a/src/policies/nopolicy.rs b/src/policies/nopolicy.rs index 6b6a149..f6c543c 100644 --- a/src/policies/nopolicy.rs +++ b/src/policies/nopolicy.rs @@ -20,7 +20,6 @@ pub struct NoPolicyAbsent<'a> { } impl NoPolicy { - #[inline] pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { let maxsize = non_zero_or!(maxsize, isize::MAX 
as usize); capacity = capacity.min(maxsize.get()); @@ -32,7 +31,6 @@ impl NoPolicy { }) } - #[inline] pub fn maxsize(&self) -> usize { self.maxsize.get() } @@ -47,21 +45,19 @@ impl NoPolicy { self.table.is_empty() } - #[inline] pub fn is_full(&self) -> bool { self.table.len() == self.maxsize.get() } - #[inline] pub fn capacity(&self) -> usize { self.table.capacity() } - #[inline] pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::PyObject)> { unsafe { self.table.iter() } } + #[inline] #[rustfmt::skip] pub fn entry( &mut self, @@ -82,6 +78,7 @@ impl NoPolicy { } } + #[inline] #[rustfmt::skip] pub fn entry_with_slot( &mut self, @@ -102,6 +99,7 @@ impl NoPolicy { } } + #[inline] pub fn lookup( &self, py: pyo3::Python<'_>, @@ -156,18 +154,17 @@ impl NoPolicy { Ok(result) } - #[inline] pub fn clear(&mut self) { self.table.clear(); self.observed.change(); } - #[inline] pub fn shrink_to_fit(&mut self) { self.table.shrink_to(self.table.len(), |(x, _)| x.hash); self.observed.change(); } + #[inline] pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { use pyo3::types::{PyAnyMethods, PyDictMethods}; @@ -183,7 +180,7 @@ impl NoPolicy { unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; match self.entry_with_slot(py, &hk)? { - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value.unbind())?; } Entry::Absent(entry) => { @@ -198,7 +195,7 @@ impl NoPolicy { let hk = PreHashObject::from_pyobject(py, key)?; match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value)?; } Entry::Absent(entry) => { @@ -251,7 +248,7 @@ impl NoPolicy { impl<'a> NoPolicyOccupied<'a> { #[inline] - pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + pub fn update(self, value: pyo3::PyObject) -> pyo3::PyResult { unsafe { // In update we don't need to change this; because this does not change the memory address ranges // self.instance.observed.change(); @@ -267,7 +264,6 @@ impl<'a> NoPolicyOccupied<'a> { x } - #[inline] pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { unsafe { self.bucket.as_mut() } } diff --git a/src/policies/random.rs b/src/policies/random.rs index c6773fb..efce243 100644 --- a/src/policies/random.rs +++ b/src/policies/random.rs @@ -20,7 +20,6 @@ pub struct RandomPolicyAbsent<'a> { } impl RandomPolicy { - #[inline] pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { let maxsize = non_zero_or!(maxsize, isize::MAX as usize); capacity = capacity.min(maxsize.get()); @@ -32,7 +31,6 @@ impl RandomPolicy { }) } - #[inline] pub fn maxsize(&self) -> usize { self.maxsize.get() } @@ -47,17 +45,14 @@ impl RandomPolicy { self.table.is_empty() } - #[inline] pub fn is_full(&self) -> bool { self.table.len() == self.maxsize.get() } - #[inline] pub fn capacity(&self) -> usize { self.table.capacity() } - #[inline] pub fn iter(&self) -> hashbrown::raw::RawIter<(PreHashObject, pyo3::PyObject)> { unsafe { self.table.iter() } } @@ -77,6 +72,7 @@ impl RandomPolicy { } } + #[inline] #[rustfmt::skip] pub fn entry( &mut self, @@ -97,6 +93,7 @@ impl RandomPolicy { } } + #[inline] #[rustfmt::skip] pub fn entry_with_slot( &mut self, @@ -117,6 +114,7 @@ impl RandomPolicy { } } + #[inline] pub fn lookup( &self, py: pyo3::Python<'_>, @@ -171,18 +169,17 @@ impl RandomPolicy { Ok(result) } - #[inline] pub fn clear(&mut self) { self.table.clear(); self.observed.change(); } - #[inline] pub fn shrink_to_fit(&mut self) { 
self.table.shrink_to(self.table.len(), |(x, _)| x.hash); self.observed.change(); } + #[inline] pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { use pyo3::types::{PyAnyMethods, PyDictMethods}; @@ -198,7 +195,7 @@ impl RandomPolicy { unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; match self.entry_with_slot(py, &hk)? { - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value.unbind())?; } Entry::Absent(entry) => { @@ -213,7 +210,7 @@ impl RandomPolicy { let hk = PreHashObject::from_pyobject(py, key)?; match self.entry_with_slot(py, &hk)? { - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value)?; } Entry::Absent(entry) => { @@ -263,7 +260,6 @@ impl RandomPolicy { Ok(()) } - #[inline] pub fn random_key(&self) -> Option<&PreHashObject> { if self.table.is_empty() { None @@ -280,7 +276,7 @@ impl RandomPolicy { impl<'a> RandomPolicyOccupied<'a> { #[inline] - pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + pub fn update(self, value: pyo3::PyObject) -> pyo3::PyResult { unsafe { let old_value = std::mem::replace(&mut self.bucket.as_mut().1, value); @@ -298,7 +294,6 @@ impl<'a> RandomPolicyOccupied<'a> { x } - #[inline] pub fn into_value(self) -> &'a mut (PreHashObject, pyo3::PyObject) { unsafe { self.bucket.as_mut() } } diff --git a/src/policies/ttl.rs b/src/policies/ttl.rs index d2aaae5..9b9d549 100644 --- a/src/policies/ttl.rs +++ b/src/policies/ttl.rs @@ -35,7 +35,6 @@ pub struct TTLIterator { } impl TTLPolicy { - #[inline] pub fn new(maxsize: usize, mut capacity: usize, secs: f64) -> pyo3::PyResult { let maxsize = non_zero_or!(maxsize, isize::MAX as usize); capacity = capacity.min(maxsize.get()); @@ -50,12 +49,10 @@ impl TTLPolicy { }) } - #[inline] pub fn maxsize(&self) -> usize { self.maxsize.get() } - #[inline] pub fn ttl(&self) -> std::time::Duration { self.ttl } @@ -81,12 +78,10 @@ impl TTLPolicy { self.real_len() 
== 0 } - #[inline] pub fn is_full(&self) -> bool { self.real_len() == self.maxsize.get() } - #[inline] pub fn capacity(&self) -> (usize, usize) { (self.table.capacity(), self.entries.capacity()) } @@ -161,6 +156,7 @@ impl TTLPolicy { Ok(Some(ret)) } + #[inline] #[rustfmt::skip] pub fn entry( &mut self, @@ -188,6 +184,7 @@ impl TTLPolicy { } } + #[inline] #[rustfmt::skip] pub fn entry_with_slot( &mut self, @@ -216,6 +213,7 @@ impl TTLPolicy { } } + #[inline] pub fn lookup( &self, py: pyo3::Python<'_>, @@ -241,7 +239,6 @@ impl TTLPolicy { } } - #[inline] pub fn clear(&mut self) { self.table.clear(); self.entries.clear(); @@ -249,7 +246,6 @@ impl TTLPolicy { self.observed.change(); } - #[inline] pub fn shrink_to_fit(&mut self, py: pyo3::Python<'_>) { self.expire(py); @@ -260,6 +256,7 @@ impl TTLPolicy { self.observed.change(); } + #[inline] pub fn extend(&mut self, py: pyo3::Python<'_>, iterable: pyo3::PyObject) -> pyo3::PyResult<()> { use pyo3::types::{PyAnyMethods, PyDictMethods}; @@ -275,7 +272,7 @@ impl TTLPolicy { unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; match self.entry_with_slot(py, &hk)? { - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value.unbind())?; } Entry::Absent(entry) => { @@ -290,7 +287,7 @@ impl TTLPolicy { let hk = PreHashObject::from_pyobject(py, key)?; match self.entry_with_slot(py, &hk)? 
{ - Entry::Occupied(mut entry) => { + Entry::Occupied(entry) => { entry.update(value)?; } Entry::Absent(entry) => { @@ -303,7 +300,6 @@ impl TTLPolicy { Ok(()) } - #[inline] pub fn entries_iter(&self) -> std::collections::vec_deque::Iter<'_, TimeToLivePair> { self.entries.iter() } @@ -355,7 +351,6 @@ impl TTLPolicy { Ok(true) } - #[inline] pub fn iter(&mut self, py: pyo3::Python<'_>) -> TTLIterator { self.expire(py); @@ -367,7 +362,6 @@ impl TTLPolicy { } } - #[inline(always)] pub fn get_index(&self, n: usize) -> Option<&TimeToLivePair> { self.entries.get(n) } @@ -422,37 +416,12 @@ impl TTLPolicy { *self = new; Ok(()) } - - // use pyo3::types::PyAnyMethods; - - // unsafe { - // tuple!(check state, size=3)?; - // let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state => list); - - // let mut new = Self::new(maxsize, capacity)?; - - // for pair in iterable.bind(py).try_iter()? { - // let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; - - // let hk = PreHashObject::from_pyobject(py, key)?; - - // match new.entry_with_slot(py, &hk)? 
{ - // Entry::Absent(entry) => { - // entry.insert(py, hk, value)?; - // } - // _ => std::hint::unreachable_unchecked(), - // } - // } - - // *self = new; - // Ok(()) - // } } } impl<'a> TTLPolicyOccupied<'a> { #[inline] - pub fn update(&mut self, value: pyo3::PyObject) -> pyo3::PyResult { + pub fn update(self, value: pyo3::PyObject) -> pyo3::PyResult { // We have to move the value to the end of the vector let (mut index, slot) = unsafe { self.instance.table.remove(self.bucket.clone()) }; index -= self.instance.n_shifts; @@ -494,7 +463,6 @@ impl<'a> TTLPolicyOccupied<'a> { m } - #[inline] pub fn into_value(self) -> &'a mut TimeToLivePair { let index = unsafe { self.bucket.as_ref() }; &mut self.instance.entries[index - self.instance.n_shifts] @@ -502,7 +470,6 @@ impl<'a> TTLPolicyOccupied<'a> { } impl TTLPolicyAbsent<'_> { - #[inline] unsafe fn pickle_insert( self, key: PreHashObject, @@ -531,11 +498,9 @@ impl TTLPolicyAbsent<'_> { .entries .push_back(TimeToLivePair::new(key, value, Some(expire_at))); }, - AbsentSituation::None => unreachable!("this should never happen"), + AbsentSituation::None => unsafe { std::hint::unreachable_unchecked() }, } - // We don't need change observed value here - // self.instance.observed.change(); Ok(()) } diff --git a/src/policies/vttl.rs b/src/policies/vttl.rs index ae12ac3..edb0bbf 100644 --- a/src/policies/vttl.rs +++ b/src/policies/vttl.rs @@ -44,7 +44,6 @@ pub struct VTTLPolicyAbsent<'a> { pub type VTTLIterator = lazyheap::Iter; impl VTTLPolicy { - #[inline] pub fn new(maxsize: usize, mut capacity: usize) -> pyo3::PyResult { let maxsize = non_zero_or!(maxsize, isize::MAX as usize); capacity = capacity.min(maxsize.get()); @@ -57,7 +56,6 @@ impl VTTLPolicy { }) } - #[inline] pub fn maxsize(&self) -> usize { self.maxsize.get() } @@ -73,12 +71,10 @@ impl VTTLPolicy { self.table.is_empty() } - #[inline] pub fn is_full(&self) -> bool { self.table.len() == self.maxsize.get() } - #[inline] pub fn capacity(&self) -> usize { 
self.table.capacity() } @@ -107,6 +103,7 @@ impl VTTLPolicy { } } + #[inline] pub fn popitem(&mut self) -> Option { self.heap.sort_by(compare_fn!()); @@ -124,6 +121,7 @@ impl VTTLPolicy { Some(self.heap.pop_front(compare_fn!()).unwrap()) } + #[inline] #[rustfmt::skip] pub fn entry( &mut self, @@ -151,6 +149,7 @@ impl VTTLPolicy { } } + #[inline] #[rustfmt::skip] pub fn entry_with_slot( &mut self, @@ -181,6 +180,7 @@ impl VTTLPolicy { } } + #[inline] pub fn lookup( &self, py: pyo3::Python<'_>, @@ -202,14 +202,12 @@ impl VTTLPolicy { } } - #[inline] pub fn clear(&mut self) { self.table.clear(); self.heap.clear(); self.observed.change(); } - #[inline] pub fn shrink_to_fit(&mut self) { self.table .shrink_to(self.table.len(), |x| unsafe { x.as_ref().key.hash }); @@ -218,7 +216,6 @@ impl VTTLPolicy { self.observed.change(); } - #[inline] pub fn iter(&mut self) -> VTTLIterator { self.heap.iter(compare_fn!()) } @@ -261,15 +258,111 @@ impl VTTLPolicy { Ok(true) } -} -impl VTTLPolicyOccupied<'_> { #[inline] - pub fn update( + pub fn extend( &mut self, - value: pyo3::PyObject, + py: pyo3::Python<'_>, + iterable: pyo3::PyObject, ttl: Option, - ) -> pyo3::PyResult { + ) -> pyo3::PyResult<()> { + use pyo3::types::{PyAnyMethods, PyDictMethods}; + + if unsafe { pyo3::ffi::PyDict_CheckExact(iterable.as_ptr()) == 1 } { + let dict = unsafe { + iterable + .downcast_bound::(py) + .unwrap_unchecked() + }; + + for (key, value) in dict.iter() { + let hk = + unsafe { PreHashObject::from_pyobject(py, key.unbind()).unwrap_unchecked() }; + + match self.entry_with_slot(py, &hk)? { + Entry::Occupied(entry) => { + entry.update(value.unbind(), ttl)?; + } + Entry::Absent(entry) => { + entry.insert(hk, value.unbind(), ttl)?; + } + } + } + } else { + for pair in iterable.bind(py).try_iter()? { + let (key, value) = pair?.extract::<(pyo3::PyObject, pyo3::PyObject)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + match self.entry_with_slot(py, &hk)? 
{ + Entry::Occupied(entry) => { + entry.update(value, ttl)?; + } + Entry::Absent(entry) => { + entry.insert(hk, value, ttl)?; + } + } + } + } + + Ok(()) + } + + #[allow(clippy::wrong_self_convention)] + pub fn from_pickle( + &mut self, + py: pyo3::Python<'_>, + state: *mut pyo3::ffi::PyObject, + ) -> pyo3::PyResult<()> { + use pyo3::types::PyAnyMethods; + + unsafe { + tuple!(check state, size=3)?; + let (maxsize, iterable, capacity) = extract_pickle_tuple!(py, state => list); + + // SAFETY: we check `iterable` type in `extract_pickle_tuple` macro + if maxsize < (pyo3::ffi::PyObject_Size(iterable.as_ptr()) as usize) { + return Err(pyo3::PyErr::new::( + "iterable object size is greater than maxsize", + )); + } + + let mut new = Self::new(maxsize, capacity)?; + + for pair in iterable.bind(py).try_iter()? { + let (key, value, timestamp) = + pair?.extract::<(pyo3::PyObject, pyo3::PyObject, f64)>()?; + + let hk = PreHashObject::from_pyobject(py, key)?; + + let ttl = { + if timestamp == 0.0 { + None + } else { + Some(std::time::UNIX_EPOCH + std::time::Duration::from_secs_f64(timestamp)) + } + }; + + match new.entry_with_slot(py, &hk)? 
{ + Entry::Absent(entry) => { + entry.pickle_insert(hk, value, ttl)?; + } + _ => std::hint::unreachable_unchecked(), + } + } + + new.expire(); + new.shrink_to_fit(); + + *self = new; + Ok(()) + } + } +} + +impl VTTLPolicyOccupied<'_> { + #[inline] + pub fn update(self, value: pyo3::PyObject, ttl: Option) -> pyo3::PyResult { let item = unsafe { self.bucket.as_mut() }; unsafe { @@ -293,7 +386,6 @@ impl VTTLPolicyOccupied<'_> { item } - #[inline] pub fn into_value(self) -> NonNull { let item = unsafe { self.bucket.as_mut() }; *item @@ -301,6 +393,40 @@ impl VTTLPolicyOccupied<'_> { } impl VTTLPolicyAbsent<'_> { + unsafe fn pickle_insert( + self, + key: PreHashObject, + value: pyo3::PyObject, + expire_at: Option, + ) -> pyo3::PyResult<()> { + match self.situation { + AbsentSituation::Expired(_) => { + return Err(pyo3::PyErr::new::( + "pikcle object is suspicious!", + )) + } + AbsentSituation::Slot(slot) => { + // This means the key is not available and we have insert_slot + // for inserting it + + // We don't need to check maxsize, we sure `len(iterable) <= maxsize` in loading pickle + + let hash = key.hash; + let node = self + .instance + .heap + .push(TimeToLivePair::new(key, value, expire_at)); + + unsafe { + self.instance.table.insert_in_slot(hash, slot, node); + } + } + AbsentSituation::None => unsafe { std::hint::unreachable_unchecked() }, + } + + Ok(()) + } + #[inline] pub fn insert( self, From faf19ecf5918a2240521e22449ca41d805558acb Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 17 Apr 2025 13:50:53 +0330 Subject: [PATCH 28/37] Update tests --- python/cachebox/_cachebox.py | 26 ++++++++++++++------------ python/tests/mixin.py | 2 +- python/tests/test_caches.py | 26 ++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index e792c46..b57758c 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -905,7 +905,7 @@ def popitem(self) -> 
typing.Tuple[KT, VT]: """ try: return self._raw.popitem() - except _core.CoreKeyError: + except _core.CoreKeyError: # pragma: no cover raise KeyError() from None def drain(self, n: int) -> int: # pragma: no cover @@ -1105,7 +1105,9 @@ def insert(self, key: KT, value: VT) -> typing.Optional[VT]: """ return self._raw.insert(key, value) - def peek(self, key: KT, default: typing.Optional[DT] = None) -> typing.Union[VT, DT]: + def peek( + self, key: KT, default: typing.Optional[DT] = None + ) -> typing.Union[VT, DT]: # pragma: no cover """ Searches for a key-value in the cache and returns it (without moving the key to recently used). """ @@ -1156,7 +1158,7 @@ def popitem(self) -> typing.Tuple[KT, VT]: """ try: return self._raw.popitem() - except _core.CoreKeyError: + except _core.CoreKeyError: # pragma: no cover raise KeyError() from None def drain(self, n: int) -> int: # pragma: no cover @@ -1585,7 +1587,7 @@ def values(self) -> IteratorView[VT]: """ return IteratorView(self._raw.items(), lambda x: x.value()) - def first(self, n: int = 0) -> typing.Optional[KT]: + def first(self, n: int = 0) -> typing.Optional[KT]: # pragma: no cover """ Returns the first key in cache; this is the one which will be removed by `popitem()` (if n == 0). @@ -1605,7 +1607,7 @@ def last(self) -> typing.Optional[KT]: """ return self._raw.get_index(len(self._raw) - 1) - def expire(self) -> None: + def expire(self) -> None: # pragma: no cover """ Manually removes expired key-value pairs from memory and releases their memory. @@ -1716,11 +1718,11 @@ def insert( Raises: ValueError: If the provided TTL is zero or negative. """ - if ttl is not None: + if ttl is not None: # pragma: no cover if isinstance(ttl, timedelta): ttl = ttl.total_seconds() - if isinstance(ttl, datetime): + elif isinstance(ttl, datetime): ttl = (ttl - datetime.now()).total_seconds() if ttl <= 0: @@ -1827,11 +1829,11 @@ def setdefault( Raises: ValueError: If the provided TTL is not a positive value. 
""" - if ttl is not None: + if ttl is not None: # pragma: no cover if isinstance(ttl, timedelta): ttl = ttl.total_seconds() - if isinstance(ttl, datetime): + elif isinstance(ttl, datetime): ttl = (ttl - datetime.now()).total_seconds() if ttl <= 0: @@ -1895,11 +1897,11 @@ def update( if hasattr(iterable, "items"): iterable = iterable.items() - if ttl is not None: + if ttl is not None: # pragma: no cover if isinstance(ttl, timedelta): ttl = ttl.total_seconds() - if isinstance(ttl, datetime): + elif isinstance(ttl, datetime): ttl = (ttl - datetime.now()).total_seconds() if ttl <= 0: @@ -1982,7 +1984,7 @@ def values(self) -> IteratorView[VT]: """ return IteratorView(self._raw.items(), lambda x: x.value()) - def expire(self) -> None: + def expire(self) -> None: # pragma: no cover """ Manually removes expired key-value pairs from memory and releases their memory. diff --git a/python/tests/mixin.py b/python/tests/mixin.py index 4c6fafb..b9191e4 100644 --- a/python/tests/mixin.py +++ b/python/tests/mixin.py @@ -75,7 +75,7 @@ def test___len__(self): cache = self.CACHE(10, **self.KWARGS, capacity=10) assert len(cache) == 0 - assert cache.is_empty() + assert cache.is_empty() ^ bool(cache) cache[0] = 0 assert len(cache) == 1 diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index 135712e..d585d9e 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -7,6 +7,7 @@ TTLCache, VTTLCache, ) +from datetime import timedelta import pytest from .mixin import _TestMixin import time @@ -101,6 +102,17 @@ def test_first_last(self): class TestRRCache(_TestMixin): CACHE = RRCache + def test_popitem(self): + obj = RRCache(3) + with pytest.raises(KeyError): + obj.popitem() + with pytest.raises(KeyError): + obj.random_key() + + obj[1] = 1 + assert obj.random_key() == 1 + assert obj.popitem() == (1, 1) + def test_pickle(self): self._test_pickle(lambda c1, c2: None) @@ -155,6 +167,8 @@ def test_recently_used_funcs(self): obj[3] = 7 obj.peek(4) + assert 
obj.peek(6) is None + assert obj.most_recently_used() == 3 assert obj.least_recently_used() == 0 @@ -245,6 +259,9 @@ def test_least_frequently_used(self): assert obj.least_frequently_used(1) == 3 assert obj.least_frequently_used(4) == 0 assert obj.least_frequently_used(5) is None + assert obj.least_frequently_used(5) is None + assert obj.least_frequently_used(-len(obj)) == obj.least_frequently_used() + assert obj.least_frequently_used(-1000) is None def test_pickle(self): def inner(c1, c2): @@ -257,6 +274,15 @@ class TestTTLCache(_TestMixin): CACHE = TTLCache KWARGS = {"ttl": 10} + def test__new__(self): + super().test__new__() + + cache = TTLCache(0, timedelta(minutes=2, seconds=20)) + assert cache.ttl == (2 * 60) + 20 + + with pytest.raises(ValueError): + TTLCache(0, -10) + def test_policy(self): obj = self.CACHE(2, 0.5) assert obj.ttl == 0.5 From aa2e00879788d08f7e5f518f0df5cb5b2c21b2b7 Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 17 Apr 2025 14:14:52 +0330 Subject: [PATCH 29/37] Add utils and py.typed --- python/cachebox/__init__.py | 12 +- python/cachebox/_cachebox.py | 2 +- python/cachebox/py.typed | 0 python/cachebox/utils.py | 523 +++++++++++++++++++++++++++++++++++ 4 files changed, 535 insertions(+), 2 deletions(-) create mode 100644 python/cachebox/py.typed create mode 100644 python/cachebox/utils.py diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py index cc980f9..d2ab225 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -2,7 +2,6 @@ __author__ as __author__, __version__ as __version__, ) - from ._cachebox import ( Cache as Cache, FIFOCache as FIFOCache, @@ -14,3 +13,14 @@ BaseCacheImpl as BaseCacheImpl, IteratorView as IteratorView, ) +from .utils import ( + Frozen as Frozen, + cached as cached, + cachedmethod as cachedmethod, + make_key as make_key, + make_hash_key as make_hash_key, + make_typed_key as make_typed_key, + EVENT_HIT as EVENT_HIT, + EVENT_MISS as EVENT_MISS, + is_cached as is_cached, +) 
diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index b57758c..0c5d976 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -1718,7 +1718,7 @@ def insert( Raises: ValueError: If the provided TTL is zero or negative. """ - if ttl is not None: # pragma: no cover + if ttl is not None: # pragma: no cover if isinstance(ttl, timedelta): ttl = ttl.total_seconds() diff --git a/python/cachebox/py.typed b/python/cachebox/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/python/cachebox/utils.py b/python/cachebox/utils.py new file mode 100644 index 0000000..199eb90 --- /dev/null +++ b/python/cachebox/utils.py @@ -0,0 +1,523 @@ +from ._cachebox import BaseCacheImpl, FIFOCache +from collections import namedtuple, defaultdict +import functools +import warnings +import asyncio +import _thread +import inspect +import typing + + +KT = typing.TypeVar("KT") +VT = typing.TypeVar("VT") +DT = typing.TypeVar("DT") + + +class Frozen(BaseCacheImpl, typing.Generic[KT, VT]): + __slots__ = ("__cache", "ignore") + + def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: + """ + **This is not a cache.** this class can freeze your caches and prevents changes. + + :param cls: your cache + + :param ignore: If False, will raise TypeError if anyone try to change cache. will do nothing otherwise. 
+ """ + assert isinstance(cls, BaseCacheImpl) + assert type(cls) is not Frozen + + self.__cache = cls + self.ignore = ignore + + @property + def cache(self) -> BaseCacheImpl[KT, VT]: + return self.__cache + + @property + def maxsize(self) -> int: + return self.__cache.maxsize + + def __len__(self) -> int: + return len(self.__cache) + + def __sizeof__(self) -> int: + return self.__cache.__sizeof__() + + def __bool__(self) -> bool: + return bool(self.__cache) + + def __contains__(self, key: KT) -> bool: + return key in self.__cache + + def __setitem__(self, key: KT, value: VT) -> None: + if self.ignore: + return + + raise TypeError("This cache is frozen.") + + def __getitem__(self, key: KT) -> VT: + return self.__cache[key] + + def __delitem__(self, key: KT) -> VT: + if self.ignore: + return # type: ignore + + raise TypeError("This cache is frozen.") + + def __repr__(self) -> str: + return f"" + + def __iter__(self) -> typing.Iterator[KT]: + return iter(self.__cache) + + def __richcmp__(self, other, op: int) -> bool: + return self.__cache.__richcmp__(other, op) + + def capacity(self) -> int: + return self.__cache.capacity() + + def is_full(self) -> bool: + return self.__cache.is_full() + + def is_empty(self) -> bool: + return self.__cache.is_empty() + + def insert(self, key: KT, value: VT, *args, **kwargs) -> typing.Optional[VT]: + if self.ignore: + return + + raise TypeError("This cache is frozen.") + + def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: + return self.__cache.get(key, default) + + def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: + if self.ignore: + return # type: ignore + + raise TypeError("This cache is frozen.") + + def setdefault( + self, key: KT, default: typing.Optional[DT] = None, *args, **kwargs + ) -> typing.Optional[typing.Union[VT, DT]]: + if self.ignore: + return + + raise TypeError("This cache is frozen.") + + def popitem(self) -> typing.Tuple[KT, VT]: + if self.ignore: + return # type: ignore + + raise 
TypeError("This cache is frozen.") + + def drain(self, n: int) -> int: + if self.ignore: + return # type: ignore + + raise TypeError("This cache is frozen.") + + def clear(self, *, reuse: bool = False) -> None: + if self.ignore: + return + + raise TypeError("This cache is frozen.") + + def shrink_to_fit(self) -> None: + if self.ignore: + return + + raise TypeError("This cache is frozen.") + + def update( + self, + iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]], + *args, + **kwargs, + ) -> None: + if self.ignore: + return + + raise TypeError("This cache is frozen.") + + def keys(self) -> typing.Iterable[KT]: + return self.__cache.keys() + + def values(self) -> typing.Iterable[VT]: + return self.__cache.values() + + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: + return self.__cache.items() + + +class _LockWithCounter: + """ + A threading/asyncio lock which count the waiters + """ + + __slots__ = ("lock", "waiters") + + def __init__(self, is_async: bool = False): + self.lock = _thread.allocate_lock() if not is_async else asyncio.Lock() + self.waiters = 0 + + async def __aenter__(self) -> None: + self.waiters += 1 + await self.lock.acquire() + + async def __aexit__(self, *args, **kwds) -> None: + self.waiters -= 1 + self.lock.release() + + def __enter__(self) -> None: + self.waiters += 1 + self.lock.acquire() + + def __exit__(self, *args, **kwds) -> None: + self.waiters -= 1 + self.lock.release() + + +def _copy_if_need(obj, tocopy=(dict, list, set), level: int = 1): + from copy import copy + + if level == 0: + return obj + + if level == 2: + return copy(obj) + + return copy(obj) if (type(obj) in tocopy) else obj + + +def make_key(args: tuple, kwds: dict, fasttype=(int, str)): + key = args + if kwds: + key += (object,) + for item in kwds.items(): + key += item + + if fasttype and len(key) == 1 and type(key[0]) in fasttype: + return key[0] + + return key + + +def make_hash_key(args: tuple, kwds: dict): + return 
hash(make_key(args, kwds)) + + +def make_typed_key(args: tuple, kwds: dict): + key = make_key(args, kwds, fasttype=()) + + key += tuple(type(v) for v in args) # type: ignore + if kwds: + key += tuple(type(v) for v in kwds.values()) + + return key + + +CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "length", "cachememory"]) +EVENT_MISS = 1 +EVENT_HIT = 2 + + +def _cached_wrapper( + func, + cache: BaseCacheImpl, + key_maker: typing.Callable[[tuple, dict], typing.Hashable], + clear_reuse: bool, + callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]], + copy_level: int, + is_method: bool, +) -> None: + _key_maker = (lambda args, kwds: key_maker(args[1:], kwds)) if is_method else key_maker + + hits = 0 + misses = 0 + locks = defaultdict(_LockWithCounter) + exceptions = {} + + def _wrapped(*args, **kwds): + nonlocal hits, misses, locks, exceptions + + if kwds.pop("cachebox__ignore", False): + return func(*args, **kwds) + + key = _key_maker(args, kwds) + + # try to get result from cache + try: + result = cache[key] + except KeyError: + pass + else: + # A NOTE FOR ME: we don't want to catch KeyError exceptions from `callback` + # so don't wrap it with try except + hits += 1 + + if callback is not None: + callback(EVENT_HIT, key, result) + + return _copy_if_need(result, level=copy_level) + + with locks[key]: + if exceptions.get(key, None) is not None: + cached_error = exceptions[key] if locks[key].waiters > 1 else exceptions.pop(key) + raise cached_error + + try: + result = cache[key] + hits += 1 + event = EVENT_HIT + except KeyError: + try: + result = func(*args, **kwds) + except Exception as e: + if locks[key].waiters > 1: + exceptions[key] = e + + raise e + + else: + cache[key] = result + misses += 1 + event = EVENT_MISS + + if callback is not None: + callback(event, key, result) + + return _copy_if_need(result, level=copy_level) + + _wrapped.cache = cache + _wrapped.callback = callback + _wrapped.cache_info = lambda: 
CacheInfo( + hits, misses, cache.maxsize, len(cache), cache.capacity() + ) + + def cache_clear(): + nonlocal misses, hits, locks, exceptions + cache.clear(reuse=clear_reuse) + misses = 0 + hits = 0 + locks.clear() + exceptions.clear() + + _wrapped.cache_clear = cache_clear + + return _wrapped + + +def _async_cached_wrapper( + func, + cache: BaseCacheImpl, + key_maker: typing.Callable[[tuple, dict], typing.Hashable], + clear_reuse: bool, + callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]], + copy_level: int, + is_method: bool, +) -> None: + _key_maker = (lambda args, kwds: key_maker(args[1:], kwds)) if is_method else key_maker + + hits = 0 + misses = 0 + locks = defaultdict(lambda: _LockWithCounter(True)) + exceptions = {} + + async def _wrapped(*args, **kwds): + nonlocal hits, misses, locks, exceptions + + if kwds.pop("cachebox__ignore", False): + return await func(*args, **kwds) + + key = _key_maker(args, kwds) + + # try to get result from cache + try: + result = cache[key] + except KeyError: + pass + else: + # A NOTE FOR ME: we don't want to catch KeyError exceptions from `callback` + # so don't wrap it with try except + hits += 1 + + if callback is not None: + awaitable = callback(EVENT_HIT, key, result) + if inspect.isawaitable(awaitable): + await awaitable + + return _copy_if_need(result, level=copy_level) + + async with locks[key]: + if exceptions.get(key, None) is not None: + cached_error = exceptions[key] if locks[key].waiters > 1 else exceptions.pop(key) + raise cached_error + + try: + result = cache[key] + hits += 1 + event = EVENT_HIT + except KeyError: + try: + result = await func(*args, **kwds) + except Exception as e: + if locks[key].waiters > 1: + exceptions[key] = e + + raise e + + else: + cache[key] = result + misses += 1 + event = EVENT_MISS + + if callback is not None: + awaitable = callback(event, key, result) + if inspect.isawaitable(awaitable): + await awaitable + + return _copy_if_need(result, 
level=copy_level) + + _wrapped.cache = cache + _wrapped.callback = callback + _wrapped.cache_info = lambda: CacheInfo( + hits, misses, cache.maxsize, len(cache), cache.capacity() + ) + + def cache_clear(): + nonlocal misses, hits, locks, exceptions + cache.clear(reuse=clear_reuse) + misses = 0 + hits = 0 + locks.clear() + exceptions.clear() + + _wrapped.cache_clear = cache_clear + + return _wrapped + + +def cached( + cache: typing.Union[BaseCacheImpl, dict, None], + key_maker: typing.Callable[[tuple, dict], typing.Hashable] = make_key, + clear_reuse: bool = False, + callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, + copy_level: int = 1, + always_copy: typing.Optional[bool] = None, +): + """ + Decorator to wrap a function with a memoizing callable that saves results in a cache. + + :param cache: Specifies a cache that handles and stores the results. if `None` or `dict`, `FIFOCache` will be used. + + :param key_maker: Specifies a function that will be called with the same positional and keyword + arguments as the wrapped function itself, and which has to return a suitable + cache key (must be hashable). + + :param clear_reuse: The wrapped function has a function named `clear_cache` that uses `cache.clear` + method to clear the cache. This parameter will be passed to cache's `clear` method. + + :param callback: Every time the `cache` is used, callback is also called. + The callback arguments are: event number (see `EVENT_MISS` or `EVENT_HIT` variables), key, and then result. + + :param copy_level: The wrapped function always copies the result of your function and then returns it. + This parameter specifies that the wrapped function has to copy which type of results. + `0` means "never copy", `1` means "only copy `dict`, `list`, and `set` results" and + `2` means "always copy the results". 
+ + Example:: + + @cachebox.cached(cachebox.LRUCache(128)) + def sum_as_string(a, b): + return str(a+b) + + assert sum_as_string(1, 2) == "3" + + assert len(sum_as_string.cache) == 1 + sum_as_string.cache_clear() + assert len(sum_as_string.cache) == 0 + + See more: [documentation](https://github.com/awolverp/cachebox#function-cached) + """ + if cache is None: + cache = FIFOCache(0) + + if type(cache) is dict: + cache = FIFOCache(0, cache) + + if not isinstance(cache, BaseCacheImpl): + raise TypeError("we expected cachebox caches, got %r" % (cache,)) + + if always_copy is not None: + warnings.warn( + "'always_copy' parameter is deprecated and will be removed in future; use 'copy_level' instead", + category=DeprecationWarning, + ) + if always_copy is True: + copy_level = 2 + + def decorator(func): + if inspect.iscoroutinefunction(func): + wrapper = _async_cached_wrapper( + func, cache, key_maker, clear_reuse, callback, copy_level, False + ) + else: + wrapper = _cached_wrapper( + func, cache, key_maker, clear_reuse, callback, copy_level, False + ) + + return functools.update_wrapper(wrapper, func) + + return decorator + + +def cachedmethod( + cache: typing.Union[BaseCacheImpl, dict, None], + key_maker: typing.Callable[[tuple, dict], typing.Hashable] = make_key, + clear_reuse: bool = False, + callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, + copy_level: int = 1, + always_copy: typing.Optional[bool] = None, +): + """ + this works exactly like `cached()`, but ignores `self` parameters in hashing and key making. 
+ """ + if cache is None: + cache = FIFOCache(0) + + if type(cache) is dict: + cache = FIFOCache(0, cache) + + if not isinstance(cache, BaseCacheImpl): + raise TypeError("we expected cachebox caches, got %r" % (cache,)) + + if always_copy is not None: + warnings.warn( + "'always_copy' parameter is deprecated and will be removed in future; use 'copy_level' instead", + category=DeprecationWarning, + ) + if always_copy is True: + copy_level = 2 + + def decorator(func): + if inspect.iscoroutinefunction(func): + wrapper = _async_cached_wrapper( + func, cache, key_maker, clear_reuse, callback, copy_level, True + ) + else: + wrapper = _cached_wrapper( + func, cache, key_maker, clear_reuse, callback, copy_level, True + ) + + return functools.update_wrapper(wrapper, func) + + return decorator + + +def is_cached(func: object) -> bool: + """ + Check if a function/method cached by cachebox or not + """ + return hasattr(func, "cache") and isinstance(func.cache, BaseCacheImpl) From 6125bbd75ff842a2c7bbe55a7419628b70121dd1 Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 17 Apr 2025 14:20:48 +0330 Subject: [PATCH 30/37] Remove the deprecated parameter: `always_copy` --- python/cachebox/utils.py | 21 +-- python/tests/test_concurrency.py | 108 +++++++++++ python/tests/test_utils.py | 304 +++++++++++++++++++++++++++++++ 3 files changed, 413 insertions(+), 20 deletions(-) create mode 100644 python/tests/test_concurrency.py create mode 100644 python/tests/test_utils.py diff --git a/python/cachebox/utils.py b/python/cachebox/utils.py index 199eb90..c6b5591 100644 --- a/python/cachebox/utils.py +++ b/python/cachebox/utils.py @@ -1,7 +1,6 @@ from ._cachebox import BaseCacheImpl, FIFOCache from collections import namedtuple, defaultdict import functools -import warnings import asyncio import _thread import inspect @@ -13,7 +12,7 @@ DT = typing.TypeVar("DT") -class Frozen(BaseCacheImpl, typing.Generic[KT, VT]): +class Frozen(BaseCacheImpl, typing.Generic[KT, VT]): # pragma: no cover 
__slots__ = ("__cache", "ignore") def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: @@ -405,7 +404,6 @@ def cached( clear_reuse: bool = False, callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, copy_level: int = 1, - always_copy: typing.Optional[bool] = None, ): """ Decorator to wrap a function with a memoizing callable that saves results in a cache. @@ -450,14 +448,6 @@ def sum_as_string(a, b): if not isinstance(cache, BaseCacheImpl): raise TypeError("we expected cachebox caches, got %r" % (cache,)) - if always_copy is not None: - warnings.warn( - "'always_copy' parameter is deprecated and will be removed in future; use 'copy_level' instead", - category=DeprecationWarning, - ) - if always_copy is True: - copy_level = 2 - def decorator(func): if inspect.iscoroutinefunction(func): wrapper = _async_cached_wrapper( @@ -479,7 +469,6 @@ def cachedmethod( clear_reuse: bool = False, callback: typing.Optional[typing.Callable[[int, typing.Any, typing.Any], typing.Any]] = None, copy_level: int = 1, - always_copy: typing.Optional[bool] = None, ): """ this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making. 
@@ -493,14 +482,6 @@ def cachedmethod( if not isinstance(cache, BaseCacheImpl): raise TypeError("we expected cachebox caches, got %r" % (cache,)) - if always_copy is not None: - warnings.warn( - "'always_copy' parameter is deprecated and will be removed in future; use 'copy_level' instead", - category=DeprecationWarning, - ) - if always_copy is True: - copy_level = 2 - def decorator(func): if inspect.iscoroutinefunction(func): wrapper = _async_cached_wrapper( diff --git a/python/tests/test_concurrency.py b/python/tests/test_concurrency.py new file mode 100644 index 0000000..2935ee1 --- /dev/null +++ b/python/tests/test_concurrency.py @@ -0,0 +1,108 @@ +from cachebox import cached, LRUCache +from concurrent import futures +import asyncio +import pytest +import time + + +def test_threading_return(): + calls = 0 + + @cached(LRUCache(0)) + def func(): + nonlocal calls + time.sleep(1) + calls += 1 + return "Hello" + + with futures.ThreadPoolExecutor(max_workers=10) as executor: + future_list = [executor.submit(func) for _ in range(10)] + for future in futures.as_completed(future_list): + assert future.result() == "Hello" + + assert calls == 1 + + +def test_threading_exc(): + calls = 0 + + @cached(LRUCache(0)) + def func(): + nonlocal calls + time.sleep(1) + calls += 1 + raise RuntimeError + + with futures.ThreadPoolExecutor(max_workers=5) as executor: + future_list = [executor.submit(func) for _ in range(5)] + for future in futures.as_completed(future_list): + assert isinstance(future.exception(), RuntimeError) + + assert calls == 1 + + with futures.ThreadPoolExecutor(max_workers=5) as executor: + future_list = [executor.submit(func) for _ in range(5)] + for future in futures.as_completed(future_list): + assert isinstance(future.exception(), RuntimeError) + + assert calls == 2 + + +@pytest.mark.asyncio +async def test_asyncio_return(): + calls = 0 + + @cached(LRUCache(0)) + async def func(): + nonlocal calls + await asyncio.sleep(1) + calls += 1 + return "Hello" + + 
await asyncio.gather( + func(), + func(), + func(), + func(), + func(), + ) + + assert calls == 1 + + +@pytest.mark.asyncio +async def test_asyncio_exc(): + calls = 0 + + @cached(LRUCache(0)) + async def func(): + nonlocal calls + await asyncio.sleep(1) + calls += 1 + raise RuntimeError + + tasks = await asyncio.gather( + func(), + func(), + func(), + func(), + func(), + return_exceptions=True, + ) + for future in tasks: + assert isinstance(future, RuntimeError) + + assert calls == 1 + + tasks = await asyncio.gather( + func(), + func(), + func(), + func(), + func(), + return_exceptions=True, + ) + for future in tasks: + assert isinstance(future, RuntimeError) + + assert calls == 2 diff --git a/python/tests/test_utils.py b/python/tests/test_utils.py new file mode 100644 index 0000000..ffe2d0f --- /dev/null +++ b/python/tests/test_utils.py @@ -0,0 +1,304 @@ +from cachebox import ( + Frozen, + LRUCache, + cached, + make_typed_key, + make_key, + cachedmethod, + EVENT_HIT, + EVENT_MISS, + is_cached, +) +import asyncio +import pytest +import time + + +def test_frozen(): + cache = LRUCache(10, {i: i for i in range(8)}) + f = Frozen(cache) + + assert f.maxsize == cache.maxsize + + with pytest.raises(TypeError): + f[0] = 0 + + with pytest.raises(TypeError): + f.pop(0) + + with pytest.raises(TypeError): + f.popitem() + + assert len(f) == 8 + assert len(f) == len(cache) + cache.insert(9, 9) + assert len(f) == 9 + assert len(f) == len(cache) + + f = Frozen(cache, ignore=True) + f.popitem() + + +def test_cached(): + obj = LRUCache(3) # type: LRUCache[int, int] + + @cached(obj) + def factorial(n): + fact = 1 + for num in range(2, n + 1): + fact *= num + + time.sleep(0.1) # need for testing + return fact + + perf_1 = time.perf_counter() + factorial(15) + perf_1 = time.perf_counter() - perf_1 + + assert factorial.cache_info().length == 1 + assert factorial.cache_info().misses == 1 + + perf_2 = time.perf_counter() + factorial(15) + perf_2 = time.perf_counter() - perf_2 + + assert 
perf_1 > perf_2 + assert factorial.cache_info().hits == 1 + + factorial.cache_clear() + assert factorial.cache_info().hits == 0 + assert factorial.cache_info().misses == 0 + + perf_3 = time.perf_counter() + factorial(15) + perf_3 = time.perf_counter() - perf_3 + assert perf_3 > perf_2 + + # test cachebox__ignore + factorial.cache_clear() + assert len(factorial.cache) == 0 + factorial(15, cachebox__ignore=True) + assert len(factorial.cache) == 0 + + +def test_key_makers(): + @cached(LRUCache(125), key_maker=make_key) + def func(a, b, c): + return a, b, c + + func(1, 2, 3) + func(1.0, 2, 3.0) + func(3, 2, 1) + + assert len(func.cache) == 2 + + @cached(LRUCache(125), key_maker=make_typed_key) + def func(a, b, c): + return a, b, c + + func(1, 2, 3) + func(1.0, 2, 3.0) + func(3, 2, 1) + + assert len(func.cache) == 3 + + +@pytest.mark.asyncio +async def test_async_cached(): + obj = LRUCache(3) # type: LRUCache[int, int] + + @cached(obj) + async def factorial(n: int, _: str): + fact = 1 + for num in range(2, n + 1): + fact *= num + + await asyncio.sleep(0.1) # need for testing + return fact + + perf_1 = time.perf_counter() + await factorial(15, "cachebox") + perf_1 = time.perf_counter() - perf_1 + + assert factorial.cache_info().length == 1 + assert factorial.cache_info().misses == 1 + + perf_2 = time.perf_counter() + await factorial(15, "cachebox") + perf_2 = time.perf_counter() - perf_2 + + assert perf_1 > perf_2 + assert factorial.cache_info().hits == 1 + + factorial.cache_clear() + assert factorial.cache_info().hits == 0 + assert factorial.cache_info().misses == 0 + + perf_3 = time.perf_counter() + await factorial(15, "cachebox") + perf_3 = time.perf_counter() - perf_3 + assert perf_3 > perf_2 + + # test cachebox__ignore + factorial.cache_clear() + assert len(factorial.cache) == 0 + await factorial(15, "me", cachebox__ignore=True) + assert len(factorial.cache) == 0 + + +def test_cachedmethod(): + class TestCachedMethod: + def __init__(self, num) -> None: + self.num = 
num + + @cachedmethod(None) + def method(self, char: str): + assert type(self) is TestCachedMethod + return char * self.num + + cls = TestCachedMethod(10) + assert cls.method("a") == ("a" * 10) + + +@pytest.mark.asyncio +async def test_async_cachedmethod(): + class TestCachedMethod: + def __init__(self, num) -> None: + self.num = num + + @cachedmethod(LRUCache(0)) + async def method(self, char: str): + assert type(self) is TestCachedMethod + return char * self.num + + cls = TestCachedMethod(10) + assert (await cls.method("a")) == ("a" * 10) + + +def test_callback(): + obj = LRUCache(3) + + called = list() + + @cached( + obj, + key_maker=lambda args, _: args[0], + callback=lambda event, key, value: called.append((event, key, value)), + ) + def factorial(n: int, /): + fact = 1 + for num in range(2, n + 1): + fact *= num + + return fact + + assert factorial(5) == 120 + assert len(called) == 1 + assert called[0] == (EVENT_MISS, 5, 120) + + assert factorial(5) == 120 + assert len(called) == 2 + assert called[1] == (EVENT_HIT, 5, 120) + + assert factorial(3) == 6 + assert len(called) == 3 + assert called[2] == (EVENT_MISS, 3, 6) + + assert is_cached(factorial) + + +async def _test_async_callback(): + obj = LRUCache(3) + + called = list() + + async def _callback(event, key, value): + called.append((event, key, value)) + + @cached(obj, key_maker=lambda args, _: args[0], callback=_callback) + async def factorial(n: int, /): + fact = 1 + for num in range(2, n + 1): + fact *= num + + return fact + + assert await factorial(5) == 120 + assert len(called) == 1 + assert called[0] == (EVENT_MISS, 5, 120) + + assert await factorial(5) == 120 + assert len(called) == 2 + assert called[1] == (EVENT_HIT, 5, 120) + + assert await factorial(3) == 6 + assert len(called) == 3 + assert called[2] == (EVENT_MISS, 3, 6) + + assert is_cached(factorial) + assert not is_cached(_callback) + + +def test_async_callback(): + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = 
asyncio.new_event_loop() + + loop.run_until_complete(_test_async_callback()) + + +def test_copy_level(): + class A: + def __init__(self, c: int) -> None: + self.c = c + + @cached(LRUCache(0)) + def func(c: int) -> A: + return A(c) + + result = func(1) + assert result.c == 1 + result.c = 2 + + result = func(1) + assert result.c == 2 # !!! + + @cached(LRUCache(0), copy_level=2) + def func(c: int) -> A: + return A(c) + + result = func(1) + assert result.c == 1 + result.c = 2 + + result = func(1) + assert result.c == 1 # :) + + +def test_classmethod(): + class MyClass: + def __init__(self, num: int) -> None: + self.num = num + + @classmethod + @cached(None, copy_level=2) + def new(cls, num: int): + return cls(num) + + a = MyClass.new(1) + assert isinstance(a, MyClass) and a.num == 1 + + +def test_staticmethod(): + class MyClass: + def __init__(self, num: int) -> None: + self.num = num + + @staticmethod + @cached(None, copy_level=2) + def new(num: int): + return num + + a = MyClass.new(1) + assert isinstance(a, int) and a == 1 From b8fbac2b481ab0a97d7507c84da3c06a08d289fc Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 17 Apr 2025 15:14:25 +0330 Subject: [PATCH 31/37] * Rewrite a part of README.md --- README.md | 621 +++++++++++------------------------------------------- 1 file changed, 127 insertions(+), 494 deletions(-) diff --git a/README.md b/README.md index 921bb5b..69eebb1 100644 --- a/README.md +++ b/README.md @@ -1,79 +1,95 @@ -# cachebox -![image](https://img.shields.io/pypi/v/cachebox.svg) -![image](https://img.shields.io/pypi/l/cachebox.svg) -![image](https://img.shields.io/pypi/pyversions/cachebox.svg) -![image](https://static.pepy.tech/badge/cachebox) -![python-test](https://github.com/awolverp/cachebox/actions/workflows/python-test.yml/badge.svg) - -[**Releases**](https://github.com/awolverp/cachebox/releases) | [**Benchmarks**](https://github.com/awolverp/cachebox-benchmark) | [**Issues**](https://github.com/awolverp/cachebox/issues/new) - -**The 
fastest caching Python library written in Rust** +

+ Cachebox +

+

+ The fastest caching Python library written in Rust +

+

+ Releases | Benchmarks | Issues +

+

+ + License + + + Release + + + Python Versions + + + Downloads + +

+ +------- ### What does it do? You can easily and powerfully perform caching operations in Python as fast as possible. This can make your application very faster and it's a good choice in big applications. +**Ideal for optimizing large-scale applications** with efficient, low-overhead caching. -- ๐Ÿš€ 10-50x faster than other caching libraries. -- ๐Ÿ“Š Very low memory usage (1/2 of dictionary). -- ๐Ÿ”ฅ Full-feature and easy-to-use +**Key Features:** +- ๐Ÿš€ Extremely fast (10-50x faster than other caching libraries - [benchmarks](https://github.com/awolverp/cachebox-benchmark)) +- ๐Ÿ“Š Minimal memory footprint (50% of standard dictionary memory usage) +- ๐Ÿ”ฅ Full-featured and user-friendly - ๐Ÿงถ Completely thread-safe - ๐Ÿ”ง Tested and correct -- **\[R\]** written in Rust that has high-performance -- ๐Ÿค Support Python 3.8+ (PyPy & CPython) -- ๐Ÿ“ฆ Over 7 cache algorithms are supported +- **\[R\]** written in Rust for maximum performance +- ๐Ÿค Compatible with Python 3.8+ (PyPy and CPython) +- ๐Ÿ“ฆ Supports 7 advanced caching algorithms -## Page Content -- [**When i need caching and cachebox?**](#when-i-need-caching-and-cachebox) -- [**Why `cachebox`?**](#why-cachebox) -- [**Installation**](#installation) -- [**Example**](#example) -- [**Learn**](#learn) -- [**Incompatible changes**](#incompatible-changes) -- [**Tips & Notes**](#tips-and-notes) +### Page Contents +- [โ“ **When i need caching and cachebox**](#when-i-need-caching-and-cachebox) +- [๐ŸŒŸ **Why `cachebox`**](#why-cachebox) +- [๐Ÿ”ง **Installation**](#installation) +- [๐Ÿ’ก **Preview**](#example) +- [๐ŸŽ“ **Learn**](#learn) +- [โœ๏ธ **Incompatible changes**](#incompatible-changes) +- [๐Ÿ“Œ **Tips & Notes**](#tips-and-notes) -## When i need caching and cachebox? -**๐Ÿ“ˆ Frequent Data Access** \ -If your application frequently accesses the same data, caching can helps you. 
+### When i need caching and cachebox +- ๐Ÿ“ˆ **Frequent Data Access** \ + If you need to access the same data multiple times, caching can help reduce the number of database queries or API calls, improving performance. -**๐Ÿ’Ž Expensive Operations** \ -When data retrieval involves costly operations such as database queries or API calls, caching can save time and resources. +- ๐Ÿ’Ž **Expensive Operations** \ + If you have operations that are computationally expensive, caching can help reduce the number of times these operations need to be performed. -**๐Ÿš— High Traffic Scenarios** \ -In big applications with high user traffic caching can help by reducing the number of operations. +- ๐Ÿš— **High Traffic Scenarios** \ + If your application has high user traffic, caching can help reduce the load on your server by reducing the number of requests that need to be processed. -**#๏ธโƒฃ Web Page Rendering** \ -Caching HTML pages can speed up the delivery of static content. +- #๏ธโƒฃ **Web Page Rendering** \ + If you are rendering web pages, caching can help reduce the time it takes to generate the page by caching the results of expensive operations. Caching HTML pages can speed up the delivery of static content. -**๐Ÿšง Rate Limiting** \ -Caching can help you to manage rate limits imposed by third-party APIs by reducing the number of requests sent. +- ๐Ÿšง **Rate Limiting** \ + If you have a rate limiting system in place, caching can help reduce the number of requests that need to be processed by the rate limiter. Also, caching can help you to manage rate limits imposed by third-party APIs by reducing the number of requests sent. -**๐Ÿค– Machine Learning Models** \ -If your application frequently makes predictions using the same input data, caching the results can save computation time. +- ๐Ÿค– **Machine Learning Models** \ + If your application frequently makes predictions using the same input data, caching the results can save computation time. 
-**And a lot of other situations ...** - -## Why cachebox? -**โšก Rust** \ +### Why cachebox? +- **โšก Rust** \ It uses *Rust* language to achieve high performance. -**๐Ÿงฎ SwissTable** \ +- **๐Ÿงฎ SwissTable** \ It uses Google's high-performance SwissTable hash map. thanks to [hashbrown](https://github.com/rust-lang/hashbrown). -**โœจ Low memory usage** \ +- **โœจ Low memory usage** \ It has very low memory usage. -**โญ Zero Dependency** \ +- **โญ Zero Dependency** \ As we said, `cachebox` written in Rust so you don't have to install any other dependencies. -**๐Ÿงถ Thread safe** \ +- **๐Ÿงถ Thread safe** \ It's completely thread-safe and uses locks to prevent problems. -**๐Ÿ‘Œ Easy To Use** \ +- **๐Ÿ‘Œ Easy To Use** \ You only need to import it and choose your implementation to use and behave with it like a dictionary. -**๐Ÿšซ Avoids Cache Stampede** \ -It avoids [cache stampede](https://en.wikipedia.org/wiki/Cache_stampede) to have better performance. +- **๐Ÿšซ Avoids Cache Stampede** \ +It avoids [cache stampede](https://en.wikipedia.org/wiki/Cache_stampede) by using a distributed lock system. + ## Installation cachebox is installable by `pip`: @@ -82,7 +98,7 @@ pip3 install -U cachebox ``` > [!WARNING]\ -> The new version v4 has some incompatible with v3, for more info please see [Incompatible changes](#incompatible-changes) +> The new version v5 has some incompatibilities with v4, for more info please see [Incompatible changes](#incompatible-changes) ## Example The simplest example of **cachebox** could look like this: @@ -93,7 +109,7 @@ import cachebox @cachebox.cached(cachebox.FIFOCache(maxsize=128)) def factorial(number: int) -> int: fact = 1 - for num in range(2, n + 1): + for num in range(2, number + 1): fact *= num return fact @@ -107,51 +123,57 @@ async def make_request(method: str, url: str) -> dict: return response.json() ``` -> [!NOTE]\ -> Unlike functools.lru_cache and other caching libraries, cachebox will copy `dict`, `list`, and `set`. 
-> ```python -> @cachebox.cached(cachebox.LRUCache(maxsize=128)) -> def make_dict(name: str, age: int) -> dict: -> return {"name": name, "age": age} +Also, unlike functools.lru_cache and other caching libraries, cachebox can copy `dict`, `list`, and `set` objects. +```python +@cachebox.cached(cachebox.LRUCache(maxsize=128)) +def make_dict(name: str, age: int) -> dict: + return {"name": name, "age": age} > -> d = make_dict("cachebox", 10) -> assert d == {"name": "cachebox", "age": 10} -> d["new-key"] = "new-value" -> -> d2 = make_dict("cachebox", 10) -> # `d2` will be `{"name": "cachebox", "age": 10, "new-key": "new-value"}` if you use other libraries -> assert d2 == {"name": "cachebox", "age": 10} -> ``` +d = make_dict("cachebox", 10) +assert d == {"name": "cachebox", "age": 10} +d["new-key"] = "new-value" + +d2 = make_dict("cachebox", 10) +# `d2` will be `{"name": "cachebox", "age": 10, "new-key": "new-value"}` if you use other libraries +assert d2 == {"name": "cachebox", "age": 10} +``` + +You can use cache alghoritms without `cached` decorator -- just import what cache alghoritms you want and use it like a dictionary. +```python +from cachebox import FIFOCache + +cache = FIFOCache(maxsize=128) +cache["key"] = "value" +assert cache["key"] == "value" + +# You can also use `cache.get(key, default)` +assert cache.get("key") == "value" +``` ## Learn -There are 2 decorators: -- [**cached**](#function-cached): a decorator that helps you to cache your functions and calculations with a lot of options. -- [**cachedmethod**](#function-cachedmethod): this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making. +There are 3 useful functions: +- [**cached**](#decorator-cached): a decorator that helps you to cache your functions and calculations with a lot of options. +- [**cachedmethod**](#decorator-cachedmethod): this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making. 
- [**is_cached**](#function-is_cached): check if a function/method cached by cachebox or not -There are 9 classes: +And 9 classes: - [**BaseCacheImpl**](#class-basecacheimpl): base-class for all classes. - [**Cache**](#class-cache): A simple cache that has no algorithm; this is only a hashmap. - [**FIFOCache**](#class-fifocache): the FIFO cache will remove the element that has been in the cache the longest. - [**RRCache**](#class-rrcache): the RR cache will choice randomly element to remove it to make space when necessary. -- [**TTLCache**](#class-ttlcache): the TTL cache will automatically remove the element in the cache that has expired. - [**LRUCache**](#class-lrucache): the LRU cache will remove the element in the cache that has not been accessed in the longest time. - [**LFUCache**](#class-lfucache): the LFU cache will remove the element in the cache that has been accessed the least, regardless of time. +- [**TTLCache**](#class-ttlcache): the TTL cache will automatically remove the element in the cache that has expired. - [**VTTLCache**](#class-vttlcache): the TTL cache will automatically remove the element in the cache that has expired when need. - [**Frozen**](#class-frozen): you can use this class for freezing your caches. -Using this library is very easy and you only need to import cachebox and then use these classes like a dictionary (or use its decorator such as `cached` and `cachedmethod`). - -There are some examples for you with different methods for introducing those. \ -**All the methods you will see in the examples are common across all classes (except for a few of them).** - -* * * - -### *function* cached +### Decorator `cached` Decorator to wrap a function with a memoizing callable that saves results in a cache. -**Parameters:** +
+Parameters + - `cache`: Specifies a cache that handles and stores the results. if `None` or `dict`, `FIFOCache` will be used. - `key_maker`: Specifies a function that will be called with the same positional and keyword @@ -169,6 +191,12 @@ Decorator to wrap a function with a memoizing callable that saves results in a c `0` means "never copy", `1` means "only copy `dict`, `list`, and `set` results" and `2` means "always copy the results". +
+ +
+Examples + + **A simple example:** ```python import cachebox @@ -223,7 +251,7 @@ print(sum_as_string.cache) # LRUCache(0 / 9223372036854775807, capacity=0) print(sum_as_string.cache_info()) -# CacheInfo(hits=0, misses=0, maxsize=9223372036854775807, length=0, cachememory=8) +# CacheInfo(hits=0, misses=0, maxsize=9223372036854775807, length=0, memory=8) # `.cache_clear()` clears the cache sum_as_string.cache_clear() @@ -259,6 +287,9 @@ assert func(5, 4) == 9 # callback_func: miss event (5, 4) 9 ``` +
+ + > [!NOTE]\ > Recommended use `cached` method for **@staticmethod**s and use [`cachedmethod`](#function-cachedmethod) for **@classmethod**s; > And set `copy_level` parameter to `2` on **@classmethod**s. @@ -285,15 +316,14 @@ assert func(5, 4) == 9 > sum_as_string(10, 20, cachebox__ignore=True) > ``` -> [!NOTE]\ -> You can see [LRUCache here](#class-lrucache). - * * * -### *function* cachedmethod - +### Decorator `cachedmethod` this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making. +
+Example + ```python import cachebox @@ -306,14 +336,21 @@ c = MyClass() c.my_method() ``` -> [!NOTE]\ -> You can see [TTLCache here](#class-ttlcache). +
-* * * -### *function* is_cached +### Function `is_cached` +Checks that a function/method is cached by cachebox or not. + +
+Parameters + +- `func`: The function/method to check. + +
-Check if a function/method cached by cachebox or not +
+Example ```python import cachebox @@ -325,408 +362,4 @@ def func(): assert cachebox.is_cached(func) ``` -> [!NOTE]\ -> You can see [TTLCache here](#class-ttlcache). - -* * * - -### *class* BaseCacheImpl -This is the base class of all cache classes such as Cache, FIFOCache, ... \ -Do not try to call its constructor, this is only for type-hint. - -```python -import cachebox - -class ClassName(cachebox.BaseCacheImpl): - # ... - -def func(cache: BaseCacheImpl): - # ... - -cache = cachebox.LFUCache(0) -assert isinstance(cache, cachebox.BaseCacheImpl) -``` - -* * * - -### *class* Cache -A simple cache that has no algorithm; this is only a hashmap. - -> [!TIP]\ -> **`Cache` vs `dict`**: -> - it is thread-safe and unordered, while `dict` isn't thread-safe and ordered (Python 3.6+). -> - it uses very lower memory than `dict`. -> - it supports useful and new methods for managing memory, while `dict` does not. -> - it does not support `popitem`, while `dict` does. -> - You can limit the size of `Cache`, but you cannot for `dict`. - -| | get | insert | delete | popitem | -| ------------ | ----- | ------- | ------ | ------- | -| Worse-case | O(1) | O(1) | O(1) | N/A | - -```python -from cachebox import Cache - -# These parameters are common in classes: -# By `maxsize` param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. -# By `iterable` param, you can create cache from a dict or an iterable. -# If `capacity` param is given, cache attempts to allocate a new hash table with at -# least enough capacity for inserting the given number of elements without reallocating. 
-cache = Cache(maxsize=100, iterable=None, capacity=100) - -# you can behave with it like a dictionary -cache["key"] = "value" -# or you can use `.insert(key, value)` instead of that (recommended) -cache.insert("key", "value") - -print(cache["key"]) # value - -del cache["key"] -cache["key"] # KeyError: key - -# cachebox.Cache does not have any policy, so will raise OverflowError if reached the bound. -cache.update({i:i for i in range(200)}) -# OverflowError: The cache has reached the bound. -``` - -* * * - -### *class* FIFOCache -FIFO Cache implementation - First-In First-Out Policy (thread-safe). - -In simple terms, the FIFO cache will remove the element that has been in the cache the longest. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1) | O(1) | O(min(i, n-i)) | O(1) | - -```python -from cachebox import FIFOCache - -cache = FIFOCache(5, {i:i*2 for i in range(5)}) - -print(len(cache)) # 5 -cache["new-key"] = "new-value" -print(len(cache)) # 5 - -print(cache.get(3, "default-val")) # 6 -print(cache.get(6, "default-val")) # default-val - -print(cache.popitem()) # (1, 2) - -# insert method returns a value: -# - If the cache did not have this key present, None is returned. -# - If the cache did have this key present, the value is updated, and the old value is returned. -print(cache.insert(3, "val")) # 6 -print(cache.insert("new-key", "val")) # None - -# Returns the first key in cache; this is the one which will be removed by `popitem()`. -print(cache.first()) -``` - -* * * - -### *class* RRCache -RRCache implementation - Random Replacement policy (thread-safe). - -In simple terms, the RR cache will choice randomly element to remove it to make space when necessary. 
- -| | get | insert | delete | popitem | -| ------------ | ----- | ------- | ------ | ------- | -| Worse-case | O(1) | O(1) | O(1) | O(1)~ | - -```python -from cachebox import RRCache - -cache = RRCache(10, {i:i for i in range(10)}) -print(cache.is_full()) # True -print(cache.is_empty()) # False - -# Returns the number of elements the map can hold without reallocating. -print(cache.capacity()) # 28 - -# Shrinks the cache to fit len(self) elements. -cache.shrink_to_fit() -print(cache.capacity()) # 10 - -print(len(cache)) # 10 -cache.clear() -print(len(cache)) # 0 -``` - -* * * - -### *class* TTLCache -TTL Cache implementation - Time-To-Live Policy (thread-safe). - -In simple terms, the TTL cache will automatically remove the element in the cache that has expired. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(n) | - -```python -from cachebox import TTLCache -import time - -# The `ttl` param specifies the time-to-live value for each element in cache (in seconds); cannot be zero or negative. -cache = TTLCache(0, ttl=2) -cache.update({i:str(i) for i in range(10)}) - -print(cache.get_with_expire(2)) # ('2', 1.99) - -# Returns the oldest key in cache; this is the one which will be removed by `popitem()` -print(cache.first()) # 0 - -cache["mykey"] = "value" -time.sleep(2) -cache["mykey"] # KeyError -``` - -* * * - -### *class* LRUCache -LRU Cache implementation - Least recently used policy (thread-safe). - -In simple terms, the LRU cache will remove the element in the cache that has not been accessed in the longest time. 
- -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(1)~ | O(1)~ | - -```python -from cachebox import LRUCache - -cache = LRUCache(0, {i:i*2 for i in range(10)}) - -# access `1` -print(cache[0]) # 0 -print(cache.popitem()) # (1, 2) - -# .peek() searches for a key-value in the cache and returns it without moving the key to recently used. -print(cache.peek(2)) # 4 -print(cache.popitem()) # (3, 6) - -# Does the `popitem()` `n` times and returns count of removed items. -print(cache.drain(5)) # 5 -``` - -* * * - -### *class* LFUCache -LFU Cache implementation - Least frequantly used policy (thread-safe). - -In simple terms, the LFU cache will remove the element in the cache that has been accessed the least, regardless of time. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(n) | O(n) | - -```python -from cachebox import LFUCache - -cache = cachebox.LFUCache(5) -cache.insert(1, 1) -cache.insert(2, 2) - -# access 1 twice -cache[1] -cache[1] - -# access 2 once -cache[2] - -assert cache.least_frequently_used() == 2 -assert cache.least_frequently_used(2) is None # 2 is out of range - -for item in cache.items(): - print(item) -# (2, '2') -# (1, '1') -``` - -> [!TIP]\ -> `.items()`, `.keys()`, and `.values()` are ordered (v4.0+) - -* * * - -### *class* VTTLCache -VTTL Cache implementation - Time-To-Live Per-Key Policy (thread-safe). - -In simple terms, the TTL cache will automatically remove the element in the cache that has expired when need. - -| | get | insert | delete(i) | popitem | -| ------------ | ----- | ------- | --------- | ------- | -| Worse-case | O(1)~ | O(1)~ | O(n) | O(n) | - -```python -from cachebox import VTTLCache -import time - -# The `ttl` param specifies the time-to-live value for `iterable` (in seconds); cannot be zero or negative. 
-cache = VTTLCache(100, iterable={i:i for i in range(4)}, ttl=3) -print(len(cache)) # 4 -time.sleep(3) -print(len(cache)) # 0 - -# The "key1" is exists for 5 seconds -cache.insert("key1", "value", ttl=5) -# The "key2" is exists for 2 seconds -cache.insert("key2", "value", ttl=2) - -time.sleep(2) -# "key1" is exists for 3 seconds -print(cache.get("key1")) # value - -# "key2" has expired -print(cache.get("key2")) # None -``` - -> [!TIP] -> **`VTTLCache` vs `TTLCache`:** -> - In `VTTLCache` each item has its own unique time-to-live, unlike `TTLCache`. -> - `VTTLCache` is generally slower than `TTLCache`. - -* * * - -### *class* Frozen -**This is not a cache.** this class can freeze your caches and prevents changes โ„๏ธ. - -```python -from cachebox import Frozen, FIFOCache - -cache = FIFOCache(10, {1:1, 2:2, 3:3}) - -# parameters: -# cls: your cache -# ignore: If False, will raise TypeError if anyone try to change cache. will do nothing otherwise. -frozen = Frozen(cache, ignore=True) -print(frozen[1]) # 1 -print(len(frozen)) # 3 - -# Frozen ignores this action and do nothing -frozen.insert("key", "value") -print(len(frozen)) # 3 - -# Let's try with ignore=False -frozen = Frozen(cache, ignore=False) - -frozen.insert("key", "value") -# TypeError: This cache is frozen. -``` - -> [!NOTE]\ -> The **Frozen** class can't prevent expiring in [TTLCache](#ttlcache) or [VTTLCache](#vttlcache). -> -> For example: -> ```python -> cache = TTLCache(0, ttl=3, iterable={i:i for i in range(10)}) -> frozen = Frozen(cache) -> -> time.sleep(3) -> print(len(frozen)) # 0 -> ``` - -## Incompatible changes -These are changes that are not compatible with the previous version: - -**You can see more info about changes in [Changelog](CHANGELOG.md).** - -* * * - -#### Pickle serializing changed! -If you try to load bytes that has dumped by pickle in previous version, you will get `TypeError` exception. -There's no way to fix that ๐Ÿ’”, but it's worth it. 
- -```python -import pickle - -with open("old-version.pickle", "rb") as fd: - pickle.load(fd) # TypeError: ... -``` - -* * * - -#### Iterators changed! -In previous versions, the iterators are not ordered; but now all of iterators are ordered. -this means all of `.keys()`, `.values()`, `.items()`, and `iter(cache)` methods are ordered now. - -For example: -```python -from cachebox import FIFOCache - -cache = FIFOCache(maxsize=4) -for i in range(4): - cache[i] = str(i) - -for key in cache: - print(key) -# 0 -# 1 -# 2 -# 3 -``` - -* * * - -#### `.insert()` method changed! -In new version, the `.insert()` method has a small change that can help you in coding. - -`.insert()` equals to `self[key] = value`, but: -- If the cache did not have this key present, **None is returned**. -- If the cache did have this key present, the value is updated, -and **the old value is returned**. The key is not updated, though; - -For example: -```python -from cachebox import LRUCache - -lru = LRUCache(10, {"a": "b", "c": "d"}) - -print(lru.insert("a", "new-key")) # "b" -print(lru.insert("no-exists", "val")) # None -``` - -## Tips and Notes -#### How to save caches in files? -there's no built-in file-based implementation, but you can use `pickle` for saving caches in files. For example: -```python -import cachebox -import pickle -c = cachebox.LRUCache(100, {i:i for i in range(78)}) - -with open("file", "wb") as fd: - pickle.dump(c, fd) - -with open("file", "rb") as fd: - loaded = pickle.load(fd) - -assert c == loaded -assert c.capacity() == loaded.capacity() -``` - -> [!TIP]\ -> For more, see this [issue](https://github.com/awolverp/cachebox/issues/8). - -> [!NOTE]\ -> Supported since version 3.1.0 - -* * * - -#### How to copy the caches? -Use `copy.deepcopy` or `copy.copy` for copying caches. 
For example: -```python -import cachebox, copy -c = cachebox.LRUCache(100, {i:i for i in range(78)}) - -copied = copy.copy(c) - -assert c == copied -assert c.capacity() == copied.capacity() -``` - -> [!NOTE]\ -> Supported since version 3.1.0 - -## License -This repository is licensed under the [MIT License](LICENSE) +
From 3770b26d556fc66d163c1ee377e45b926bd5efa1 Mon Sep 17 00:00:00 2001 From: awolverp Date: Thu, 17 Apr 2025 15:15:45 +0330 Subject: [PATCH 32/37] * Rename the CacheInfo.cachememory to CacheInfo.memory * Update docstrings --- python/cachebox/utils.py | 106 ++++++++++++++++++++++++++++----------- 1 file changed, 76 insertions(+), 30 deletions(-) diff --git a/python/cachebox/utils.py b/python/cachebox/utils.py index c6b5591..f2af432 100644 --- a/python/cachebox/utils.py +++ b/python/cachebox/utils.py @@ -13,15 +13,22 @@ class Frozen(BaseCacheImpl, typing.Generic[KT, VT]): # pragma: no cover + """ + A wrapper class that prevents modifications to an underlying cache implementation. + + This class provides a read-only view of a cache, optionally allowing silent + suppression of modification attempts instead of raising exceptions. + """ __slots__ = ("__cache", "ignore") def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: """ - **This is not a cache.** this class can freeze your caches and prevents changes. - - :param cls: your cache - - :param ignore: If False, will raise TypeError if anyone try to change cache. will do nothing otherwise. + Initialize a frozen cache wrapper. + + :param cls: The underlying cache implementation to be frozen + :type cls: BaseCacheImpl[KT, VT] + :param ignore: If True, silently ignores modification attempts; if False, raises TypeError when modification is attempted + :type ignore: bool, optional """ assert isinstance(cls, BaseCacheImpl) assert type(cls) is not Frozen @@ -152,7 +159,10 @@ def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: class _LockWithCounter: """ - A threading/asyncio lock which count the waiters + A lock with a counter to track the number of waiters. + + This class provides a lock mechanism that supports both synchronous and asynchronous contexts, + with the ability to track the number of threads or coroutines waiting to acquire the lock. 
""" __slots__ = ("lock", "waiters") @@ -191,6 +201,17 @@ def _copy_if_need(obj, tocopy=(dict, list, set), level: int = 1): def make_key(args: tuple, kwds: dict, fasttype=(int, str)): + """ + Create a hashable key from function arguments for caching purposes. + + Args: + args (tuple): Positional arguments to be used in key generation. + kwds (dict): Keyword arguments to be used in key generation. + fasttype (tuple, optional): Types that can be directly used as keys. Defaults to (int, str). + + Returns: + A hashable key representing the function arguments, optimized for simple single-argument cases. + """ key = args if kwds: key += (object,) @@ -204,10 +225,30 @@ def make_key(args: tuple, kwds: dict, fasttype=(int, str)): def make_hash_key(args: tuple, kwds: dict): + """ + Create a hashable hash key from function arguments for caching purposes. + + Args: + args (tuple): Positional arguments to be used in key generation. + kwds (dict): Keyword arguments to be used in key generation. + + Returns: + int: A hash value representing the function arguments. + """ return hash(make_key(args, kwds)) def make_typed_key(args: tuple, kwds: dict): + """ + Create a hashable key from function arguments that includes type information. + + Args: + args (tuple): Positional arguments to be used in key generation. + kwds (dict): Keyword arguments to be used in key generation. + + Returns: + A hashable key representing the function arguments, including the types of the arguments. 
+ """ key = make_key(args, kwds, fasttype=()) key += tuple(type(v) for v in args) # type: ignore @@ -217,7 +258,7 @@ def make_typed_key(args: tuple, kwds: dict): return key -CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "length", "cachememory"]) +CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "length", "memory"]) EVENT_MISS = 1 EVENT_HIT = 2 @@ -406,27 +447,22 @@ def cached( copy_level: int = 1, ): """ - Decorator to wrap a function with a memoizing callable that saves results in a cache. - - :param cache: Specifies a cache that handles and stores the results. if `None` or `dict`, `FIFOCache` will be used. - - :param key_maker: Specifies a function that will be called with the same positional and keyword - arguments as the wrapped function itself, and which has to return a suitable - cache key (must be hashable). - - :param clear_reuse: The wrapped function has a function named `clear_cache` that uses `cache.clear` - method to clear the cache. This parameter will be passed to cache's `clear` method. - - :param callback: Every time the `cache` is used, callback is also called. - The callback arguments are: event number (see `EVENT_MISS` or `EVENT_HIT` variables), key, and then result. - - :param copy_level: The wrapped function always copies the result of your function and then returns it. - This parameter specifies that the wrapped function has to copy which type of results. - `0` means "never copy", `1` means "only copy `dict`, `list`, and `set` results" and - `2` means "always copy the results". - + Decorator to create a memoized cache for function results. + + Wraps a function to automatically cache and retrieve its results based on input parameters. + + Args: + cache (BaseCacheImpl, dict, optional): Cache implementation to store results. Defaults to FIFOCache. + key_maker (Callable, optional): Function to generate cache keys from function arguments. Defaults to make_key. 
+ clear_reuse (bool, optional): Whether to reuse cache during clearing. Defaults to False. + callback (Callable, optional): Function called on cache hit/miss events. Defaults to None. + copy_level (int, optional): Level of result copying. Defaults to 1. + + Returns: + Callable: Decorated function with caching capabilities. + Example:: - + @cachebox.cached(cachebox.LRUCache(128)) def sum_as_string(a, b): return str(a+b) @@ -436,8 +472,6 @@ def sum_as_string(a, b): assert len(sum_as_string.cache) == 1 sum_as_string.cache_clear() assert len(sum_as_string.cache) == 0 - - See more: [documentation](https://github.com/awolverp/cachebox#function-cached) """ if cache is None: cache = FIFOCache(0) @@ -471,7 +505,19 @@ def cachedmethod( copy_level: int = 1, ): """ - this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making. + Decorator to create a method-specific memoized cache for function results. + + Similar to `cached()`, but ignores `self` parameter when generating cache keys. + + Args: + cache (BaseCacheImpl, dict, optional): Cache implementation to store results. Defaults to FIFOCache. + key_maker (Callable, optional): Function to generate cache keys from function arguments. Defaults to make_key. + clear_reuse (bool, optional): Whether to reuse cache during clearing. Defaults to False. + callback (Callable, optional): Function called on cache hit/miss events. Defaults to None. + copy_level (int, optional): Level of result copying. Defaults to 1. + + Returns: + Callable: Decorated method with method-specific caching capabilities. 
""" if cache is None: cache = FIFOCache(0) From 8530d31244248478a8d2c36c2d6cec6abeb839c2 Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 18 Apr 2025 13:18:22 +0330 Subject: [PATCH 33/37] Move BaseCacheImpl from _cachebox.py to _core --- python/cachebox/__init__.py | 2 +- python/cachebox/_cachebox.py | 13 +------- python/cachebox/_core.pyi | 60 ++++++++++++++++++++++++++++++++++++ python/cachebox/utils.py | 37 +++++++++++----------- python/tests/mixin.py | 4 +-- src/bridge/mod.rs | 36 ++++++++++++++++++++++ src/lib.rs | 1 + 7 files changed, 120 insertions(+), 33 deletions(-) diff --git a/python/cachebox/__init__.py b/python/cachebox/__init__.py index d2ab225..3438d0c 100644 --- a/python/cachebox/__init__.py +++ b/python/cachebox/__init__.py @@ -3,6 +3,7 @@ __version__ as __version__, ) from ._cachebox import ( + BaseCacheImpl as BaseCacheImpl, Cache as Cache, FIFOCache as FIFOCache, RRCache as RRCache, @@ -10,7 +11,6 @@ LFUCache as LFUCache, TTLCache as TTLCache, VTTLCache as VTTLCache, - BaseCacheImpl as BaseCacheImpl, IteratorView as IteratorView, ) from .utils import ( diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 0c5d976..1bd303e 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -1,4 +1,5 @@ from . import _core +from ._core import BaseCacheImpl from datetime import timedelta, datetime import typing @@ -29,18 +30,6 @@ def _items_to_str(items, length): return "{%s, ... %d more ...}" % (", ".join(left), length - c) -class BaseCacheImpl(typing.Generic[KT, VT]): - """ - Base implementation for cache classes in the cachebox library. - - This abstract base class defines the generic structure for cache implementations, - supporting different key and value types through generic type parameters. - Serves as a foundation for specific cache variants like Cache and FIFOCache. 
- """ - - pass - - class IteratorView(typing.Generic[VT]): __slots__ = ("iterator", "func") diff --git a/python/cachebox/_core.pyi b/python/cachebox/_core.pyi index 059faf8..dc16808 100644 --- a/python/cachebox/_core.pyi +++ b/python/cachebox/_core.pyi @@ -1,3 +1,5 @@ +import typing + __version__: str __author__: str @@ -8,3 +10,61 @@ class CoreKeyError(Exception): """ ... + +KT = typing.TypeVar("KT") +VT = typing.TypeVar("VT") +DT = typing.TypeVar("DT") + +class BaseCacheImpl(typing.Generic[KT, VT]): + """ + Base implementation for cache classes in the cachebox library. + + This abstract base class defines the generic structure for cache implementations, + supporting different key and value types through generic type parameters. + Serves as a foundation for specific cache variants like Cache and FIFOCache. + """ + + def __init__( + self, + maxsize: int, + iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]] = ..., + *, + capacity: int = ..., + ) -> None: ... + @staticmethod + def __class_getitem__(*args) -> None: ... + @property + def maxsize(self) -> int: ... + def __len__(self) -> int: ... + def __sizeof__(self) -> int: ... + def __bool__(self) -> bool: ... + def __contains__(self, key: KT) -> bool: ... + def __setitem__(self, key: KT, value: VT) -> None: ... + def __getitem__(self, key: KT) -> VT: ... + def __delitem__(self, key: KT) -> VT: ... + def __str__(self) -> str: ... + def __iter__(self) -> typing.Iterator[KT]: ... + def __eq__(self, other) -> bool: ... + def __ne__(self, other) -> bool: ... + def capacity(self) -> int: ... + def is_full(self) -> bool: ... + def is_empty(self) -> bool: ... + def insert(self, key: KT, value: VT, *args, **kwargs) -> typing.Optional[VT]: ... + def get(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: ... + def pop(self, key: KT, default: DT = None) -> typing.Union[VT, DT]: ... 
+ def setdefault( + self, key: KT, default: typing.Optional[DT] = None, *args, **kwargs + ) -> typing.Optional[VT | DT]: ... + def popitem(self) -> typing.Tuple[KT, VT]: ... + def drain(self, n: int) -> int: ... + def clear(self, *, reuse: bool = False) -> None: ... + def shrink_to_fit(self) -> None: ... + def update( + self, + iterable: typing.Union[typing.Iterable[typing.Tuple[KT, VT]], typing.Dict[KT, VT]], + *args, + **kwargs, + ) -> None: ... + def keys(self) -> typing.Iterable[KT]: ... + def values(self) -> typing.Iterable[VT]: ... + def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: ... diff --git a/python/cachebox/utils.py b/python/cachebox/utils.py index f2af432..56e8f73 100644 --- a/python/cachebox/utils.py +++ b/python/cachebox/utils.py @@ -15,16 +15,17 @@ class Frozen(BaseCacheImpl, typing.Generic[KT, VT]): # pragma: no cover """ A wrapper class that prevents modifications to an underlying cache implementation. - - This class provides a read-only view of a cache, optionally allowing silent + + This class provides a read-only view of a cache, optionally allowing silent suppression of modification attempts instead of raising exceptions. """ + __slots__ = ("__cache", "ignore") def __init__(self, cls: BaseCacheImpl[KT, VT], ignore: bool = False) -> None: """ Initialize a frozen cache wrapper. - + :param cls: The underlying cache implementation to be frozen :type cls: BaseCacheImpl[KT, VT] :param ignore: If True, silently ignores modification attempts; if False, raises TypeError when modification is attempted @@ -160,7 +161,7 @@ def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: class _LockWithCounter: """ A lock with a counter to track the number of waiters. - + This class provides a lock mechanism that supports both synchronous and asynchronous contexts, with the ability to track the number of threads or coroutines waiting to acquire the lock. 
""" @@ -203,12 +204,12 @@ def _copy_if_need(obj, tocopy=(dict, list, set), level: int = 1): def make_key(args: tuple, kwds: dict, fasttype=(int, str)): """ Create a hashable key from function arguments for caching purposes. - + Args: args (tuple): Positional arguments to be used in key generation. kwds (dict): Keyword arguments to be used in key generation. fasttype (tuple, optional): Types that can be directly used as keys. Defaults to (int, str). - + Returns: A hashable key representing the function arguments, optimized for simple single-argument cases. """ @@ -227,11 +228,11 @@ def make_key(args: tuple, kwds: dict, fasttype=(int, str)): def make_hash_key(args: tuple, kwds: dict): """ Create a hashable hash key from function arguments for caching purposes. - + Args: args (tuple): Positional arguments to be used in key generation. kwds (dict): Keyword arguments to be used in key generation. - + Returns: int: A hash value representing the function arguments. """ @@ -241,11 +242,11 @@ def make_hash_key(args: tuple, kwds: dict): def make_typed_key(args: tuple, kwds: dict): """ Create a hashable key from function arguments that includes type information. - + Args: args (tuple): Positional arguments to be used in key generation. kwds (dict): Keyword arguments to be used in key generation. - + Returns: A hashable key representing the function arguments, including the types of the arguments. """ @@ -448,21 +449,21 @@ def cached( ): """ Decorator to create a memoized cache for function results. - + Wraps a function to automatically cache and retrieve its results based on input parameters. - + Args: cache (BaseCacheImpl, dict, optional): Cache implementation to store results. Defaults to FIFOCache. key_maker (Callable, optional): Function to generate cache keys from function arguments. Defaults to make_key. clear_reuse (bool, optional): Whether to reuse cache during clearing. Defaults to False. callback (Callable, optional): Function called on cache hit/miss events. 
Defaults to None. copy_level (int, optional): Level of result copying. Defaults to 1. - + Returns: Callable: Decorated function with caching capabilities. - + Example:: - + @cachebox.cached(cachebox.LRUCache(128)) def sum_as_string(a, b): return str(a+b) @@ -506,16 +507,16 @@ def cachedmethod( ): """ Decorator to create a method-specific memoized cache for function results. - + Similar to `cached()`, but ignores `self` parameter when generating cache keys. - + Args: cache (BaseCacheImpl, dict, optional): Cache implementation to store results. Defaults to FIFOCache. key_maker (Callable, optional): Function to generate cache keys from function arguments. Defaults to make_key. clear_reuse (bool, optional): Whether to reuse cache during clearing. Defaults to False. callback (Callable, optional): Function called on cache hit/miss events. Defaults to None. copy_level (int, optional): Level of result copying. Defaults to 1. - + Returns: Callable: Decorated method with method-specific caching capabilities. """ diff --git a/python/tests/mixin.py b/python/tests/mixin.py index b9191e4..bdbc1e8 100644 --- a/python/tests/mixin.py +++ b/python/tests/mixin.py @@ -131,10 +131,10 @@ def test___setitem__(self): cache[2] def test___repr__(self): - cache = self.CACHE(100, **self.KWARGS, capacity=2) + cache = self.CACHE(1000, **self.KWARGS, capacity=2) assert repr(cache).startswith(self.CACHE.__module__ + "." 
+ self.CACHE.__name__) - cache.update({i: i for i in range(100)}) + cache.update((i, i) for i in range(1000)) assert str(cache) == repr(cache) def test_insert(self): diff --git a/src/bridge/mod.rs b/src/bridge/mod.rs index 3a39df7..5f85d87 100644 --- a/src/bridge/mod.rs +++ b/src/bridge/mod.rs @@ -1,7 +1,43 @@ use pyo3::create_exception; +use pyo3::types::PyTypeMethods; create_exception!(cachebox._core, CoreKeyError, pyo3::exceptions::PyException); +#[pyo3::pyclass(module = "cachebox._cachebox", subclass, frozen)] +pub struct BaseCacheImpl {} + +#[pyo3::pymethods] +impl BaseCacheImpl { + #[new] + #[pyo3(signature = (*args, **kwargs))] + #[classmethod] + #[allow(unused_variables)] + pub fn __new__( + cls: &pyo3::Bound<'_, pyo3::types::PyType>, + args: &pyo3::Bound<'_, pyo3::PyAny>, + kwargs: Option<&pyo3::Bound<'_, pyo3::PyAny>>, + ) -> pyo3::PyResult { + let size = unsafe { pyo3::ffi::PyTuple_Size(cls.mro().as_ptr()) }; + + // This means BaseCacheImpl is used as subclass + // So we shouldn't raise NotImplementedError + if size > 2 { + Ok(Self {}) + } else { + Err(pyo3::PyErr::new::("do not call this constructor, you can subclass this implementation or use other classes.")) + } + } + + #[allow(unused_variables)] + #[classmethod] + pub fn __class_getitem__( + cls: &pyo3::Bound<'_, pyo3::types::PyType>, + args: pyo3::PyObject, + ) -> pyo3::PyObject { + cls.clone().into() + } +} + #[pyo3::pyclass(module = "cachebox._core", frozen)] pub struct TTLPair { key: pyo3::PyObject, diff --git a/src/lib.rs b/src/lib.rs index 7f6296d..1add7e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,7 @@ fn _core(py: pyo3::Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } From 022f8758ea46224f41c83a862900477b6686c7ad Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 18 Apr 2025 15:51:44 +0330 Subject: [PATCH 34/37] Write README.md --- README.md | 574 ++++++++++++++++++++++++++++++++--- 
python/cachebox/_cachebox.py | 6 +- 2 files changed, 535 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 69eebb1..a300016 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Release - Python Versions + Python Versions Downloads @@ -31,7 +31,7 @@ This can make your application very faster and it's a good choice in big applica **Ideal for optimizing large-scale applications** with efficient, low-overhead caching. **Key Features:** -- ๐Ÿš€ Extremely fast (10-50x faster than other caching libraries - [benchmarks](https://github.com/awolverp/cachebox-benchmark)) +- ๐Ÿš€ Extremely fast (10-50x faster than other caching libraries -- [*benchmarks*](https://github.com/awolverp/cachebox-benchmark)) - ๐Ÿ“Š Minimal memory footprint (50% of standard dictionary memory usage) - ๐Ÿ”ฅ Full-featured and user-friendly - ๐Ÿงถ Completely thread-safe @@ -41,13 +41,13 @@ This can make your application very faster and it's a good choice in big applica - ๐Ÿ“ฆ Supports 7 advanced caching algorithms ### Page Contents -- [โ“ **When i need caching and cachebox**](#when-i-need-caching-and-cachebox) -- [๐ŸŒŸ **Why `cachebox`**](#why-cachebox) -- [๐Ÿ”ง **Installation**](#installation) -- [๐Ÿ’ก **Preview**](#example) -- [๐ŸŽ“ **Learn**](#learn) -- [โœ๏ธ **Incompatible changes**](#incompatible-changes) -- [๐Ÿ“Œ **Tips & Notes**](#tips-and-notes) +- โ“ [**When i need caching and cachebox**](#when-i-need-caching-and-cachebox) +- ๐ŸŒŸ [**Why `cachebox`**](#why-cachebox) +- ๐Ÿ”ง [**Installation**](#installation) +- ๐Ÿ’ก [**Preview**](#examples) +- ๐ŸŽ“ [**Getting started**](#getting-started) +- โœ๏ธ [**Incompatible changes**](#incompatible-changes) +- ๐Ÿ“Œ [**Tips & Notes**](#tips-and-notes) ### When i need caching and cachebox - ๐Ÿ“ˆ **Frequently Data Access** \ @@ -100,7 +100,7 @@ pip3 install -U cachebox > [!WARNING]\ > The new version v5 has some incompatible with v4, for more info please see [Incompatible changes](#incompatible-changes) -## Example +## Examples 
The simplest example of **cachebox** could look like this: ```python import cachebox @@ -150,30 +150,34 @@ assert cache["key"] == "value" assert cache.get("key") == "value" ``` -## Learn +## Getting started There are 3 useful functions: -- [**cached**](#decorator-cached): a decorator that helps you to cache your functions and calculations with a lot of options. -- [**cachedmethod**](#decorator-cachedmethod): this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making. -- [**is_cached**](#function-is_cached): check if a function/method cached by cachebox or not +- [**cached**](#cached--decorator): a decorator that helps you to cache your functions and calculations with a lot of options. +- [**cachedmethod**](#cachedmethod--decorator): this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making. +- [**is_cached**](#is_cached--function): check if a function/method cached by cachebox or not And 9 classes: -- [**BaseCacheImpl**](#class-basecacheimpl): base-class for all classes. -- [**Cache**](#class-cache): A simple cache that has no algorithm; this is only a hashmap. -- [**FIFOCache**](#class-fifocache): the FIFO cache will remove the element that has been in the cache the longest. -- [**RRCache**](#class-rrcache): the RR cache will choice randomly element to remove it to make space when necessary. -- [**LRUCache**](#class-lrucache): the LRU cache will remove the element in the cache that has not been accessed in the longest time. -- [**LFUCache**](#class-lfucache): the LFU cache will remove the element in the cache that has been accessed the least, regardless of time. -- [**TTLCache**](#class-ttlcache): the TTL cache will automatically remove the element in the cache that has expired. -- [**VTTLCache**](#class-vttlcache): the TTL cache will automatically remove the element in the cache that has expired when need. -- [**Frozen**](#class-frozen): you can use this class for freezing your caches. 
- - -### Decorator `cached` -Decorator to wrap a function with a memoizing callable that saves results in a cache. +- [**BaseCacheImpl**](#basecacheimpl-๏ธ-class): base-class for all classes. +- [**Cache**](#cache-๏ธ-class): A simple cache that has no algorithm; this is only a hashmap. +- [**FIFOCache**](#fifocache-๏ธ-class): the FIFO cache will remove the element that has been in the cache the longest. +- [**RRCache**](#rrcache-๏ธ-class): the RR cache will choice randomly element to remove it to make space when necessary. +- [**LRUCache**](#lrucache-๏ธ-class): the LRU cache will remove the element in the cache that has not been accessed in the longest time. +- [**LFUCache**](#lfucache-๏ธ-class): the LFU cache will remove the element in the cache that has been accessed the least, regardless of time. +- [**TTLCache**](#ttlcache-๏ธ-class): the TTL cache will automatically remove the element in the cache that has expired. +- [**VTTLCache**](#vttlcache-๏ธ-class): the TTL cache will automatically remove the element in the cache that has expired when need. +- [**Frozen**](#frozen-๏ธ-class): you can use this class for freezing your caches. -
-Parameters +You only need to import the class which you want, and behave with it like a dictionary (except for [VTTLCache](#vttlcache-๏ธ-class), this have some differences) + +There are some examples for you with different methods for introducing those. +**All the methods you will see in the examples are common across all classes (except for a few of them).** + +* * * + +### `cached` (๐ŸŽ€ decorator) +Decorator to wrap a function with a memoizing callable that saves results in a cache. +**Parameters:** - `cache`: Specifies a cache that handles and stores the results. if `None` or `dict`, `FIFOCache` will be used. - `key_maker`: Specifies a function that will be called with the same positional and keyword @@ -191,13 +195,11 @@ Decorator to wrap a function with a memoizing callable that saves results in a c `0` means "never copy", `1` means "only copy `dict`, `list`, and `set` results" and `2` means "always copy the results". -
-
Examples -**A simple example:** +A simple example: ```python import cachebox @@ -212,7 +214,7 @@ sum_as_string.cache_clear() assert len(sum_as_string.cache) == 0 ``` -**A key_maker example:** +A key_maker example: ```python import cachebox @@ -225,7 +227,7 @@ async def request_handler(request: Request): return Response("hello man") ``` -**A typed key_maker example:** +A typed key_maker example: ```python import cachebox @@ -257,7 +259,7 @@ print(sum_as_string.cache_info()) sum_as_string.cache_clear() ``` -**callback example:** (Added in v4.2.0) +callback example: *(Added in v4.2.0)* ```python import cachebox @@ -318,7 +320,7 @@ assert func(5, 4) == 9 * * * -### Decorator `cachedmethod` +### `cachedmethod` (๐ŸŽ€ decorator) this is excatly works like `cached()`, but ignores `self` parameters in hashing and key making.
@@ -338,17 +340,14 @@ c.my_method()
+* * * -### Function `is_cached` +### `is_cached` (๐Ÿ“ฆ function) Checks that a function/method is cached by cachebox or not. -
-Parameters - +**Parameters:** - `func`: The function/method to check. -
-
Example @@ -363,3 +362,494 @@ assert cachebox.is_cached(func) ```
+ +* * * + +### `BaseCacheImpl` (๐Ÿ—๏ธ class) +Base implementation for cache classes in the cachebox library. + +This abstract base class defines the generic structure for cache implementations, +supporting different key and value types through generic type parameters. +Serves as a foundation for specific cache variants like Cache and FIFOCache. + +
+Example + +```python +import cachebox + +# subclass +class ClassName(cachebox.BaseCacheImpl): + ... + +# type-hint +def func(cache: BaseCacheImpl): + ... + +# isinstance +cache = cachebox.LFUCache(0) +assert isinstance(cache, cachebox.BaseCacheImpl) +``` + +
+ +* * * + +### `Cache` (๐Ÿ—๏ธ class) +A thread-safe, memory-efficient hashmap-like cache with configurable maximum size. + +Provides a flexible key-value storage mechanism with: +- Configurable maximum size (zero means unlimited) +- Lower memory usage compared to standard dict +- Thread-safe operations +- Useful memory management methods + +Supports initialization with optional initial data and capacity, +and provides dictionary-like access with additional cache-specific operations. + +> [!TIP]\ +> Differs from standard `dict` by: +> - it is thread-safe and unordered, while dict isn't thread-safe and ordered (Python 3.6+). +> - it uses very lower memory than dict. +> - it supports useful and new methods for managing memory, while dict does not. +> - it does not support popitem, while dict does. +> - You can limit the size of Cache, but you cannot for dict. + +| | get | insert | delete | popitem | +| ------------ | ----- | ------- | ------ | ------- | +| Worse-case | O(1) | O(1) | O(1) | N/A | + +
+Example + +```python +from cachebox import Cache + +# These parameters are common in classes: +# By `maxsize` param, you can specify the limit size of the cache ( zero means infinity ); this is unchangable. +# By `iterable` param, you can create cache from a dict or an iterable. +# If `capacity` param is given, cache attempts to allocate a new hash table with at +# least enough capacity for inserting the given number of elements without reallocating. +cache = Cache(maxsize=100, iterable=None, capacity=100) + +# you can behave with it like a dictionary +cache["key"] = "value" +# or you can use `.insert(key, value)` instead of that (recommended) +cache.insert("key", "value") + +print(cache["key"]) # value + +del cache["key"] +cache["key"] # KeyError: key + +# cachebox.Cache does not have any policy, so will raise OverflowError if reached the bound. +cache.update({i:i for i in range(200)}) +# OverflowError: The cache has reached the bound. +``` + +
+ +* * * + +### `FIFOCache` (๐Ÿ—๏ธ class) +A First-In-First-Out (FIFO) cache implementation with configurable maximum size and optional initial capacity. + +This cache provides a fixed-size container that automatically removes the oldest items when the maximum size is reached. + +**Key features**: +- Deterministic item eviction order (oldest items removed first) +- Efficient key-value storage and retrieval +- Supports dictionary-like operations +- Allows optional initial data population + +| | get | insert | delete | popitem | +| ------------ | ----- | ------- | ------------- | ------- | +| Worse-case | O(1) | O(1) | O(min(i, n-i)) | O(1) | + +
+Example + +```python +from cachebox import FIFOCache + +cache = FIFOCache(5, {i:i*2 for i in range(5)}) + +print(len(cache)) # 5 +cache["new-key"] = "new-value" +print(len(cache)) # 5 + +print(cache.get(3, "default-val")) # 6 +print(cache.get(6, "default-val")) # default-val + +print(cache.popitem()) # (1, 2) + +# insert method returns a value: +# - If the cache did not have this key present, None is returned. +# - If the cache did have this key present, the value is updated, and the old value is returned. +print(cache.insert(3, "val")) # 6 +print(cache.insert("new-key", "val")) # None + +# Returns the first key in cache; this is the one which will be removed by `popitem()`. +print(cache.first()) +``` + +
+ +* * * + +### `RRCache` (๐Ÿ—๏ธ class) +A thread-safe cache implementation with Random Replacement (RR) policy. + +This cache randomly selects and removes elements when the cache reaches its maximum size, +ensuring a simple and efficient caching mechanism with configurable capacity. + +Supports operations like insertion, retrieval, deletion, and iteration with O(1) complexity. + +| | get | insert | delete | popitem | +| ------------ | ----- | ------- | ------ | ------- | +| Worse-case | O(1) | O(1) | O(1) | O(1) | + +
+Example + +```python +from cachebox import RRCache + +cache = RRCache(10, {i:i for i in range(10)}) +print(cache.is_full()) # True +print(cache.is_empty()) # False + +# Returns the number of elements the map can hold without reallocating. +print(cache.capacity()) # 28 + +# Shrinks the cache to fit len(self) elements. +cache.shrink_to_fit() +print(cache.capacity()) # 10 + +# Returns a random key +print(cache.random_key()) # 4 +``` + +
+ +* * * + +### `LRUCache` (๐Ÿ—๏ธ class) +Thread-safe Least Recently Used (LRU) cache implementation. + +Provides a cache that automatically removes the least recently used items when +the cache reaches its maximum size. Supports various operations like insertion, +retrieval, and management of cached items with configurable maximum size and +initial capacity. + +| | get | insert | delete(i) | popitem | +| ------------ | ----- | ------- | --------- | ------- | +| Worse-case | O(1)~ | O(1)~ | O(1)~ | O(1)~ | + +
+Example + +```python +from cachebox import LRUCache + +cache = LRUCache(0, {i:i*2 for i in range(10)}) + +# access `1` +print(cache[0]) # 0 +print(cache.least_recently_used()) # 1 +print(cache.popitem()) # (1, 2) + +# .peek() searches for a key-value in the cache and returns it without moving the key to recently used. +print(cache.peek(2)) # 4 +print(cache.popitem()) # (3, 6) + +# Does the `popitem()` `n` times and returns count of removed items. +print(cache.drain(5)) # 5 +``` + +
+ +* * * + +### `LFUCache` (๐Ÿ—๏ธ class) +A thread-safe Least Frequently Used (LFU) cache implementation. + +This cache removes elements that have been accessed the least number of times, +regardless of their access time. It provides methods for inserting, retrieving, +and managing cache entries with configurable maximum size and initial capacity. + +| | get | insert | delete(i) | popitem | +| ------------ | ----- | ------- | --------- | ------- | +| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + +
+Example + +```python +from cachebox import LFUCache + +cache = cachebox.LFUCache(5) +cache.insert('first', 'A') +cache.insert('second', 'B') + +# access 'first' twice +cache['first'] +cache['first'] + +# access 'second' once +cache['second'] + +assert cache.least_frequently_used() == 'second' +assert cache.least_frequently_used(2) is None # 2 is out of range + +for item in cache.items_with_frequency(): + print(item) +# ('second', 'B', 1) +# ('first', 'A', 2) +``` + +
+ +* * * + +### `TTLCache` (๐Ÿ—๏ธ class) +A thread-safe Time-To-Live (TTL) cache implementation with configurable maximum size and expiration. + +This cache automatically removes elements that have expired based on their time-to-live setting. +Supports various operations like insertion, retrieval, and iteration. + +| | get | insert | delete(i) | popitem | +| ------------ | ----- | ------- | --------- | ------- | +| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(n) | + +
+Example + +```python +from cachebox import TTLCache +import time + +# The `ttl` param specifies the time-to-live value for each element in cache (in seconds); cannot be zero or negative. +cache = TTLCache(0, ttl=2) +cache.update({i:str(i) for i in range(10)}) + +print(cache.get_with_expire(2)) # ('2', 1.99) + +# Returns the oldest key in cache; this is the one which will be removed by `popitem()` +print(cache.first()) # 0 + +cache["mykey"] = "value" +time.sleep(2) +cache["mykey"] # KeyError +``` + +
+ +* * * + +### `VTTLCache` (๐Ÿ—๏ธ class) +A thread-safe, time-to-live (TTL) cache implementation with per-key expiration policy. + +This cache allows storing key-value pairs with optional expiration times. When an item expires, +it is automatically removed from the cache. The cache supports a maximum size and provides +various methods for inserting, retrieving, and managing cached items. + +Key features: +- Per-key time-to-live (TTL) support +- Configurable maximum cache size +- Thread-safe operations +- Automatic expiration of items + +Supports dictionary-like operations such as get, insert, update, and iteration. + +| | get | insert | delete(i) | popitem | +| ------------ | ----- | ------- | --------- | ------- | +| Worse-case | O(1)~ | O(1)~ | O(min(i, n-i)) | O(1)~ | + +> [!TIP]\ +> `VTTLCache` vs `TTLCache`: +> - In `VTTLCache` each item has its own unique time-to-live, unlike `TTLCache`. +> - `VTTLCache` is generally slower than `TTLCache`. + +
+Example + +```python +from cachebox import VTTLCache +import time + +# The `ttl` param specifies the time-to-live value for `iterable` (in seconds); cannot be zero or negative. +cache = VTTLCache(100, iterable={i:i for i in range(4)}, ttl=3) +print(len(cache)) # 4 +time.sleep(3) +print(len(cache)) # 0 + +# The "key1" is exists for 5 seconds +cache.insert("key1", "value", ttl=5) +# The "key2" is exists for 2 seconds +cache.insert("key2", "value", ttl=2) + +time.sleep(2) +# "key1" is exists for 3 seconds +print(cache.get("key1")) # value + +# "key2" has expired +print(cache.get("key2")) # None +``` + +
+ +* * * + +### `Frozen` (๐Ÿ—๏ธ class) +**This is not a cache**; This is a wrapper class that prevents modifications to an underlying cache implementation. + +This class provides a read-only view of a cache, optionally allowing silent +suppression of modification attempts instead of raising exceptions. + +
+Example + +```python +from cachebox import Frozen, FIFOCache + +cache = FIFOCache(10, {1:1, 2:2, 3:3}) + +# parameters: +# cls: your cache +# ignore: If False, will raise TypeError if anyone try to change cache. will do nothing otherwise. +frozen = Frozen(cache, ignore=True) +print(frozen[1]) # 1 +print(len(frozen)) # 3 + +# Frozen ignores this action and do nothing +frozen.insert("key", "value") +print(len(frozen)) # 3 + +# Let's try with ignore=False +frozen = Frozen(cache, ignore=False) + +frozen.insert("key", "value") +# TypeError: This cache is frozen. +``` + +
+ +> [!NOTE]\ +> The **Frozen** class can't prevent expiring in [TTLCache](#ttlcache) or [VTTLCache](#vttlcache). +> +> For example: +> ```python +> cache = TTLCache(0, ttl=3, iterable={i:i for i in range(10)}) +> frozen = Frozen(cache) +> +> time.sleep(3) +> print(len(frozen)) # 0 +> ``` + +## โš ๏ธ Incompatible Changes +These are changes that are not compatible with the previous version: + +**You can see more info about changes in [Changelog](CHANGELOG.md).** + +#### CacheInfo's cachememory attribute renamed! +The `CacheInfo.cachememory` was renamed to `CacheInfo.memory`. + +```python +@cachebox.cached({}) +def func(a: int, b: int) -> str: + ... + +info = func.cache_info() + +# Older versions +print(info.cachememory) + +# New version +print(info.memory) +``` + +#### Errors in the `__eq__` method will not be ignored! +Now the errors which occurred while doing `__eq__` operations will not be ignored. + +```python +class A: + def __hash__(self): + return 1 + + def __eq__(self, other): + raise NotImplementedError("not implemeneted") + +cache = cachebox.FIFOCache(0, {A(): 10}) + +# Older versions: +cache[A()] # => KeyError + +# New version: +cache[A()] +# Traceback (most recent call last): +# File "script.py", line 11, in +# cache[A()] +# ~~~~~^^^^^ +# File "script.py", line 7, in __eq__ +# raise NotImplementedError("not implemeneted") +# NotImplementedError: not implemeneted +``` + +#### Cache comparisons will not be strict! +In older versions, cache comparisons depended on the caching algorithm. Now, they work just like dictionary comparisons. + +```python +cache1 = cachebox.FIFOCache(10) +cache2 = cachebox.FIFOCache(10) + +cache1.insert(1, 'first') +cache1.insert(2, 'second') + +cache2.insert(2, 'second') +cache2.insert(1, 'first') + +# Older versions: +cache1 == cache2 # False + +# New version: +cache1 == cache2 # True +``` + +## Tips and Notes +#### How to save caches in files? 
+there's no built-in file-based implementation, but you can use `pickle` for saving caches in files. For example: +```python +import cachebox +import pickle +c = cachebox.LRUCache(100, {i:i for i in range(78)}) + +with open("file", "wb") as fd: + pickle.dump(c, fd) + +with open("file", "rb") as fd: + loaded = pickle.load(fd) + +assert c == loaded +assert c.capacity() == loaded.capacity() +``` + +> [!TIP]\ +> For more, see this [issue](https://github.com/awolverp/cachebox/issues/8). + +* * * + +#### How to copy the caches? +Use `copy.deepcopy` or `copy.copy` for copying caches. For example: +```python +import cachebox, copy +c = cachebox.LRUCache(100, {i:i for i in range(78)}) + +copied = copy.copy(c) + +assert c == copied +assert c.capacity() == copied.capacity() +``` + +## License +This repository is licensed under the [MIT License](LICENSE) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 1bd303e..e17386d 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -271,7 +271,7 @@ class FIFOCache(BaseCacheImpl[KT, VT]): A First-In-First-Out (FIFO) cache implementation with configurable maximum size and optional initial capacity. This cache provides a fixed-size container that automatically removes the oldest items when the maximum size is reached. - Supports various operations like insertion, retrieval, deletion, and iteration with O(1) complexity. + Supports various operations like insertion, retrieval, deletion, and iteration. Attributes: maxsize: The maximum number of items the cache can hold. @@ -523,7 +523,7 @@ class RRCache(BaseCacheImpl[KT, VT]): This cache randomly selects and removes elements when the cache reaches its maximum size, ensuring a simple and efficient caching mechanism with configurable capacity. - Supports operations like insertion, retrieval, deletion, and iteration with O(1) complexity. + Supports operations like insertion, retrieval, deletion, and iteration. 
""" __slots__ = ("_raw",) @@ -1285,7 +1285,7 @@ class TTLCache(BaseCacheImpl[KT, VT]): A thread-safe Time-To-Live (TTL) cache implementation with configurable maximum size and expiration. This cache automatically removes elements that have expired based on their time-to-live setting. - Supports various operations like insertion, retrieval, and iteration with O(1) complexity. + Supports various operations like insertion, retrieval, and iteration. """ __slots__ = ("_raw",) From 4bcb8a3ac667259180b5b3ea05eec86eb663ffd2 Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 18 Apr 2025 16:16:17 +0330 Subject: [PATCH 35/37] Update changelog, readme; Fix __repr__ --- CHANGELOG.md | 29 +++++++-- README.md | 14 +++-- python/cachebox/_cachebox.py | 117 ++++++++++++++++++++++++++++++++++- 3 files changed, 146 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63f4883..452aaca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,12 +6,29 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## v5.0.0 - Unreleased -### Targets: -- Update `hashbrown` dependency -- Make 2x faster by changing my *`isize` to `u64` strategy* in Rust. -- Rewrite cache classes API in Python; this help users to use classes as subclass and customize them. -- Make benchmarks better -- Make error handlings better +### Added +- A new method named `random_key` added to `RRCache`. +- A new method named `expire` added to `TTLCache`. +- Some new methods added to `VTTLCache`: `expire`, `items_with_expire`. +- `TTLCache` now supports `timedelta` as ttl. +- `VTTLCache` now supports `timedelta` and `datetime` as ttl. +- A new method `copy` added to all caches. + +### Changed +- The core codes (rust code) renamed from `_cachebox` to `_core`. Instead of that, all of classes + implemented in Python which are using the core's classes. 
This change can help to customize the alghoritms. +- Now the errors which occurred while doing `__eq__` operations will not be ignored. +- Docstrings is now more complete. +- The strictness in `__eq__` methods was reduced. +- Add more strictness for loading pickle objects. +- `LFUCache` now uses `VecDeque` instead of `Vec` (improves performance). +- The `CacheInfo.cachememory` renamed to `CacheInfo.memory`. +- *`isize` to `u64` strategy* changed in Rust. +- `__repr__` methods refactored. + +### Removed +- The `n` parameter of the `LRUCache.least_recently_used` method has been removed. +- The deprecated `always_copy` parameter of the `cached` and `cachedmethod` decorators has been removed. ## 4.5.3 - 2025-03-31 ### Changed diff --git a/README.md b/README.md index a300016..4f4cb21 100644 --- a/README.md +++ b/README.md @@ -840,15 +840,17 @@ assert c.capacity() == loaded.capacity() * * * #### How to copy the caches? -Use `copy.deepcopy` or `copy.copy` for copying caches. For example: +You can use `copy.deepcopy` or `cache.copy` for copying caches. For example: ```python -import cachebox, copy -c = cachebox.LRUCache(100, {i:i for i in range(78)}) +import cachebox +cache = cachebox.LRUCache(100, {i:i for i in range(78)}) -copied = copy.copy(c) +# shallow copy +shallow = cache.copy() -assert c == copied -assert c.capacity() == copied.capacity() +# deep copy +import copy +deep = copy.deepcopy(cache) ``` ## License diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index e17386d..7bcc794 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -1,6 +1,7 @@ from . 
import _core from ._core import BaseCacheImpl from datetime import timedelta, datetime +import copy as _std_copy import typing @@ -11,7 +12,7 @@ def _items_to_str(items, length): if length <= 50: - return "{" + ", ".join(f"{k}: {v}" for k, v in items) + "}" + return "{" + ", ".join(f"{k!r}: {v!r}" for k, v in items) + "}" c = 0 left = [] @@ -20,7 +21,7 @@ def _items_to_str(items, length): k, v = next(items) if c <= 50: - left.append(f"{k}: {v}") + left.append(f"{k!r}: {v!r}") else: break @@ -251,6 +252,22 @@ def values(self) -> IteratorView[VT]: """ return IteratorView(self._raw.items(), lambda x: x[1]) + def copy(self) -> "Cache[KT, VT]": + """Returns a shallow copy of the cache""" + return self.__copy__() + + def __copy__(self) -> "Cache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.copy(self._raw) + return copied + + def __deepcopy__(self, memo) -> "Cache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.deepcopy(self._raw, memo) + return copied + def __iter__(self) -> IteratorView[KT]: return self.keys() @@ -501,6 +518,22 @@ def last(self) -> typing.Optional[KT]: """ return self._raw.get_index(len(self._raw) - 1) + def copy(self) -> "FIFOCache[KT, VT]": + """Returns a shallow copy of the cache""" + return self.__copy__() + + def __copy__(self) -> "FIFOCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.copy(self._raw) + return copied + + def __deepcopy__(self, memo) -> "FIFOCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.deepcopy(self._raw, memo) + return copied + def __iter__(self) -> IteratorView[KT]: return self.keys() @@ -739,6 +772,22 @@ def values(self) -> IteratorView[VT]: """ return IteratorView(self._raw.items(), lambda x: x[1]) + def copy(self) -> "RRCache[KT, VT]": + """Returns a shallow copy of the cache""" + return self.__copy__() + + def __copy__(self) -> "RRCache[KT, VT]": + cls = type(self) + copied 
= cls.__new__(cls) + copied._raw = _std_copy.copy(self._raw) + return copied + + def __deepcopy__(self, memo) -> "RRCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.deepcopy(self._raw, memo) + return copied + def __iter__(self) -> IteratorView[KT]: return self.keys() @@ -995,6 +1044,22 @@ def most_recently_used(self) -> typing.Optional[KT]: """ return self._raw.most_recently_used() + def copy(self) -> "LRUCache[KT, VT]": + """Returns a shallow copy of the cache""" + return self.__copy__() + + def __copy__(self) -> "LRUCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.copy(self._raw) + return copied + + def __deepcopy__(self, memo) -> "LRUCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.deepcopy(self._raw, memo) + return copied + def __iter__(self) -> IteratorView[KT]: return self.keys() @@ -1264,6 +1329,22 @@ def least_frequently_used(self, n: int = 0) -> typing.Optional[KT]: return self._raw.least_frequently_used(n) + def copy(self) -> "LFUCache[KT, VT]": + """Returns a shallow copy of the cache""" + return self.__copy__() + + def __copy__(self) -> "LFUCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.copy(self._raw) + return copied + + def __deepcopy__(self, memo) -> "LFUCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.deepcopy(self._raw, memo) + return copied + def __iter__(self) -> IteratorView[KT]: return self.keys() @@ -1605,6 +1686,22 @@ def expire(self) -> None: # pragma: no cover """ self._raw.expire() + def copy(self) -> "TTLCache[KT, VT]": + """Returns a shallow copy of the cache""" + return self.__copy__() + + def __copy__(self) -> "TTLCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.copy(self._raw) + return copied + + def __deepcopy__(self, memo) -> "TTLCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + 
copied._raw = _std_copy.deepcopy(self._raw, memo) + return copied + def __iter__(self) -> IteratorView[KT]: return self.keys() @@ -1982,6 +2079,22 @@ def expire(self) -> None: # pragma: no cover """ self._raw.expire() + def copy(self) -> "VTTLCache[KT, VT]": + """Returns a shallow copy of the cache""" + return self.__copy__() + + def __copy__(self) -> "VTTLCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.copy(self._raw) + return copied + + def __deepcopy__(self, memo) -> "VTTLCache[KT, VT]": + cls = type(self) + copied = cls.__new__(cls) + copied._raw = _std_copy.deepcopy(self._raw, memo) + return copied + def __iter__(self) -> IteratorView[KT]: return self.keys() From 65ccbacb004a78000488205550642fb9c462c769 Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 18 Apr 2025 16:18:52 +0330 Subject: [PATCH 36/37] Update test workflow !test --- .github/workflows/python-test.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 11011df..7dfd6aa 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -2,10 +2,6 @@ name: python-test on: push: - branches: - - main - - workflow_dispatch: permissions: contents: read From 983eae4fda1abd231193020db1df95c0320c1335 Mon Sep 17 00:00:00 2001 From: awolverp Date: Fri, 18 Apr 2025 16:39:15 +0330 Subject: [PATCH 37/37] Improve tests !test --- python/cachebox/_cachebox.py | 2 +- python/cachebox/_core.pyi | 3 +++ python/tests/mixin.py | 29 +++++++++++++++++++++++++++++ python/tests/test_caches.py | 20 +++++++++++++++++++- 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/python/cachebox/_cachebox.py b/python/cachebox/_cachebox.py index 7bcc794..c3cc796 100644 --- a/python/cachebox/_cachebox.py +++ b/python/cachebox/_cachebox.py @@ -1939,7 +1939,7 @@ def popitem(self) -> typing.Tuple[KT, VT]: """ try: val = self._raw.popitem() - except _core.CoreKeyError: + except 
_core.CoreKeyError: # pragma: no cover raise KeyError() from None else: return val.pack2() diff --git a/python/cachebox/_core.pyi b/python/cachebox/_core.pyi index dc16808..728a3d4 100644 --- a/python/cachebox/_core.pyi +++ b/python/cachebox/_core.pyi @@ -68,3 +68,6 @@ class BaseCacheImpl(typing.Generic[KT, VT]): def keys(self) -> typing.Iterable[KT]: ... def values(self) -> typing.Iterable[VT]: ... def items(self) -> typing.Iterable[typing.Tuple[KT, VT]]: ... + def __copy__(self) -> "BaseCacheImpl[KT, VT]": ... + def __deepcopy__(self, memo) -> "BaseCacheImpl[KT, VT]": ... + def copy(self) -> "BaseCacheImpl[KT, VT]": ... diff --git a/python/tests/mixin.py b/python/tests/mixin.py index bdbc1e8..cc45177 100644 --- a/python/tests/mixin.py +++ b/python/tests/mixin.py @@ -431,3 +431,32 @@ def _test_pickle(self, check_order: typing.Callable): assert c1 == c2 assert c1.capacity() == c2.capacity() check_order(c1, c2) + + def test_copy(self): + import copy + + # shallow copy + c1 = self.CACHE(maxsize=0, **self.KWARGS) + c1.insert('dict', {}) + c2 = c1.copy() + + assert c2 == c1 + c2['dict'][1] = 1 + + assert c1['dict'][1] == 1 + + c2.insert(1, 1) + assert 1 not in c1 + + # deepcopy + c1 = self.CACHE(maxsize=0, **self.KWARGS) + c1.insert('dict', {}) + c2 = copy.deepcopy(c1) + + assert c2 == c1 + c2['dict'][1] = 1 + + assert 1 not in c1['dict'] + + c2.insert(1, 1) + assert 1 not in c1 diff --git a/python/tests/test_caches.py b/python/tests/test_caches.py index d585d9e..801cc69 100644 --- a/python/tests/test_caches.py +++ b/python/tests/test_caches.py @@ -430,6 +430,15 @@ def test_popitem_with_expire(self): with pytest.raises(KeyError): obj.popitem_with_expire() + def test_items_with_expire(self): + # no need to test completely items_with_expire + # because it's tested in test_iterators + obj = TTLCache(10, 3, {1: 2, 3: 4}) + for key, val, ttl in obj.items_with_expire(): + assert key in obj + assert val == obj[key] + assert isinstance(ttl, float) + class 
TestVTTLCache(_TestMixin): CACHE = VTTLCache @@ -571,5 +580,14 @@ def inner(c1, c2): c2 = pickle.loads(pickle.dumps(c1)) assert len(c2) == len(c1) - assert c1.capacity() == c2.capacity() + assert abs(c2.capacity() - c1.capacity()) < 2 inner(c1, c2) + + def test_items_with_expire(self): + # no need to test completely items_with_expire + # because it's tested in test_iterators + obj = VTTLCache(10, {1: 2, 3: 4}, ttl=10) + for key, val, ttl in obj.items_with_expire(): + assert key in obj + assert val == obj[key] + assert isinstance(ttl, float)