diff --git a/integration/test_backup_v4.py b/integration/test_backup_v4.py index b09f29133..f170d81f9 100644 --- a/integration/test_backup_v4.py +++ b/integration/test_backup_v4.py @@ -681,6 +681,43 @@ def test_overwrite_alias_true( assert literature.collection == "Article", "alias must point to the original collection" +def test_incremental_backup(client: weaviate.WeaviateClient, request: SubRequest) -> None: + """Create and restore incremental backup.""" + backup_id = unique_backup_id(request.node.name) + base_backup_id = backup_id + "_base" + + # create base backup + client.backup.create( + backup_id=base_backup_id, + backend=BACKEND, + include_collections=["Article"], + wait_for_completion=True, + ) + + # create incremental backup + client.backup.create( + backup_id=backup_id, + backend=BACKEND, + include_collections=["Article"], + wait_for_completion=True, + incremental_backup_base_id=base_backup_id, + ) + + # remove existing class + client.collections.delete("Article") + + # restore incremental backup + client.backup.restore( + backup_id=backup_id, + backend=BACKEND, + include_collections=["Article"], + wait_for_completion=True, + ) + + # check data exists again + assert len(client.collections.use("Article")) == len(ARTICLES_IDS) + + # This test has been disabled temporarily until the behaviour of this scenario is clarified. 
# It worked in version 1.33.0-rc.1, but broken in version 1.33.0+ # def test_overwrite_alias_false( diff --git a/test/collection/test_config_methods.py b/test/collection/test_config_methods.py index 4edf76e47..fbc33b702 100644 --- a/test/collection/test_config_methods.py +++ b/test/collection/test_config_methods.py @@ -1,7 +1,6 @@ - -import pytest from weaviate.collections.classes.config_methods import _collection_configs_simple_from_json + def test_collection_config_simple_from_json_with_none_vectorizer_config() -> None: """Test that _collection_configs_simple_from_json handles None vectorizer config.""" schema = { @@ -10,9 +9,7 @@ def test_collection_config_simple_from_json_with_none_vectorizer_config() -> Non "class": "TestCollection", "vectorConfig": { "default": { - "vectorizer": { - "text2vec-transformers": None - }, + "vectorizer": {"text2vec-transformers": None}, "vectorIndexType": "hnsw", "vectorIndexConfig": { "skip": False, @@ -25,15 +22,15 @@ def test_collection_config_simple_from_json_with_none_vectorizer_config() -> Non "dynamicEfFactor": 8, "vectorCacheMaxObjects": 1000000000000, "flatSearchCutoff": 40000, - "distance": "cosine" - } + "distance": "cosine", + }, } }, "properties": [], "invertedIndexConfig": { - "bm25": {"b": 0.75, "k1": 1.2}, - "cleanupIntervalSeconds": 60, - "stopwords": {"preset": "en", "additions": None, "removals": None} + "bm25": {"b": 0.75, "k1": 1.2}, + "cleanupIntervalSeconds": 60, + "stopwords": {"preset": "en", "additions": None, "removals": None}, }, "replicationConfig": {"factor": 1, "deletionStrategy": "NoAutomatedResolution"}, "shardingConfig": { @@ -44,7 +41,7 @@ def test_collection_config_simple_from_json_with_none_vectorizer_config() -> Non "actualVirtualCount": 128, "key": "_id", "strategy": "hash", - "function": "murmur3" + "function": "murmur3", }, "vectorIndexType": "hnsw", "vectorIndexConfig": { @@ -58,8 +55,8 @@ def test_collection_config_simple_from_json_with_none_vectorizer_config() -> Non "dynamicEfFactor": 
8, "vectorCacheMaxObjects": 1000000000000, "flatSearchCutoff": 40000, - "distance": "cosine" - } + "distance": "cosine", + }, } ] } diff --git a/weaviate/backup/async_.pyi b/weaviate/backup/async_.pyi index 7d7454972..c32b9b0dc 100644 --- a/weaviate/backup/async_.pyi +++ b/weaviate/backup/async_.pyi @@ -20,6 +20,7 @@ class _BackupAsync(_BackupExecutor[ConnectionAsync]): backend: BackupStorage, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, + incremental_backup_base_id: Optional[str] = None, wait_for_completion: bool = False, config: Optional[BackupConfigCreate] = None, backup_location: Optional[BackupLocationType] = None, diff --git a/weaviate/backup/executor.py b/weaviate/backup/executor.py index a14fe1255..8a4ee1693 100644 --- a/weaviate/backup/executor.py +++ b/weaviate/backup/executor.py @@ -47,6 +47,7 @@ def create( backend: BackupStorage, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, + incremental_backup_base_id: Optional[str] = None, wait_for_completion: bool = False, config: Optional[BackupConfigCreate] = None, backup_location: Optional[BackupLocationType] = None, @@ -60,6 +61,8 @@ def create( collections will be included. Either `include_collections` or `exclude_collections` can be set. By default None. exclude_collections: The collection/list of collections to be excluded in the backup. Either `include_collections` or `exclude_collections` can be set. By default None. + incremental_backup_base_id: The identifier name of the base backup for an incremental backup. Files that are identical + to the base backup will not be included in the incremental backup and will instead be restored from the base. By default None. wait_for_completion: Whether to wait until the backup is done. By default False. config: The configuration of the backup creation. By default None. backup_location: The dynamic location of a backup. By default None. 
@@ -89,6 +92,7 @@ def create( "id": backup_id, "include": include_collections, "exclude": exclude_collections, + "incremental_backup_base_id": incremental_backup_base_id, } if config is not None: diff --git a/weaviate/backup/sync.pyi b/weaviate/backup/sync.pyi index bb43de390..67b1010fb 100644 --- a/weaviate/backup/sync.pyi +++ b/weaviate/backup/sync.pyi @@ -20,6 +20,7 @@ class _Backup(_BackupExecutor[ConnectionSync]): backend: BackupStorage, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, + incremental_backup_base_id: Optional[str] = None, wait_for_completion: bool = False, config: Optional[BackupConfigCreate] = None, backup_location: Optional[BackupLocationType] = None,