From 35f597b9c76d040114d638ba0a1598f9014e87e9 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 10 Jan 2025 18:26:40 +0000 Subject: [PATCH 1/3] Test hashing --- tests/test_abstract_model.py | 41 +++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/tests/test_abstract_model.py b/tests/test_abstract_model.py index f9e820e..a9e67ad 100644 --- a/tests/test_abstract_model.py +++ b/tests/test_abstract_model.py @@ -1,3 +1,4 @@ +import datetime import io import ase @@ -8,7 +9,7 @@ from ase.io import read, write import numpy as np -from abcd.model import AbstractModel +from abcd.model import AbstractModel, Hasher from ase.calculators.lj import LennardJones @@ -238,7 +239,6 @@ def test_write_and_read(store_calc): "hash", "modified", "uploaded", - "hash_structure", # see issue #118 }: assert ( abcd_data[key] == abcd_data_after_read[key] @@ -247,10 +247,45 @@ def test_write_and_read(store_calc): # expected differences - n.b. order of calls above assert abcd_data_after_read["modified"] > abcd_data["modified"] assert abcd_data_after_read["uploaded"] > abcd_data["uploaded"] - assert abcd_data_after_read["hash"] != abcd_data["hash"] # expect results to match within fp precision for key in set(abcd_data.results_keys): assert abcd_data[key] == approx( np.array(abcd_data_after_read[key]) ), f"{key}'s value does not match" + + +@pytest.mark.parametrize( + "data", + [ + 1296, + 3.14, + [1, 2, 3], + (4, 5, 6), + {"a": "value"}, + datetime.datetime.now(datetime.timezone.utc), + b"test", + ], +) +def test_hasher(data): + """Test hash calculated correctly.""" + hasher_1 = Hasher() + + # Test hash updated + init_hash = hasher_1() + hasher_1.update("Test value") + updated_hash = hasher_1() + assert updated_hash != init_hash + + # Test updating hash for different data types + hasher_1.update(data) + assert updated_hash != hasher_1() + + # Test newer hasher reset correctly + hasher_2 = Hasher() + assert hasher_2() == init_hash + + # Test hashes match after same data added + hasher_2.update("Test value") + hasher_2.update(data) + assert hasher_1() == hasher_2() From 3ce421540d2754737095fb934bf44bebbd3095f0 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 10 Jan 2025 18:26:49 +0000 Subject: [PATCH 2/3] Fix Hasher --- abcd/model.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/abcd/model.py b/abcd/model.py index 4b9dabf..406c77d 100644 --- a/abcd/model.py +++ b/abcd/model.py @@ -12,8 +12,8 @@ class Hasher(object): - def __init__(self, method=md5()): - self.method = method + def __init__(self, method=md5): + self.method = method() def update(self, value): @@ -273,26 +273,25 @@ def pre_save(self): self["username"] = getpass.getuser() if not self.get("uploaded"): - self["uploaded"] = datetime.datetime.utcnow() + self["uploaded"] = datetime.datetime.now(datetime.timezone.utc) - self["modified"] = datetime.datetime.utcnow() + self["modified"] = datetime.datetime.now(datetime.timezone.utc) - m = Hasher() + hasher = Hasher() for key in ("numbers", "positions", "cell", "pbc"): - m.update(self[key]) + hasher.update(self[key]) self.derived_keys.append("hash_structure") - self["hash_structure"] = m() + self["hash_structure"] = hasher() - m = Hasher() for key in self.arrays_keys: - m.update(self[key]) + hasher.update(self[key]) for key in self.info_keys: - m.update(self[key]) + hasher.update(self[key]) self.derived_keys.append("hash") - self["hash"] = m() + self["hash"] = hasher() if __name__ == "__main__": From d5aff783aa7e30d5306a842f6f0ab79a58281a34 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Mon, 17 Feb 2025 13:38:46 +0000 Subject: [PATCH 3/3] Split up hash tests --- tests/test_abstract_model.py | 48 +++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/tests/test_abstract_model.py b/tests/test_abstract_model.py index a9e67ad..6960950 100644 --- a/tests/test_abstract_model.py +++ b/tests/test_abstract_model.py @@ -255,6 +255,15 @@ def test_write_and_read(store_calc): ), f"{key}'s value does not match" +def test_hash_update(): + """Test hash can be updated after initialisation.""" + hasher_1 = Hasher() + + init_hash = hasher_1() + hasher_1.update("Test value") + assert hasher_1() != init_hash + + @pytest.mark.parametrize( "data", [ @@ -267,25 +276,46 @@ def test_write_and_read(store_calc): b"test", ], ) -def test_hasher(data): - """Test hash calculated correctly.""" +def test_hash_data_types(data): + """Test updating hash for different data types.""" hasher_1 = Hasher() - - # Test hash updated - init_hash = hasher_1() hasher_1.update("Test value") updated_hash = hasher_1() - assert updated_hash != init_hash - # Test updating hash for different data types hasher_1.update(data) assert updated_hash != hasher_1() - # Test newer hasher reset correctly + +def test_second_hash_init(): + """Test second hash is initialised correctly.""" + hasher_1 = Hasher() + + init_hash = hasher_1() + hasher_1.update("Test value") + hasher_2 = Hasher() assert hasher_2() == init_hash - # Test hashes match after same data added + +@pytest.mark.parametrize( + "data", + [ + 1296, + 3.14, + [1, 2, 3], + (4, 5, 6), + {"a": "value"}, + datetime.datetime.now(datetime.timezone.utc), + b"test", + ], +) +def test_consistent_hash(data): + """Test two hashers agree with same data.""" + hasher_1 = Hasher() + hasher_1.update("Test value") + hasher_1.update(data) + + hasher_2 = Hasher() hasher_2.update("Test value") hasher_2.update(data) assert hasher_1() == hasher_2()