diff --git a/geonode/base/i18n.py b/geonode/base/i18n.py
index b5563bd0c16..638af0e60e6 100644
--- a/geonode/base/i18n.py
+++ b/geonode/base/i18n.py
@@ -1,4 +1,6 @@
 import logging
+import threading
+import time
 
 from django.db import connection
 from django.utils.translation import get_language, gettext as _
@@ -67,9 +69,12 @@ def get_localized_label(lang, about):
 class I18nCacheEntry:
     def __init__(self):
         # the date field of the thesaurus when it was last loaded, it's used for the expiration check
-        self.date: str | None = None
+        self.date: str = "init"
         self.caches: dict = {}  # the caches for this language
 
+    def __str__(self):
+        return f"I18nCacheEntry {self.date} [{list(self.caches.keys())}]"
+
 
 class I18nCache:
     """
@@ -77,9 +82,13 @@ class I18nCache:
     Synch is performed via date field in the "labels-i18n" thesaurus.
     """
 
+    CHECK_INTERVAL = 5  # seconds
+
     def __init__(self):
         # the cache has the lang as key, and I18nCacheEntry as a value:
         self.lang_cache = {}
+        self._last_check = float("-inf")  # never checked: the first get_entry always hits the DB
+        self._lock = threading.Lock()
 
     def get_entry(self, lang, data_key):
         """
@@ -87,22 +96,36 @@ def get_entry(self, lang, data_key):
         date is needed for checking the entry freshness when setting info
         data may be None if not cached or expired
         """
-        cached_entry: I18nCacheEntry = self.lang_cache.get(lang, None)
-
-        # TODO: thesaurus date check should be done only after a given time interval from last check
-        thesaurus_date = (  # may be none if thesaurus does not exist
-            Thesaurus.objects.filter(identifier=I18N_THESAURUS_IDENTIFIER).values_list("date", flat=True).first()
-        )
-        if cached_entry:
-            if thesaurus_date == cached_entry.date:
-                # only return cached data if thesaurus has not been modified
-                return thesaurus_date, cached_entry.caches.get(data_key, None)
-            else:
-                logger.info(f"Schema for {lang}:{data_key} needs to be recreated")
-
-        return thesaurus_date, None
-
-    def set(self, lang: str, data_key: str, data: dict, request_date: str):
+        with self._lock:
+            cached_entry: I18nCacheEntry = self.lang_cache.get(lang, None)
+
+            time_now = time.monotonic()  # monotonic: the interval must not be affected by wall-clock changes
+            needs_check = time_now - self._last_check > I18nCache.CHECK_INTERVAL
+
+            # if not needs_check:
+            #     logger.debug(f"No cache check needed {lang}:{data_key} @ {cached_entry}")
+            # else:
+            #     logger.debug(f"Cache check needed {lang}:{data_key} @ {cached_entry}")
+
+            if needs_check or not cached_entry:
+                self._last_check = time_now
+                thesaurus_date = (  # may be none if thesaurus does not exist
+                    Thesaurus.objects.filter(identifier=I18N_THESAURUS_IDENTIFIER)
+                    .values_list("date", flat=True)
+                    .first()
+                )
+                if cached_entry and cached_entry.date != thesaurus_date:
+                    logger.info(f"Cache for {lang}:{data_key} needs to be recreated")
+                    return thesaurus_date, None
+                if not cached_entry:
+                    logger.info(f"Cache for {lang}:{data_key} needs to be created")
+                    return thesaurus_date, None
+
+            # logger.debug(f"Returning cached entry for {lang}:{data_key} @ {cached_entry.date}")
+            return cached_entry.date, cached_entry.caches.get(data_key, None)
+
+    def set(self, lang: str, data_key: str, data, request_date: str):
+        # TODO: check if lang is allowed
         cached_entry: I18nCacheEntry = self.lang_cache.setdefault(lang, I18nCacheEntry())
 
         latest_date = (
@@ -114,15 +137,21 @@ def set(self, lang: str, data_key: str, data: dict, request_date: str):
             logger.debug(f"Caching lang:{lang} key:{data_key} date:{request_date}")
             cached_entry.date = latest_date
             cached_entry.caches[data_key] = data
+            return True
         else:
             logger.warning(
                 f"Cache will not be updated for lang:{lang} key:{data_key} reqdate:{request_date} latest:{latest_date}"
             )
+            return False
 
     def clear(self):
-        logger.info("Clearing schema cache")
+        logger.info("Clearing i18n cache")
         self.lang_cache.clear()
 
+    def force_check(self):
+        """For testing: forces a check against the DB on the next get_entry call."""
+        self._last_check = float("-inf")
+
 
 class LabelResolver:
     CACHE_KEY_LABELS = "labels"
@@ -139,6 +168,7 @@ def gettext(self, key, lang=None, fallback=True):
     def get_labels(self, lang):
         date, labels = i18nCache.get_entry(lang, self.CACHE_KEY_LABELS)
         if labels is None:
+            logger.debug("LabelResolver: loading I18N labels")
             labels = self._create_labels_cache(lang)
             i18nCache.set(lang, self.CACHE_KEY_LABELS, labels, date)
         return labels
diff --git a/geonode/base/signals.py b/geonode/base/signals.py
index cb3432c1f93..234fe1331d3 100644
--- a/geonode/base/signals.py
+++ b/geonode/base/signals.py
@@ -1,5 +1,5 @@
 import logging
-from datetime import datetime
+from datetime import datetime, timedelta
 
 from django.db.models.signals import post_save
 
@@ -10,11 +10,11 @@
 
 
 def connect_signals():
-    logger.debug("Setting up signal connections...")
+    logger.debug("Connecting thesaurus signals...")
     post_save.connect(thesaurus_changed, sender=Thesaurus, weak=False, dispatch_uid="metadata_reset_t")
     post_save.connect(thesaurusk_changed, sender=ThesaurusKeyword, weak=False, dispatch_uid="metadata_reset_tk")
     post_save.connect(thesauruskl_changed, sender=ThesaurusKeywordLabel, weak=False, dispatch_uid="metadata_reset_tkl")
-    logger.debug("Signal connections set")
+    logger.debug("Thesaurus signals connected")
 
 
 def thesaurus_changed(sender, instance, **kwargs):
@@ -40,9 +40,26 @@ def thesauruskl_changed(sender, instance, **kwargs):
 
 
 def _update_thesaurus_date():
-    logger.debug("Updating label thesaurus date")
-    # update timestamp to invalidate other processes also
+    def _resolve_new_date(old, new):
+        # the stored date must change on every update; "now" may be equal to (or older than)
+        # the stored one, especially in tests, since we may have already added 1s to the db date
+        try:
+            old_parsed = datetime.fromisoformat(old)
+            if old_parsed < datetime.fromisoformat(new):
+                return new
+        except (ValueError, TypeError):
+            # stored date is missing, unparsable, or not comparable (naive vs tz-aware)
+            return new
+
+        return (old_parsed + timedelta(seconds=1)).isoformat()
+
     i18n_thesaurus = Thesaurus.objects.get(identifier=I18N_THESAURUS_IDENTIFIER)
-    i18n_thesaurus.date = datetime.now().replace(microsecond=0).isoformat()
+
+    now_date = datetime.now().replace(microsecond=0).isoformat()
+    resolved_date = _resolve_new_date(i18n_thesaurus.date, now_date)
+    logger.debug(f"Updating {I18N_THESAURUS_IDENTIFIER} thesaurus date {resolved_date}")
+
+    # update timestamp to invalidate other processes also
+    i18n_thesaurus.date = resolved_date
     i18n_thesaurus._signal_handled = True
     i18n_thesaurus.save()
diff --git a/geonode/metadata/tests/test_i18n.py b/geonode/metadata/tests/test_i18n.py
index 9b3b7cc8284..bbceb30c591 100644
--- a/geonode/metadata/tests/test_i18n.py
+++ b/geonode/metadata/tests/test_i18n.py
@@ -16,8 +16,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 #########################################################################
-
-import time
+import logging
 
 from geonode.tests.base import GeoNodeBaseTestSupport
 
@@ -31,6 +30,8 @@
     Thesaurus,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class MetadataI18NTests(GeoNodeBaseTestSupport):
 
@@ -48,13 +49,14 @@ def setUp(self):
         self.tid = Thesaurus.objects.create(title="Spatial scope thesaurus", identifier=I18N_THESAURUS_IDENTIFIER).id
 
     def _add_label(self, about, lang, label):
+        logger.debug(f"ADDING LABEL {lang}:{label}")
         tk, created = ThesaurusKeyword.objects.get_or_create(
             about=about, thesaurus_id=self.tid, defaults={"alt_label": f"alt_{about}"}
         )
         if lang and label:
             ThesaurusKeywordLabel.objects.create(keyword=tk, label=label, lang=lang)
-        # this is needed to invalidate i18ncache
-        Thesaurus.objects.filter(pk=self.tid).update(date=str(time.time_ns()))
+        # this is needed to bypass invalidation optimization
+        i18nCache.force_check()
 
     def tearDown(self):
         super().tearDown()