Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 48 additions & 18 deletions geonode/base/i18n.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import logging
import threading
import time

from django.db import connection
from django.utils.translation import get_language, gettext as _
Expand Down Expand Up @@ -67,42 +69,63 @@ def get_localized_label(lang, about):
class I18nCacheEntry:
    """Per-language cache bucket, kept fresh via the i18n thesaurus date."""

    def __init__(self):
        # the date field of the thesaurus when it was last loaded, it's used for the expiration check;
        # "init" is a sentinel that never matches a real thesaurus date, so a fresh
        # entry is treated as stale until validated against the DB
        self.date: str = "init"
        self.caches: dict = {}  # the caches for this language, keyed by data_key

    def __str__(self):
        return f"I18nCacheEntry {self.date} [{list(self.caches.keys())}]"


class I18nCache:
"""
Caches language related data.
Synch is performed via date field in the "labels-i18n" thesaurus.
"""

CHECK_INTERVAL = 5 # seconds

def __init__(self):
# the cache has the lang as key, and I18nCacheEntry as a value:
self.lang_cache = {}
self._last_check = 0
self._lock = threading.Lock()

def get_entry(self, lang, data_key):
    """
    returns date:str, data

    date is needed for checking the entry freshness when setting info;
    data may be None if not cached or expired.

    To avoid hitting the DB on every call, the thesaurus date is only
    re-read when CHECK_INTERVAL seconds have passed since the last check,
    or when the language has no cache entry at all.
    """
    with self._lock:
        cached_entry: I18nCacheEntry = self.lang_cache.get(lang, None)

        time_now = time.time()
        needs_check = time_now - self._last_check > I18nCache.CHECK_INTERVAL

        if needs_check or not cached_entry:
            self._last_check = time_now
            thesaurus_date = (  # may be None if thesaurus does not exist
                Thesaurus.objects.filter(identifier=I18N_THESAURUS_IDENTIFIER)
                .values_list("date", flat=True)
                .first()
            )
            # entry exists but the thesaurus moved on: caller must rebuild
            if cached_entry and cached_entry.date != thesaurus_date:
                logger.info(f"Cache for {lang}:{data_key} needs to be recreated")
                return thesaurus_date, None
            # no entry for this language yet: caller must build it
            if not cached_entry:
                logger.info(f"Cache for {lang}:{data_key} needs to be created")
                return thesaurus_date, None

        # entry is present and either recently validated or within the
        # check interval: serve the cached payload (may be None for this key)
        return cached_entry.date, cached_entry.caches.get(data_key, None)

def set(self, lang: str, data_key: str, data, request_date: str):
# TODO: check if lang is allowed
cached_entry: I18nCacheEntry = self.lang_cache.setdefault(lang, I18nCacheEntry())

latest_date = (
Expand All @@ -114,15 +137,21 @@ def set(self, lang: str, data_key: str, data: dict, request_date: str):
logger.debug(f"Caching lang:{lang} key:{data_key} date:{request_date}")
cached_entry.date = latest_date
cached_entry.caches[data_key] = data
return True
else:
logger.warning(
f"Cache will not be updated for lang:{lang} key:{data_key} reqdate:{request_date} latest:{latest_date}"
)
return False

def clear(self):
    """Drop all cached i18n data for every language."""
    logger.info("Clearing i18n cache")
    self.lang_cache.clear()

def force_check(self):
"""For testing: forces a check against the DB on the next get_entry call."""
self._last_check = 0


class LabelResolver:
CACHE_KEY_LABELS = "labels"
Expand All @@ -139,6 +168,7 @@ def gettext(self, key, lang=None, fallback=True):
def get_labels(self, lang):
    """Return the labels for `lang`, building and caching them on a cache miss."""
    entry_date, cached = i18nCache.get_entry(lang, self.CACHE_KEY_LABELS)
    if cached is not None:
        return cached
    logger.debug("LabelResolver: loading I18N labels")
    fresh = self._create_labels_cache(lang)
    # pass back the date from get_entry so set() can detect stale payloads
    i18nCache.set(lang, self.CACHE_KEY_LABELS, fresh, entry_date)
    return fresh
Expand Down
34 changes: 28 additions & 6 deletions geonode/base/signals.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from datetime import datetime
from datetime import datetime, timedelta

from django.db.models.signals import post_save

Expand All @@ -10,11 +10,11 @@


def connect_signals():
    """Connect post_save handlers that react to changes in thesaurus-related models."""
    logger.debug("Connecting thesaurus signals...")
    # dispatch_uid prevents duplicate registration if this is called more than once
    post_save.connect(thesaurus_changed, sender=Thesaurus, weak=False, dispatch_uid="metadata_reset_t")
    post_save.connect(thesaurusk_changed, sender=ThesaurusKeyword, weak=False, dispatch_uid="metadata_reset_tk")
    post_save.connect(thesauruskl_changed, sender=ThesaurusKeywordLabel, weak=False, dispatch_uid="metadata_reset_tkl")
    logger.debug("Thesaurus signals connected")


def thesaurus_changed(sender, instance, **kwargs):
Expand All @@ -40,9 +40,31 @@ def thesauruskl_changed(sender, instance, **kwargs):


def _update_thesaurus_date():
    """
    Bump the date of the i18n thesaurus so other processes invalidate their caches.

    The stored date has 1-second resolution, so when the current time would not
    move the date forward (e.g. several updates within the same second, common
    in tests) one second is added to the previous value instead.
    """

    def _resolve_new_date(old, new):
        # if either value is not a parseable ISO date (e.g. old is None), just use the new one
        try:
            new_parsed = datetime.fromisoformat(new)
            old_parsed = datetime.fromisoformat(old)
        except (ValueError, TypeError):
            return new

        # date may be the same, especially in tests; the stored date may also be
        # ahead of "now" because we may have already added 1s to the db date
        if old == new or old_parsed > new_parsed:
            return (old_parsed + timedelta(seconds=1)).isoformat()
        return new

    i18n_thesaurus = Thesaurus.objects.get(identifier=I18N_THESAURUS_IDENTIFIER)

    now_date = datetime.now().replace(microsecond=0).isoformat()
    resolved_date = _resolve_new_date(i18n_thesaurus.date, now_date)
    logger.debug(f"Updating {I18N_THESAURUS_IDENTIFIER} thesaurus date {resolved_date}")

    # update timestamp to invalidate other processes also
    i18n_thesaurus.date = resolved_date
    # flag checked by the signal handlers to avoid re-processing this save — TODO confirm
    i18n_thesaurus._signal_handled = True
    i18n_thesaurus.save()
10 changes: 6 additions & 4 deletions geonode/metadata/tests/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################

import time
import logging

from geonode.tests.base import GeoNodeBaseTestSupport

Expand All @@ -31,6 +30,8 @@
Thesaurus,
)

logger = logging.getLogger(__name__)


class MetadataI18NTests(GeoNodeBaseTestSupport):

Expand All @@ -48,13 +49,14 @@ def setUp(self):
self.tid = Thesaurus.objects.create(title="Spatial scope thesaurus", identifier=I18N_THESAURUS_IDENTIFIER).id

def _add_label(self, about, lang, label):
    """Create a keyword in the i18n thesaurus and, if both given, a label for it."""
    logger.debug(f"ADDING LABEL {lang}:{label}")
    tk, _created = ThesaurusKeyword.objects.get_or_create(
        about=about, thesaurus_id=self.tid, defaults={"alt_label": f"alt_{about}"}
    )
    if lang and label:
        ThesaurusKeywordLabel.objects.create(keyword=tk, label=label, lang=lang)
    # this is needed to bypass invalidation optimization
    i18nCache.force_check()

def tearDown(self):
super().tearDown()
Expand Down
Loading