From 51445c69c0191719f56a90ed8b19ab78dd626305 Mon Sep 17 00:00:00 2001 From: Brian Sam-Bodden Date: Mon, 16 Feb 2026 17:23:13 -0700 Subject: [PATCH] fix: avoid NLTK download race condition in parallel test workers Try loading stopwords before downloading to prevent concurrent pytest-xdist workers from racing on nltk.download(), which raises FileExistsError when multiple processes write to the same path. --- redisvl/query/query.py | 10 ++++++++-- redisvl/utils/full_text_query_helper.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/redisvl/query/query.py b/redisvl/query/query.py index 7d4cbe88..e96ffd78 100644 --- a/redisvl/query/query.py +++ b/redisvl/query/query.py @@ -1460,8 +1460,14 @@ def _set_stopwords(self, stopwords: Optional[Union[str, Set[str]]] = "english"): self._stopwords = set() elif isinstance(stopwords, str): try: - nltk.download("stopwords", quiet=True) - self._stopwords = set(nltk_stopwords.words(stopwords)) + # Try loading first; only download if not already present. + # This avoids race conditions when parallel workers (e.g. + # pytest-xdist) call nltk.download() concurrently. + try: + self._stopwords = set(nltk_stopwords.words(stopwords)) + except LookupError: + nltk.download("stopwords", quiet=True) + self._stopwords = set(nltk_stopwords.words(stopwords)) except ImportError: raise ValueError( f"Loading stopwords for {stopwords} failed: nltk is not installed." diff --git a/redisvl/utils/full_text_query_helper.py b/redisvl/utils/full_text_query_helper.py index ee5b8cb7..cdb6877c 100644 --- a/redisvl/utils/full_text_query_helper.py +++ b/redisvl/utils/full_text_query_helper.py @@ -93,8 +93,14 @@ def _get_stopwords( return set() elif isinstance(stopwords, str): try: - nltk.download("stopwords", quiet=True) - return set(nltk_stopwords.words(stopwords)) + # Try loading first; only download if not already present. + # This avoids race conditions when parallel workers (e.g. + # pytest-xdist) call nltk.download() concurrently. + try: + return set(nltk_stopwords.words(stopwords)) + except LookupError: + nltk.download("stopwords", quiet=True) + return set(nltk_stopwords.words(stopwords)) except ImportError: raise ValueError( f"Loading stopwords for {stopwords} failed: nltk is not installed."