Skip to content

Commit 2021bf1

Browse files
akx and stkao05 committed
Improve extract performance via ignoring directories early during os.walk
Co-authored-by: Steven Kao <st.kao.05@gmail.com>
1 parent 0c4f378 commit 2021bf1

File tree

2 files changed

+43
-7
lines changed

2 files changed

+43
-7
lines changed

babel/messages/extract.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import os
2424
import sys
2525
import tokenize
26+
import warnings
2627
from collections.abc import (
2728
Callable,
2829
Collection,
@@ -114,7 +115,35 @@ def _strip(line: str):
114115
comments[:] = [_strip(c) for c in comments]
115116

116117

117-
def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
118+
def _make_default_directory_filter(
    method_map: Iterable[tuple[str, str]],
    root_dir: str | os.PathLike[str],
) -> Callable[[str | os.PathLike[str]], bool]:
    """Build the default directory filter for a walk rooted at ``root_dir``.

    The returned callable takes a directory path and returns ``False`` (skip
    the directory) when either:

    * its base name starts with ``.`` or ``_`` (the legacy default), or
    * its path relative to ``root_dir`` matches a ``method_map`` pattern whose
      method is ``"ignore"``.

    Otherwise it returns ``True``.

    :param method_map: iterable of ``(pattern, method)`` pairs; it is consumed
                       once, when the filter is built
    :param root_dir: directory that patterns are matched relative to
    :return: a predicate suitable for pruning directories during a walk
    """
    # Only "ignore" entries can reject a directory, so pick them out once here
    # instead of re-scanning the whole method map for every directory visited.
    ignore_patterns = tuple(
        pattern for pattern, method in method_map if method == "ignore"
    )

    def directory_filter(dirpath: str | os.PathLike[str]) -> bool:
        subdir = os.path.basename(dirpath)
        # Legacy default behavior: ignore dot and underscore directories
        if subdir.startswith(('.', '_')):
            return False

        if not ignore_patterns:
            return True

        # Match against the root-relative path with '/' as the separator,
        # regardless of the platform's native separator.
        dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/')
        return not any(pathmatch(pattern, dir_rel) for pattern in ignore_patterns)

    return directory_filter
139+
140+
141+
def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool: # pragma: no cover
142+
warnings.warn(
143+
"`default_directory_filter` is deprecated and will be removed in a future version of Babel.",
144+
DeprecationWarning,
145+
stacklevel=2,
146+
)
118147
subdir = os.path.basename(dirpath)
119148
# Legacy default behavior: ignore dot and underscore directories
120149
return not (subdir.startswith('.') or subdir.startswith('_'))
@@ -201,13 +230,19 @@ def extract_from_dir(
201230
"""
202231
if dirname is None:
203232
dirname = os.getcwd()
233+
204234
if options_map is None:
205235
options_map = {}
236+
237+
dirname = os.path.abspath(dirname)
238+
206239
if directory_filter is None:
207-
directory_filter = default_directory_filter
240+
directory_filter = _make_default_directory_filter(
241+
method_map=method_map,
242+
root_dir=dirname,
243+
)
208244

209-
absname = os.path.abspath(dirname)
210-
for root, dirnames, filenames in os.walk(absname):
245+
for root, dirnames, filenames in os.walk(dirname):
211246
dirnames[:] = [
212247
subdir for subdir in dirnames if directory_filter(os.path.join(root, subdir))
213248
]
@@ -224,7 +259,7 @@ def extract_from_dir(
224259
keywords,
225260
comment_tags,
226261
strip_comment_tags,
227-
dirpath=absname,
262+
dirpath=dirname,
228263
)
229264

230265

tests/messages/frontend/test_extract.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,10 +202,11 @@ def test_extraction_with_mapping_file(extract_cmd, pot_file):
202202

203203

204204
@freeze_time("1994-11-11")
205-
def test_extraction_with_mapping_dict(extract_cmd, pot_file):
205+
@pytest.mark.parametrize("ignore_pattern", ['**/ignored/**.*', 'ignored'])
206+
def test_extraction_with_mapping_dict(extract_cmd, pot_file, ignore_pattern):
206207
extract_cmd.distribution.message_extractors = {
207208
'project': [
208-
('**/ignored/**.*', 'ignore', None),
209+
(ignore_pattern, 'ignore', None),
209210
('**.py', 'python', None),
210211
],
211212
}

0 commit comments

Comments
 (0)