From a634e20f2f7a913c4a630c1ef2e7911fe41a83b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Bo=C4=8Dek?= Date: Thu, 15 Jan 2026 13:49:53 +0100 Subject: [PATCH] Add min_candidate_length parameter to PhoneNumberMatcher --- python/phonenumbers/phonenumbermatcher.py | 23 ++++++++++++++++------ python/phonenumbers/phonenumbermatcher.pyi | 3 ++- python/tests/phonenumbermatchertest.py | 22 +++++++++++++++++++++ 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/python/phonenumbers/phonenumbermatcher.py b/python/phonenumbers/phonenumbermatcher.py index 237e69535..34522adc1 100644 --- a/python/phonenumbers/phonenumbermatcher.py +++ b/python/phonenumbers/phonenumbermatcher.py @@ -456,7 +456,8 @@ class PhoneNumberMatcher(object): _DONE = 2 def __init__(self, text, region, - leniency=Leniency.VALID, max_tries=65535): + leniency=Leniency.VALID, max_tries=65535, + min_candidate_length=1): """Creates a new instance. Arguments: @@ -471,6 +472,9 @@ def __init__(self, text, region, max_tries -- The maximum number of invalid numbers to try before giving up on the text. This is to cover degenerate cases where the text has a lot of false positives in it. Must be >= 0. + min_candidate_length -- The minimum length of a candidate phone number. + Can be used to quickly skip candidates that are too short to be valid, + depending on your use-case needs. """ if leniency is None: raise ValueError("Need a leniency value") @@ -487,6 +491,8 @@ def __init__(self, text, region, self.leniency = leniency # The maximum number of retries after matching an invalid number. self._max_tries = int(max_tries) + # The minimum length of a candidate phone number. + self._min_candidate_length = int(min_candidate_length) # The iteration tristate. self._state = PhoneNumberMatcher._NOT_READY # The last successful match, None unless in state _READY @@ -513,13 +519,18 @@ def _find(self, index): # 123 45 67 / 68). 
candidate = self._trim_after_first_match(_SECOND_NUMBER_START_PATTERN, candidate) + candidate_len = len(candidate) + + # UPSTREAM DIVERGENCE: min_candidate_length is a Python-specific + # feature, not present in the upstream Java version. + if candidate_len >= self._min_candidate_length: + match = self._extract_match(candidate, start) + if match is not None: + return match + self._max_tries -= 1 - match = self._extract_match(candidate, start) - if match is not None: - return match # Move along - index = start + len(candidate) - self._max_tries -= 1 + index = start + candidate_len match = _PATTERN.search(self.text, index) return None diff --git a/python/phonenumbers/phonenumbermatcher.pyi b/python/phonenumbers/phonenumbermatcher.pyi index a50fada18..2b8315b59 100644 --- a/python/phonenumbers/phonenumbermatcher.pyi +++ b/python/phonenumbers/phonenumbermatcher.pyi @@ -56,10 +56,11 @@ class PhoneNumberMatcher: preferred_region: str | None leniency: int _max_tries: int + _min_candidate_length: int _state: int _last_match: PhoneNumberMatch | None _search_index: int - def __init__(self, text: str | None, region: str | None, leniency: int = ..., max_tries: int = ...) -> None: ... + def __init__(self, text: str | None, region: str | None, leniency: int = ..., max_tries: int = ..., min_candidate_length: int = ...) -> None: ... def _find(self, index: int) -> PhoneNumberMatch | None: ... def _trim_after_first_match(self, pattern: Pattern[str], candidate: str) -> str: ... 
@classmethod diff --git a/python/tests/phonenumbermatchertest.py b/python/tests/phonenumbermatchertest.py index a295bb39c..90496851c 100644 --- a/python/tests/phonenumbermatchertest.py +++ b/python/tests/phonenumbermatchertest.py @@ -988,3 +988,25 @@ def testInternals(self): num_format = NumberFormat(pattern="(\\d{3})(\\d{3})(\\d{4})", format="\\1-\\2-\\3") self.assertEqual(["650", "253", "0000"], _get_national_number_groups(us_number, num_format)) + + def testMinCandidateLengthFiltersShortNumbers(self): + # Python-specific test: min_candidate_length parameter + text = "Call +1800-123-4567 or 415-666-7777 for help" + # With min_candidate_length=13, the short candidate should be skipped + matcher = PhoneNumberMatcher(text, "US", Leniency.POSSIBLE, 65535, min_candidate_length=13) + match = matcher.next() if matcher.has_next() else None + self.assertIsNotNone(match) + self.assertEqual("+1800-123-4567", match.raw_string) + # Should be no more matches + self.assertFalse(matcher.has_next()) + + def testMinCandidateLengthDoesNotConsumeMaxTries(self): + # Python-specific test: skipped short candidates don't consume max_tries + # Text with 5 short candidates followed by one valid number + text = "Try 123, 456, 789, 012, 345, then call 415-666-7777" + # With max_tries=1, if short candidates consumed tries, we'd fail to find the valid number + # But with min_candidate_length=10, short candidates are skipped without consuming tries + matcher = PhoneNumberMatcher(text, "US", Leniency.VALID, max_tries=1, min_candidate_length=10) + match = matcher.next() if matcher.has_next() else None + self.assertIsNotNone(match) + self.assertEqual("415-666-7777", match.raw_string)