From b5b9c4438056c69612e2ae96dc040dceb963ae99 Mon Sep 17 00:00:00 2001 From: Patrick Ziegler Date: Thu, 12 Dec 2024 20:42:46 +0100 Subject: [PATCH 1/2] Move TextMatcher from Platform UI to Platform Core Resources The TextMatcher class is used in the UI component, despite not depending on any UI classes. By moving it to the platform, it can be used in both mixed and pure E4 applications. --- .../core/internal/utils/TextMatcher.java | 169 ++++++++++++++++++ .../tests/internal/utils/AllUtilsTests.java | 3 +- .../tests/internal/utils/TextMatcherTest.java | 108 +++++++++++ 3 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 resources/bundles/org.eclipse.core.resources/src/org/eclipse/core/internal/utils/TextMatcher.java create mode 100644 resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/TextMatcherTest.java diff --git a/resources/bundles/org.eclipse.core.resources/src/org/eclipse/core/internal/utils/TextMatcher.java b/resources/bundles/org.eclipse.core.resources/src/org/eclipse/core/internal/utils/TextMatcher.java new file mode 100644 index 00000000000..4fec70a8ebb --- /dev/null +++ b/resources/bundles/org.eclipse.core.resources/src/org/eclipse/core/internal/utils/TextMatcher.java @@ -0,0 +1,169 @@ +/******************************************************************************* + * Copyright (c) 2020 Thomas Wolf and others. 
+ * + * This program and the accompanying materials + * are made available under the terms of the Eclipse Public License 2.0 + * which accompanies this distribution, and is available at + * https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + *******************************************************************************/ +package org.eclipse.core.internal.utils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.regex.Pattern; +import org.eclipse.core.text.StringMatcher; + +/** + * Similar to {@link StringMatcher}, this {@code TextMatcher} matches a pattern + * that may contain the wildcards '?' or '*' against a text. However, the + * matching is not only done on the full text, but also on individual words from + * the text, and if the pattern contains whitespace, the pattern is split into + * sub-patterns and those are matched, too. + *

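+ * <p>
+ * The precise rules are:
+ * </p>
+ * <ul>
+ * <li>If the full pattern matches the full text, the match succeeds.</li>
+ * <li>If the full pattern matches a single word of the text, the match
+ * succeeds.</li>
+ * <li>If the pattern consists of several whitespace-separated sub-patterns and
+ * each of them matches a prefix of the full text or a prefix of one of its
+ * words, the match succeeds.</li>
+ * <li>Otherwise, the match fails.</li>
+ * </ul>
+ * <p>
+ * An empty pattern matches only the empty text.
+ * </p>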

+ */
+public final class TextMatcher {
+
+    /** Separator used to split a text into words: any run of non-word characters (Unicode-aware). */
+    private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$
+
+    /** Matcher for the complete, trimmed pattern. */
+    private final StringMatcher full;
+
+    /**
+     * Prefix matchers for the whitespace-separated sub-patterns; empty unless the
+     * pattern consists of more than one sub-pattern.
+     */
+    private final List<StringMatcher> parts;
+
+    /**
+     * Creates a new {@link TextMatcher}.
+     *
+     * @param pattern to match; leading and trailing whitespace is ignored
+     * @param ignoreCase whether to do case-insensitive matching
+     * @param ignoreWildCards whether to treat '?' and '*' as normal characters, not
+     *        as wildcards
+     * @throws IllegalArgumentException if {@code pattern == null}
+     */
+    public TextMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
+        // Check explicitly: pattern.trim() would otherwise fail with a
+        // NullPointerException instead of the documented exception.
+        if (pattern == null) {
+            throw new IllegalArgumentException();
+        }
+        full = new StringMatcher(pattern.trim(), ignoreCase, ignoreWildCards);
+        parts = splitPattern(pattern, ignoreCase, ignoreWildCards);
+    }
+
+    /**
+     * Creates one prefix matcher per whitespace-separated sub-pattern.
+     *
+     * @param pattern the pattern to split; must not be {@code null}
+     * @param ignoreCase whether to do case-insensitive matching
+     * @param ignoreWildCards whether to treat '?' and '*' as normal characters
+     * @return the sub-pattern matchers, or an empty list if the trimmed pattern is
+     *         empty or does not contain any whitespace
+     */
+    private List<StringMatcher> splitPattern(String pattern,
+            boolean ignoreCase, boolean ignoreWildCards) {
+        String pat = pattern.trim();
+        if (pat.isEmpty()) {
+            return Collections.emptyList();
+        }
+        String[] subPatterns = pat.split("\\s+"); //$NON-NLS-1$
+        if (subPatterns.length <= 1) {
+            // A single sub-pattern is already covered by the full matcher.
+            return Collections.emptyList();
+        }
+        List<StringMatcher> matchers = new ArrayList<>();
+        for (String s : subPatterns) {
+            if (s == null || s.isEmpty()) {
+                continue;
+            }
+            StringMatcher m = new StringMatcher(s, ignoreCase, ignoreWildCards);
+            m.usePrefixMatch();
+            matchers.add(m);
+        }
+        return matchers;
+    }
+
+    /**
+     * Determines whether the given {@code text} matches the pattern.
+     *
+     * @param text String to match; must not be {@code null}
+     * @return {@code true} if the whole {@code text} matches the pattern;
+     *         {@code false} otherwise
+     * @throws IllegalArgumentException if {@code text == null}
+     */
+    public boolean match(String text) {
+        if (text == null) {
+            throw new IllegalArgumentException();
+        }
+        return match(text, 0, text.length());
+    }
+
+    /**
+     * Determines whether the given sub-string of {@code text} from {@code start}
+     * (inclusive) to {@code end} (exclusive) matches the pattern. Indices lying
+     * outside of {@code text} are clamped to its bounds; if the clamped range is
+     * inverted, nothing matches.
+     *
+     * @param text String to match in; must not be {@code null}
+     * @param start start index (inclusive) within {@code text} of the sub-string to
+     *        match
+     * @param end end index (exclusive) within {@code text} of the sub-string to
+     *        match
+     * @return {@code true} if the given slice of {@code text} matches the pattern;
+     *         {@code false} otherwise
+     * @throws IllegalArgumentException if {@code text == null}
+     */
+    public boolean match(String text, int start, int end) {
+        if (text == null) {
+            throw new IllegalArgumentException();
+        }
+        // Clamp first and compare afterwards: a range lying entirely outside of the
+        // text (e.g. start > text.length()) must yield "no match" instead of
+        // failing in substring() below.
+        int tlen = text.length();
+        start = Math.max(0, start);
+        end = Math.min(end, tlen);
+        if (start > end) {
+            return false;
+        }
+        if (full.match(text, start, end)) {
+            return true;
+        }
+        String[] words = getWords(text.substring(start, end));
+        if (match(full, words)) {
+            return true;
+        }
+        if (parts.isEmpty()) {
+            return false;
+        }
+        // Every sub-pattern has to match either the text slice or one of its words.
+        for (StringMatcher subMatcher : parts) {
+            if (!subMatcher.match(text, start, end) && !match(subMatcher, words)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Tells whether the given matcher matches at least one of the given words.
+     */
+    private boolean match(StringMatcher matcher, String[] words) {
+        return Arrays.stream(words).filter(Objects::nonNull).anyMatch(matcher::match);
+    }
+
+    /**
+     * Splits a given text into words.
+     *
+     * @param text to split
+     * @return the words of the text
+     */
+    public static String[] getWords(String text) {
+        // Previous implementations (in the removed StringMatcher) used the ICU
+        // BreakIterator to split the text. That worked well, but in 2020 it was decided
+        // to drop the dependency to the ICU library due to its size. The JDK
+        // BreakIterator splits differently, causing e.g.
+        // https://bugs.eclipse.org/bugs/show_bug.cgi?id=563121 . The NON_WORD regexp
+        // appears to work well for programming language text, but may give sub-optimal
+        // results for natural languages. See also
+        // https://bugs.eclipse.org/bugs/show_bug.cgi?id=90579 .
+ return NON_WORD.split(text); + } + + @Override + public String toString() { + return '[' + full.toString() + ',' + parts + ']'; + } +} diff --git a/resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/AllUtilsTests.java b/resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/AllUtilsTests.java index 795abffb352..713007595d6 100644 --- a/resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/AllUtilsTests.java +++ b/resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/AllUtilsTests.java @@ -20,6 +20,7 @@ @Suite @SelectClasses({ // ObjectMapTest.class, // - FileUtilTest.class, }) + FileUtilTest.class, // + TextMatcherTest.class }) public class AllUtilsTests { } diff --git a/resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/TextMatcherTest.java b/resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/TextMatcherTest.java new file mode 100644 index 00000000000..cf67185e9af --- /dev/null +++ b/resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/TextMatcherTest.java @@ -0,0 +1,108 @@ +/******************************************************************************* + * Copyright (c) 2020 Thomas Wolf and others. 
+ * + * This program and the accompanying materials + * are made available under the terms of the Eclipse Public License 2.0 + * which accompanies this distribution, and is available at + * https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + *******************************************************************************/ +package org.eclipse.core.tests.internal.utils; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.eclipse.core.internal.utils.TextMatcher; +import org.junit.Test; + +/** + * Tests for {@link TextMatcher}. + */ +public class TextMatcherTest { + + @Test + public void testEmpty() { + assertTrue(new TextMatcher("", false, false).match("")); + assertFalse(new TextMatcher("", false, false).match("foo")); + assertFalse(new TextMatcher("", false, false).match("foo bar baz")); + assertTrue(new TextMatcher("", false, true).match("")); + assertFalse(new TextMatcher("", false, true).match("foo")); + assertFalse(new TextMatcher("", false, true).match("foo bar baz")); + } + + @Test + public void testSuffixes() { + assertFalse(new TextMatcher("fo*ar", false, false).match("foobar_123")); + assertFalse(new TextMatcher("fo*ar", false, false).match("foobar_baz")); + } + + @Test + public void testChinese() { + assertTrue(new TextMatcher("喜欢", false, false).match("我 喜欢 吃 苹果。")); + // This test would work only if word-splitting used the ICU BreakIterator. + // "Words" are as shown above. 
+ // assertTrue(new TextMatcher("喜欢", false, false).match("我喜欢吃苹果。")); + } + + @Test + public void testSingleWords() { + assertTrue(new TextMatcher("huhn", false, false).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("h?hner", false, false).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("h*hner", false, false).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("hühner", false, false).match("hahn henne hühner küken huhn")); + // Full pattern must match word fully + assertFalse(new TextMatcher("h?hner", false, false).match("hahn henne hühnerhof küken huhn")); + assertFalse(new TextMatcher("h*hner", false, false).match("hahn henne hühnerhof küken huhn")); + assertFalse(new TextMatcher("hühner", false, false).match("hahn henne hühnerhof küken huhn")); + + assertTrue(new TextMatcher("huhn", false, true).match("hahn henne hühner küken huhn")); + assertFalse(new TextMatcher("h?hner", false, true).match("hahn henne hühner küken huhn")); + assertFalse(new TextMatcher("h*hner", false, true).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("hühner", false, true).match("hahn henne hühner küken huhn")); + // Full pattern must match word fully + assertFalse(new TextMatcher("h?hner", false, true).match("hahn henne hühnerhof küken huhn")); + assertFalse(new TextMatcher("h*hner", false, true).match("hahn henne hühnerhof küken huhn")); + assertFalse(new TextMatcher("hühner", false, true).match("hahn henne hühnerhof küken huhn")); + + // Bug 570390: Pattern starting/ending with whitespace should still match + assertTrue(new TextMatcher("hahn ", false, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher("huhn ", false, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher(" hahn", false, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher(" huhn", false, false).match("hahn henne hühnerhof küken huhn")); + } + + @Test + public 
void testMultipleWords() { + assertTrue(new TextMatcher("huhn h?hner", false, false).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("huhn h?hner", false, false).match("hahn henne hühnerhof küken huhn")); + assertFalse(new TextMatcher("huhn h?hner", false, true).match("hahn henne hühner küken huhn")); + assertFalse(new TextMatcher("huhn h?hner", false, true).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher("huhn h*hner", false, false).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("huhn h*hner", false, false).match("hahn henne hühnerhof küken huhn")); + assertFalse(new TextMatcher("huhn h*hner", false, true).match("hahn henne hühner küken huhn")); + assertFalse(new TextMatcher("huhn h*hner", false, true).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher("huhn hühner", false, false).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("huhn hühner", false, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher("huhn hühner", false, true).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("huhn hühner", false, true).match("hahn henne hühnerhof küken huhn")); + + // Bug 570390: Pattern starting/ending with whitespace should still match + assertTrue(new TextMatcher("huhn hahn ", false, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher("hahn huhn ", false, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher(" huhn hahn", false, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher(" hahn huhn", false, false).match("hahn henne hühnerhof küken huhn")); + } + + @Test + public void testCaseInsensitivity() { + assertTrue(new TextMatcher("Huhn HÜHNER", true, false).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("Huhn HÜHNER", true, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher("Huhn HÜHNER", true, 
true).match("hahn henne hühner küken huhn")); + assertTrue(new TextMatcher("Huhn HÜHNER", true, true).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher("HüHnEr", true, false).match("hahn henne hühner küken huhn")); + assertFalse(new TextMatcher("HüHnEr", true, false).match("hahn henne hühnerhof küken huhn")); + assertTrue(new TextMatcher("HüHnEr", true, true).match("hahn henne hühner küken huhn")); + assertFalse(new TextMatcher("HüHnEr", true, true).match("hahn henne hühnerhof küken huhn")); + } +} From 3cbf9598727d61f7a814a186c76940b284509a43 Mon Sep 17 00:00:00 2001 From: Eclipse Platform Bot Date: Thu, 12 Dec 2024 19:51:15 +0000 Subject: [PATCH 2/2] Version bump(s) for 4.35 stream --- .../tests/org.eclipse.core.tests.resources/META-INF/MANIFEST.MF | 2 +- resources/tests/org.eclipse.core.tests.resources/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/tests/org.eclipse.core.tests.resources/META-INF/MANIFEST.MF b/resources/tests/org.eclipse.core.tests.resources/META-INF/MANIFEST.MF index cbb54734ffd..9f542ab492a 100644 --- a/resources/tests/org.eclipse.core.tests.resources/META-INF/MANIFEST.MF +++ b/resources/tests/org.eclipse.core.tests.resources/META-INF/MANIFEST.MF @@ -2,7 +2,7 @@ Manifest-Version: 1.0 Bundle-ManifestVersion: 2 Bundle-Name: Eclipse Core Tests Resources Bundle-SymbolicName: org.eclipse.core.tests.resources; singleton:=true -Bundle-Version: 3.11.700.qualifier +Bundle-Version: 3.11.800.qualifier Bundle-Vendor: Eclipse.org Export-Package: org.eclipse.core.tests.filesystem, org.eclipse.core.tests.internal.alias, diff --git a/resources/tests/org.eclipse.core.tests.resources/pom.xml b/resources/tests/org.eclipse.core.tests.resources/pom.xml index 44f9740e18c..d5293064bf1 100644 --- a/resources/tests/org.eclipse.core.tests.resources/pom.xml +++ b/resources/tests/org.eclipse.core.tests.resources/pom.xml @@ -18,7 +18,7 @@ 4.35.0-SNAPSHOT org.eclipse.core.tests.resources - 3.11.700-SNAPSHOT + 
3.11.800-SNAPSHOT eclipse-test-plugin