diff --git a/src/main/java/org/apache/commons/text/diff/Comparator.java b/src/main/java/org/apache/commons/text/diff/Comparator.java new file mode 100755 index 0000000000..0d202d827b --- /dev/null +++ b/src/main/java/org/apache/commons/text/diff/Comparator.java @@ -0,0 +1,335 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.diff; + +import java.util.Arrays; + +/** + *
+ * It is guaranteed that the comparisons will always be done as + * {@code o1.equals(o2)} where {@code o1} belongs to the first + * sequence and {@code o2} belongs to the second sequence. This can + * be important if subclassing is used for some elements in the first + * sequence and the {@code equals} method is specialized. + *
+ *+ * Comparison can be seen from two points of view: either as giving the smallest + * modification that transforms the first sequence into the second one, or + * as giving the longest sequence which is a subsequence of both initial + * sequences. The {@code equals} method is used to compare objects, so any + * object can be put into sequences. Modifications include deleting, inserting + * or keeping one object, starting from the beginning of the first sequence. + *
+ *+ * This class implements the comparison algorithm, which is the very efficient + * algorithm from Eugene W. Myers + * + * An O(ND) Difference Algorithm and Its Variations. This algorithm produces + * the shortest possible {@link EditScript edit script} containing all the + * {@link EditCommand commands} needed to transform the first sequence into + * the second one. + * + *
+ * This code has been adapted from Apache Commons Collections 4.0. + *
+ * + * @param+ * Creates a new instance of Comparator. + *
+ *+ * It is guaranteed that the comparisons will always be done as + * {@code o1.equals(o2)} where {@code o1} belongs to the first + * sequence and {@code o2} belongs to the second sequence. This can be + * important if subclassing is used for some elements in the first sequence + * and the {@code equals} method is specialized. + *
+ * + * @param left first sequence to be compared + * @param right second sequence to be compared + */ + public Comparator(final T[] left, final T[] right) { + this.left = Arrays.copyOf(left, left.length); + this.right = Arrays.copyOf(right, right.length); + + final int size = left.length + right.length + 2; + vDown = new int[size]; + vUp = new int[size]; + } + + /** + * Get the {@link EditScript} object. + *+ * It is guaranteed that the objects embedded in the {@link InsertCommand + * insert commands} come from the second sequence and that the objects + * embedded in either the {@link DeleteCommand delete commands} or + * {@link KeepCommand keep commands} come from the first sequence. This can + * be important if subclassing is used for some elements in the first + * sequence and the {@code equals} method is specialized. + *
+ * + * @return The edit script resulting from the comparison of the two + * sequences + */ + public EditScript+ * The snake is found using the MYERS Algorithm (this algorithm has + * also been implemented in the GNU diff program). This algorithm is + * explained in Eugene Myers' article: + * + * An O(ND) Difference Algorithm and Its Variations. + *
+ * + * @param start1 the begin of the first sequence to be compared + * @param end1 the end of the first sequence to be compared + * @param start2 the begin of the second sequence to be compared + * @param end2 the end of the second sequence to be compared + * @return The middle snake + */ + private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) { + // Myers Algorithm + // Initialisations + final int m = end1 - start1; + final int n = end2 - start2; + if (m == 0 || n == 0) { + return null; + } + + final int delta = m - n; + final int sum = n + m; + final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2; + vDown[1 + offset] = start1; + vUp[1 + offset] = end1 + 1; + + for (int d = 0; d <= offset; ++d) { + // Down + for (int k = -d; k <= d; k += 2) { + // First step + + final int i = k + offset; + if (k == -d || k != d && vDown[i - 1] < vDown[i + 1]) { + vDown[i] = vDown[i + 1]; + } else { + vDown[i] = vDown[i - 1] + 1; + } + + int x = vDown[i]; + int y = x - start1 + start2 - k; + + while (x < end1 && y < end2 && left[x].equals(right[y])) { + vDown[i] = ++x; + ++y; + } + // Second step + if (delta % 2 != 0 && delta - d <= k && k <= delta + d) { + if (vUp[i - delta] <= vDown[i]) { // NOPMD + return buildSnake(vUp[i - delta], k + start1 - start2, end1, end2); + } + } + } + + // Up + for (int k = delta - d; k <= delta + d; k += 2) { + // First step + final int i = k + offset - delta; + if (k == delta - d + || k != delta + d && vUp[i + 1] <= vUp[i - 1]) { + vUp[i] = vUp[i + 1] - 1; + } else { + vUp[i] = vUp[i - 1]; + } + + int x = vUp[i] - 1; + int y = x - start1 + start2 - k; + while (x >= start1 && y >= start2 + && left[x].equals(right[y])) { + vUp[i] = x--; + y--; + } + // Second step + if (delta % 2 == 0 && -d <= k && k <= d) { + if (vUp[i] <= vDown[i + delta]) { // NOPMD + return buildSnake(vUp[i], k + start1 - start2, end1, end2); + } + } + } + } + + // this should not happen + throw new RuntimeException("Internal Error"); 
+ } + + /** + * Build a snake. + * + * @param start the value of the start of the snake + * @param diag the value of the diagonal of the snake + * @param end1 the value of the end of the first sequence to be compared + * @param end2 the value of the end of the second sequence to be compared + * @return The snake built + */ + private Snake buildSnake(final int start, final int diag, final int end1, final int end2) { + int end = start; + while (end - diag < end2 + && end < end1 + && left[end].equals(right[end - diag])) { + ++end; + } + return new Snake(start, end, diag); + } + + /** + * This class is a simple placeholder to hold the end part of a path + * under construction in a {@link Comparator Comparator}. + */ + private static class Snake { + + /** Start index. */ + private final int start; + + /** End index. */ + private final int end; + + /** Diagonal number. */ + private final int diag; + + /** + * Simple constructor. Creates a new instance of Snake with specified indices. + * + * @param start start index of the snake + * @param end end index of the snake + * @param diag diagonal number + */ + Snake(final int start, final int end, final int diag) { + this.start = start; + this.end = end; + this.diag = diag; + } + + /** + * Get the start index of the snake. + * + * @return start index of the snake + */ + public int getStart() { + return start; + } + + /** + * Get the end index of the snake. + * + * @return end index of the snake + */ + public int getEnd() { + return end; + } + + /** + * Get the diagonal number of the snake. 
+ * + * @return diagonal number of the snake + */ + public int getDiag() { + return diag; + } + } + +} diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java old mode 100644 new mode 100755 index b3963e5d5b..a6217ed515 --- a/src/main/java/org/apache/commons/text/diff/StringsComparator.java +++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java @@ -16,316 +16,28 @@ */ package org.apache.commons.text.diff; +import org.apache.commons.lang3.ArrayUtils; + /** *- * It is guaranteed that the comparisons will always be done as - * {@code o1.equals(o2)} where {@code o1} belongs to the first - * sequence and {@code o2} belongs to the second sequence. This can - * be important if subclassing is used for some elements in the first - * sequence and the {@code equals} method is specialized. - *
- *- * Comparison can be seen from two points of view: either as giving the smallest - * modification allowing to transform the first sequence into the second one, or - * as giving the longest sequence which is a subsequence of both initial - * sequences. The {@code equals} method is used to compare objects, so any - * object can be put into sequences. Modifications include deleting, inserting - * or keeping one object, starting from the beginning of the first sequence. + * This class is a specialization of {@link Comparator class} adapted for sequences of {@code Character}. *
- *- * This class implements the comparison algorithm, which is the very efficient - * algorithm from Eugene W. Myers - * - * An O(ND) Difference Algorithm and Its Variations. This algorithm produces - * the shortest possible {@link EditScript edit script} containing all the - * {@link EditCommand commands} needed to transform the first sequence into - * the second one. * - *
- * This code has been adapted from Apache Commons Collections 4.0. - *
- * - * @see EditScript - * @see EditCommand - * @see CommandVisitor * @since 1.0 */ -public class StringsComparator { - - /** - * First character sequence. - */ - private final String left; - /** - * Second character sequence. - */ - private final String right; - /** - * Temporary array. - */ - private final int[] vDown; - /** - * Temporary array. - */ - private final int[] vUp; +public class StringsComparator extends Comparator* Creates a new instance of StringsComparator. *
- *- * It is guaranteed that the comparisons will always be done as - * {@code o1.equals(o2)} where {@code o1} belongs to the first - * sequence and {@code o2} belongs to the second sequence. This can be - * important if subclassing is used for some elements in the first sequence - * and the {@code equals} method is specialized. - *
* * @param left first character sequence to be compared * @param right second character sequence to be compared */ public StringsComparator(final String left, final String right) { - this.left = left; - this.right = right; - - final int size = left.length() + right.length() + 2; - vDown = new int[size]; - vUp = new int[size]; - } - - /** - * Get the {@link EditScript} object. - *- * It is guaranteed that the objects embedded in the {@link InsertCommand - * insert commands} come from the second sequence and that the objects - * embedded in either the {@link DeleteCommand delete commands} or - * {@link KeepCommand keep commands} come from the first sequence. This can - * be important if subclassing is used for some elements in the first - * sequence and the {@code equals} method is specialized. - *
- * - * @return The edit script resulting from the comparison of the two - * sequences - */ - public EditScript- * The snake is found using the MYERS Algorithm (this algorithms has - * also been implemented in the GNU diff program). This algorithm is - * explained in Eugene Myers article: - * - * An O(ND) Difference Algorithm and Its Variations. - *
- * - * @param start1 the begin of the first sequence to be compared - * @param end1 the end of the first sequence to be compared - * @param start2 the begin of the second sequence to be compared - * @param end2 the end of the second sequence to be compared - * @return The middle snake - */ - private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) { - // Myers Algorithm - // Initialisations - final int m = end1 - start1; - final int n = end2 - start2; - if (m == 0 || n == 0) { - return null; - } - - final int delta = m - n; - final int sum = n + m; - final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2; - vDown[1 + offset] = start1; - vUp[1 + offset] = end1 + 1; - - for (int d = 0; d <= offset; ++d) { - // Down - for (int k = -d; k <= d; k += 2) { - // First step - - final int i = k + offset; - if (k == -d || k != d && vDown[i - 1] < vDown[i + 1]) { - vDown[i] = vDown[i + 1]; - } else { - vDown[i] = vDown[i - 1] + 1; - } - - int x = vDown[i]; - int y = x - start1 + start2 - k; - - while (x < end1 && y < end2 && left.charAt(x) == right.charAt(y)) { - vDown[i] = ++x; - ++y; - } - // Second step - if (delta % 2 != 0 && delta - d <= k && k <= delta + d) { - if (vUp[i - delta] <= vDown[i]) { // NOPMD - return buildSnake(vUp[i - delta], k + start1 - start2, end1, end2); - } - } - } - - // Up - for (int k = delta - d; k <= delta + d; k += 2) { - // First step - final int i = k + offset - delta; - if (k == delta - d - || k != delta + d && vUp[i + 1] <= vUp[i - 1]) { - vUp[i] = vUp[i + 1] - 1; - } else { - vUp[i] = vUp[i - 1]; - } - - int x = vUp[i] - 1; - int y = x - start1 + start2 - k; - while (x >= start1 && y >= start2 - && left.charAt(x) == right.charAt(y)) { - vUp[i] = x--; - y--; - } - // Second step - if (delta % 2 == 0 && -d <= k && k <= d) { - if (vUp[i] <= vDown[i + delta]) { // NOPMD - return buildSnake(vUp[i], k + start1 - start2, end1, end2); - } - } - } - } - - // this should not happen - throw new 
RuntimeException("Internal Error"); - } - - /** - * Build a snake. - * - * @param start the value of the start of the snake - * @param diag the value of the diagonal of the snake - * @param end1 the value of the end of the first sequence to be compared - * @param end2 the value of the end of the second sequence to be compared - * @return The snake built - */ - private Snake buildSnake(final int start, final int diag, final int end1, final int end2) { - int end = start; - while (end - diag < end2 - && end < end1 - && left.charAt(end) == right.charAt(end - diag)) { - ++end; - } - return new Snake(start, end, diag); - } - - /** - * This class is a simple placeholder to hold the end part of a path - * under construction in a {@link StringsComparator StringsComparator}. - */ - private static class Snake { - - /** Start index. */ - private final int start; - - /** End index. */ - private final int end; - - /** Diagonal number. */ - private final int diag; - - /** - * Simple constructor. Creates a new instance of Snake with specified indices. - * - * @param start start index of the snake - * @param end end index of the snake - * @param diag diagonal number - */ - Snake(final int start, final int end, final int diag) { - this.start = start; - this.end = end; - this.diag = diag; - } - - /** - * Get the start index of the snake. - * - * @return start index of the snake - */ - public int getStart() { - return start; - } - - /** - * Get the end index of the snake. - * - * @return end index of the snake - */ - public int getEnd() { - return end; - } - - /** - * Get the diagonal number of the snake. 
- * - * @return diagonal number of the snake - */ - public int getDiag() { - return diag; - } + super(ArrayUtils.toObject(left.toCharArray()), ArrayUtils.toObject(right.toCharArray())); } } diff --git a/src/main/java/org/apache/commons/text/diff/WordWiseStringsComparator.java b/src/main/java/org/apache/commons/text/diff/WordWiseStringsComparator.java new file mode 100755 index 0000000000..9f8a5e4e02 --- /dev/null +++ b/src/main/java/org/apache/commons/text/diff/WordWiseStringsComparator.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.diff; + +/** + *+ * This class is a specialization of {@link Comparator class} adapted for sequences of {@code String} objects. + *
+ * + * @since 1.x + */ +public class WordWiseStringsComparator extends Comparator+ * Creates a new instance of WordWiseStringsComparator. + *
+ * + * @param left first sequence of {@code String} objects to be compared + * @param right second sequence of {@code String} objects to be compared + */ + public WordWiseStringsComparator(String[] left, String[] right) { + super(left, right); + } + +} diff --git a/src/test/java/org/apache/commons/text/diff/WordWiseStringsComparatorTest.java b/src/test/java/org/apache/commons/text/diff/WordWiseStringsComparatorTest.java new file mode 100755 index 0000000000..23a2d02c6f --- /dev/null +++ b/src/test/java/org/apache/commons/text/diff/WordWiseStringsComparatorTest.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.diff; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Tests for the WordWiseStringsComparator. + */ +public class WordWiseStringsComparatorTest { + private List