From b64a0e05f761a56d96a7c07d3fef7398269c0599 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 15 Aug 2023 13:34:20 -0400 Subject: [PATCH 01/52] initial implementation of Cases api --- .../apache/commons/text/cases/CamelCase.java | 128 +++++++++++++ .../org/apache/commons/text/cases/Case.java | 50 +++++ .../commons/text/cases/DelimitedCase.java | 143 +++++++++++++++ .../apache/commons/text/cases/KebabCase.java | 40 ++++ .../apache/commons/text/cases/PascalCase.java | 117 ++++++++++++ .../apache/commons/text/cases/SnakeCase.java | 40 ++++ .../commons/text/cases/package-info.java | 29 +++ .../apache/commons/text/cases/CasesTest.java | 171 ++++++++++++++++++ 8 files changed, 718 insertions(+) create mode 100644 src/main/java/org/apache/commons/text/cases/CamelCase.java create mode 100644 src/main/java/org/apache/commons/text/cases/Case.java create mode 100644 src/main/java/org/apache/commons/text/cases/DelimitedCase.java create mode 100644 src/main/java/org/apache/commons/text/cases/KebabCase.java create mode 100644 src/main/java/org/apache/commons/text/cases/PascalCase.java create mode 100644 src/main/java/org/apache/commons/text/cases/SnakeCase.java create mode 100644 src/main/java/org/apache/commons/text/cases/package-info.java create mode 100644 src/test/java/org/apache/commons/text/cases/CasesTest.java diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java new file mode 100644 index 0000000000..95ad46ee2b --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.cases; + +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.lang3.CharUtils; +import org.apache.commons.lang3.StringUtils; + +/** + * Case implementation that parses and formats strings of the form 'myCamelCase' + *

+ * This case separates tokens on uppercase ascii alpha characters, with the exception + * that the first token begin with a lowercase ascii alpha character. + *

+ */ +public class CamelCase implements Case { + + /** constant reuseable instance of this case. */ + public static final CamelCase INSTANCE = new CamelCase(); + + /** + * Constructs new CamelCase instance. + */ + public CamelCase() { + super(); + } + + /** + * Parses string tokens from a Camel Case formatted string. + *

+ * Parses each character of the string parameter and creates new tokens when uppercase ascii + * letters are encountered. The upppercase letter is considered part of the new token. The very + * first character of the string is an exception to this rule and must be a lowercase ascii + * character. This method places no other restrictions on the content of the string.
+ * Note: This method should never produce empty tokens. + *

+ * @param string Camel Case formatted string to parse + * @return list of tokens parsed from the string + */ + @Override + public List parse(String string) { + List tokens = new LinkedList<>(); + if (string.length() == 0) { + return tokens; + } + if (!CharUtils.isAsciiAlphaLower(string.charAt(0))) { + throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be an ascii lowercase letter"); + } + /*StringBuilder tokenBuilder = new StringBuilder(); + for (int i = 0; i < string.length(); i++) { + char c = string.charAt(i); + if (CharUtils.isAsciiAlphaUpper(c)) { + tokens.add(tokenBuilder.toString()); + tokenBuilder.setLength(0); + } + tokenBuilder.append(c); + } + tokens.add(tokenBuilder.toString());*/ + int strLen = string.length(); + int[] tokenCodePoints = new int[strLen]; + int tokenCodePointsOffset = 0; + for (int i = 0; i < string.length();) { + final int codePoint = string.codePointAt(i); + if (CharUtils.isAsciiAlphaUpper((char) codePoint)) { + if (tokenCodePointsOffset > 0) { + tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); + tokenCodePoints = new int[strLen]; + tokenCodePointsOffset = 0; + } + tokenCodePoints[tokenCodePointsOffset++] = codePoint; + i += Character.charCount(codePoint); + } else { + tokenCodePoints[tokenCodePointsOffset++] = codePoint; + i += Character.charCount(codePoint); + } + } + tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); + return tokens; + } + + /** + * Formats tokens into a Camel Case string. + *

+ * Iterates each token and creates a camel case formatted string. Each token must begin with an + * ascii letter, which will be forced uppercase in the output, except for the very first token, + * which will have a lowercase first character. The remaining characters in all tokens will be + * forced lowercase. This Case does not support empty tokens.
+ * No other restrictions are placed on token contents. + *

+ * @param tokens String tokens to format into CamelCase + * @return Camel Case formatted string + */ + @Override + public String format(Iterable tokens) { + StringBuilder formattedString = new StringBuilder(); + int i = 0; + for (String token : tokens) { + if (token.length() == 0) { + throw new IllegalArgumentException("Unsupported empty token at index " + i); + } + if (!CharUtils.isAsciiAlpha(token.charAt(0))) { + throw new IllegalArgumentException("First character '" + token.charAt(0) + "' in token " + i + " must be an ascii letter"); + } + String formattedToken = (i == 0 ? token.substring(0, 1).toLowerCase() : token.substring(0, 1).toUpperCase()) + + (token.length() > 1 ? token.substring(1).toLowerCase() : StringUtils.EMPTY); + i++; + formattedString.append(formattedToken); + } + return formattedString.toString(); + } + +} diff --git a/src/main/java/org/apache/commons/text/cases/Case.java b/src/main/java/org/apache/commons/text/cases/Case.java new file mode 100644 index 0000000000..6efe857f5a --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/Case.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.text.cases; + +import java.util.List; + +/** + * Handles formatting and parsing tokens to/from a String. For most implementations tokens returned + * by the parse method should abide by any restrictions present in the format method. i.e. Calling + * format() with the results of a call to parse() on the same Case instance should return a + * matching String. + * + * @since 1.11 + */ +public interface Case { + + /** + * Formats a set of tokens into a string. The tokens do not necessarily have to meet the syntax + * requirements of the Case. The documentation for each implementation should specify what input + * is supported. + * + * @param tokens string tokens to be formatted by this Case + * @return the formatted string + */ + String format(Iterable tokens); + + /** + * Parses a string into a series of tokens. The string must abide by certain restrictions, + * dependent on each Case implementation. + * + * @param string The string to be parsed by the Case into a list of tokens + * @return The list of parsed tokens + */ + List parse(String string); + +} diff --git a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java new file mode 100644 index 0000000000..8070504ad2 --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.cases; + +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang3.CharUtils; + +/** + * DelimitedCase is a case in which the true alphabetic case of the characters is ignored by default + * and tokens themselves are determined by the presence of a delimiter between each token. + */ +public class DelimitedCase implements Case { + + /** delimiters to be used when parsing. */ + private Set parseDelimiters; + + /** delimited to be used when formatting. */ + private String formatDelimiter; + + /** + * Constructs a new Delimited Case. + * @param delimiter the character to use as both the parse and format delimiter + */ + public DelimitedCase(char delimiter) { + this(new char[] { delimiter }, CharUtils.toString(delimiter)); + } + + /** + * Constructs a new delimited case. 
+ * @param parseDelimiters The array of delimiters to use when parsing + * @param formatDelimiter The delimiter to use when formatting + */ + public DelimitedCase(char[] parseDelimiters, String formatDelimiter) { + super(); + if (parseDelimiters == null || parseDelimiters.length == 0) { + throw new IllegalArgumentException("Parse Delimiters cannot be null or empty"); + } + if (formatDelimiter == null || formatDelimiter.length() == 0) { + throw new IllegalArgumentException("Format Delimiters cannot be null or empty"); + } + this.parseDelimiters = generateDelimiterSet(parseDelimiters); + this.formatDelimiter = formatDelimiter; + } + + /** + * Formats tokens into Delimited Case. + *

+ * Tokens are iterated on and appended to the output string, with one instance of the + * format delimiter between them. This method validates that the format delimiter does not + * occur in any token. If it is found within a token an IllegalArgumentException is thrown.
+ * No other restrictions are placed on the contents of the tokens. + * Note: This Case does support empty tokens.
+ *

+ * @param tokens the tokens to be formatted into a delimited string + * @return The delimited string + */ + @Override + public String format(Iterable tokens) { + StringBuilder formattedString = new StringBuilder(); + int i = 0; + for (String token : tokens) { + int delimiterFoundIndex = token.indexOf(formatDelimiter); + if (delimiterFoundIndex > -1) { + throw new IllegalArgumentException("Token " + i + " contains delimiter character '" + formatDelimiter + "' at index " + delimiterFoundIndex); + } + if (i > 0) { + formattedString.append(formatDelimiter); + } + i++; + formattedString.append(token); + } + return formattedString.toString(); + } + + /** + * Parses delimited string into tokens. + *

+ * Input string is parsed one character at a time until a delimiter character is reached. + * When a delimiter character is reached a new token begins. The delimiter character is + * considered reserved, and is omitted from the returned parsed tokens.
+ * No other restrictions are placed on the contents of the input string.
+ *

+ * @param string The delimited string to be parsed + * @return The list of tokens found in the string + */ + @Override + public List parse(String string) { + List tokens = new LinkedList<>(); + if (string.length() == 0) { + return tokens; + } + int strLen = string.length(); + int[] tokenCodePoints = new int[strLen]; + int tokenCodePointsOffset = 0; + for (int i = 0; i < string.length();) { + final int codePoint = string.codePointAt(i); + if (parseDelimiters.contains(codePoint)) { + tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); + tokenCodePoints = new int[strLen]; + tokenCodePointsOffset = 0; + i++; + } else { + tokenCodePoints[tokenCodePointsOffset++] = codePoint; + i += Character.charCount(codePoint); + } + } + tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); + return tokens; + } + + /** + * Converts an array of delimiters to a hash set of code points. The generated hash set provides O(1) lookup time. + * + * @param delimiters set of characters to determine capitalization, null means whitespace + * @return Set + */ + private static Set generateDelimiterSet(final char[] delimiters) { + final Set delimiterHashSet = new HashSet<>(); + for (int index = 0; index < delimiters.length; index++) { + delimiterHashSet.add(Character.codePointAt(delimiters, index)); + } + return delimiterHashSet; + } + +} diff --git a/src/main/java/org/apache/commons/text/cases/KebabCase.java b/src/main/java/org/apache/commons/text/cases/KebabCase.java new file mode 100644 index 0000000000..af1860e1f9 --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/KebabCase.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.cases; + +/** + * Case implementation which parses and formats strings of the form 'my-kebab-string' + *

+ * KebabCase is a delimited case where the delimiter is a hyphen character '-'. + *

+ */ +public class KebabCase extends DelimitedCase { + + /** constant for delimiter. */ + public static final char DELIMITER = '-'; + + /** constant reuseable instance of this case. */ + public static final KebabCase INSTANCE = new KebabCase(); + + /** + * Constructs a new KebabCase instance. + */ + public KebabCase() { + super(DELIMITER); + } + +} diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java new file mode 100644 index 0000000000..eb5af9feda --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.cases; + +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.lang3.CharUtils; +import org.apache.commons.lang3.StringUtils; + +/** + * Case implementation which parses and formats strings of the form 'MyPascalString' + *

+ * PascalCase is a case where tokens are delimited by uppercase characters. Each parsed token + * must begin with an uppercase character, but the case of the remaining token characters is + * ignored and returned as-is. + *

+ */ +public class PascalCase implements Case { + + /** constant reuseable instance of this case. */ + public static final PascalCase INSTANCE = new PascalCase(); + + /** + * Constructs a new PascalCase instance. + */ + public PascalCase() { + } + + /** + * Parses a PascalCase string into tokens. + *

+ * String characters are iterated over and any time an upper case ascii character is + * encountered, that character is considered to be the start of a new token, with the character + * itself included in the token. This method should never return empty tokens. The first + * character of the string must be an uppercase ascii character. No further restrictions are + * placed on string contents. + *

+ * @param string The Pascal Cased string to parse + * @return the list of tokens found in the string + */ + @Override + public List parse(String string) { + List tokens = new LinkedList<>(); + if (string.length() == 0) { + return tokens; + } + if (!CharUtils.isAsciiAlphaUpper(string.charAt(0))) { + throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be ascii uppercase"); + } + int strLen = string.length(); + int[] tokenCodePoints = new int[strLen]; + int tokenCodePointsOffset = 0; + for (int i = 0; i < string.length();) { + final int codePoint = string.codePointAt(i); + if (CharUtils.isAsciiAlphaUpper((char) codePoint)) { + if (tokenCodePointsOffset > 0) { + tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); + tokenCodePoints = new int[strLen]; + tokenCodePointsOffset = 0; + } + tokenCodePoints[tokenCodePointsOffset++] = codePoint; + i += Character.charCount(codePoint); + } else { + tokenCodePoints[tokenCodePointsOffset++] = codePoint; + i += Character.charCount(codePoint); + } + } + tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); + return tokens; + } + + /** + * Formats string tokens into a Pascal Case string. + *

+ * Iterates the tokens and formates each one into a Pascal Case token. The first character of + * the token must be an ascii alpha character. This character is forced upper case in the + * output. The remaining alpha characters of the token are forced lowercase. Any other + * characters in the token are returned as-is. Empty tokens are not supported. + *

+ * @param tokens The string tokens to be formatted into Pascal Case + * @return The Pascal Case formatted string + */ + @Override + public String format(Iterable tokens) { + StringBuilder formattedString = new StringBuilder(); + int i = 0; + for (String token : tokens) { + if (token.length() == 0) { + throw new IllegalArgumentException("Unsupported empty token at index " + i); + } + if (!CharUtils.isAsciiAlpha(token.charAt(0))) { + throw new IllegalArgumentException("First character '" + token.charAt(0) + "' in token " + i + " must be an ascii letter"); + } + String formattedToken = token.substring(0, 1).toUpperCase() + (token.length() > 1 ? token.substring(1).toLowerCase() : StringUtils.EMPTY); + i++; + formattedString.append(formattedToken); + } + return formattedString.toString(); + + } + +} diff --git a/src/main/java/org/apache/commons/text/cases/SnakeCase.java b/src/main/java/org/apache/commons/text/cases/SnakeCase.java new file mode 100644 index 0000000000..db14ac4cfb --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/SnakeCase.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.text.cases; + +/** + * Case implementation which parses and formats strings of the form 'my_snake_string' + *

+ * SnakeCase is a delimited case where the delimiter is the underscore character '_'. + *

+ */ +public class SnakeCase extends DelimitedCase { + + /** constant for delimiter. */ + public static final char DELIMITER = '_'; + + /** constant reuseable instance of this case. */ + public static final SnakeCase INSTANCE = new SnakeCase(); + + /** + * Constructs a new SnakeCase instance. + */ + public SnakeCase() { + super(DELIMITER); + } + +} diff --git a/src/main/java/org/apache/commons/text/cases/package-info.java b/src/main/java/org/apache/commons/text/cases/package-info.java new file mode 100644 index 0000000000..3dff24de0c --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/package-info.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + *

Provides algorithms for parsing and formatting various programming "Cases".

+ *

The provided implementations cover the four most common cases, plus the generic DelimitedCase that two of them extend:
+ * CamelCase - delimited by ascii uppercase alpha characters and always beginning with a lowercase ascii alpha
+ * PascalCase - Similar to CamelCase but always begins with an uppercase ascii alpha
+ * DelimitedCase - delimited by one or more configurable characters, which are omitted from parsed tokens
+ * SnakeCase - implementation of DelimitedCase in which the delimiter is an underscore '_'
+ * KebabCase - implementation of DelimitedCase in which the delimiter is a hyphen '-'
+ *

+ * + * @since 1.0 + */ +package org.apache.commons.text.cases; diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java new file mode 100644 index 0000000000..ef0887f6fe --- /dev/null +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.text.cases; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class CasesTest { + + @Test + public void testDelimiterCharacterException() { + Assertions.assertThrows(IllegalArgumentException.class, () -> KebabCase.INSTANCE.format(Arrays.asList("a", "-"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> SnakeCase.INSTANCE.format(Arrays.asList("a", "_"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> new DelimitedCase(null, ",")); + Assertions.assertThrows(IllegalArgumentException.class, () -> new DelimitedCase(new char[1], null)); + Assertions.assertThrows(IllegalArgumentException.class, () -> new DelimitedCase(new char[0], ",")); + Assertions.assertThrows(IllegalArgumentException.class, () -> new DelimitedCase(new char[0], "")); + } + + @Test + public void testKebabCase() { + assertFormatAndParse(KebabCase.INSTANCE, "", Arrays.asList()); + assertFormatAndParse(KebabCase.INSTANCE, "my-Tokens-123-a1", Arrays.asList("my", "Tokens", "123", "a1")); + assertFormatAndParse(KebabCase.INSTANCE, "blank--token", Arrays.asList("blank", "", "token")); + } + + @Test + public void testUtf32() { + assertFormatAndParse(KebabCase.INSTANCE, "\uD800\uDF00-\uD800\uDF01\uD800\uDF14-\uD800\uDF02\uD800\uDF03", + Arrays.asList("\uD800\uDF00", "\uD800\uDF01\uD800\uDF14", "\uD800\uDF02\uD800\uDF03")); + assertFormatAndParse(SnakeCase.INSTANCE, "\uD800\uDF00_\uD800\uDF01\uD800\uDF14_\uD800\uDF02\uD800\uDF03", + Arrays.asList("\uD800\uDF00", "\uD800\uDF01\uD800\uDF14", "\uD800\uDF02\uD800\uDF03")); + assertFormatAndParse(PascalCase.INSTANCE, "A\uD800\uDF00B\uD800\uDF01\uD800\uDF14C\uD800\uDF02\uD800\uDF03", + Arrays.asList("A\uD800\uDF00", "B\uD800\uDF01\uD800\uDF14", "C\uD800\uDF02\uD800\uDF03")); + assertFormatAndParse(CamelCase.INSTANCE, "a\uD800\uDF00B\uD800\uDF01\uD800\uDF14C\uD800\uDF02\uD800\uDF03", + 
Arrays.asList("a\uD800\uDF00", "B\uD800\uDF01\uD800\uDF14", "C\uD800\uDF02\uD800\uDF03")); + } + + @Test + public void testSnakeCase() { + assertFormatAndParse(SnakeCase.INSTANCE, "", Arrays.asList()); + assertFormatAndParse(SnakeCase.INSTANCE, "my_Tokens_123_a1", Arrays.asList("my", "Tokens", "123", "a1")); + assertFormatAndParse(SnakeCase.INSTANCE, "blank__token", Arrays.asList("blank", "", "token")); + } + + @Test + public void testPascalCase() { + + assertFormatAndParse(PascalCase.INSTANCE, "MyVarName", Arrays.asList("My", "Var", "Name")); + assertFormatAndParse(PascalCase.INSTANCE, "MyTokensA1D", Arrays.asList("My", "Tokens", "A1", "D")); + assertFormatAndParse(PascalCase.INSTANCE, "", Arrays.asList()); + + // first character must be ascii alpha upper + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.parse("lowerFirst")); + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("1"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList(""))); + } + + @Test + public void testCamelCase() { + + assertFormatAndParse(CamelCase.INSTANCE, "", Arrays.asList()); + assertFormatAndParse(CamelCase.INSTANCE, "myTokensAbc123", Arrays.asList("my", "Tokens", "Abc123")); + assertFormatAndParse(CamelCase.INSTANCE, "specChar-Token+", Arrays.asList("spec", "Char-", "Token+")); + + // empty token not supported + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "b", ""))); + // must begin with ascii alpha + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "1b"))); + // must begin with ascii alpha lower + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.parse("MyTokens")); + } + + @Test + public void testConversionsDelimited() { + + List tokens = Arrays.asList("My", "var", "NAME", "mIXED", "a1", "12", 
""); + + String kebabString = "My-var-NAME-mIXED-a1-12-"; + assertFormatAndParse(KebabCase.INSTANCE, kebabString, tokens); + + String snakeString = "My_var_NAME_mIXED_a1_12_"; + assertFormatAndParse(SnakeCase.INSTANCE, snakeString, tokens); + } + + @Test + public void testConversions() { + + List tokens = Arrays.asList("My", "var", "NAME", "mIXED", "a1", "c|=+"); + + String kebabString = "My-var-NAME-mIXED-a1-c|=+"; + assertFormatAndParse(KebabCase.INSTANCE, kebabString, tokens); + + String snakeString = "My_var_NAME_mIXED_a1_c|=+"; + assertFormatAndParse(SnakeCase.INSTANCE, snakeString, tokens); + + String camelString = "myVarNameMixedA1C|=+"; + assertFormatAndParse(CamelCase.INSTANCE, camelString, tokens, true); + + String pascalString = "MyVarNameMixedA1C|=+"; + assertFormatAndParse(PascalCase.INSTANCE, pascalString, tokens, true); + + } + + @Test + public void testEmptyTokens() { + List tokens = Arrays.asList("HAS", "", "empty", "Tokens", ""); + + String snakeString = "HAS__empty_Tokens_"; + assertFormatAndParse(SnakeCase.INSTANCE, snakeString, tokens); + + String kebabString = "HAS--empty-Tokens-"; + assertFormatAndParse(KebabCase.INSTANCE, kebabString, tokens); + + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(tokens)); + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(tokens)); + } + + public void assertFormatAndParse(Case caseInstance, String string, List tokens) { + assertFormatAndParse(caseInstance, string, tokens, false); + } + + /** + * Test Util method for ensuring that a case instance produces the expecting string and tokens + * upon formatting and parsing + * + * @param case Instance the case instance to use + * @param string the expected formatted string + * @param tokens the expected tokens + * @param caseInsensitive whether to not to validate tokens case insensitively + */ + public void assertFormatAndParse(Case caseInstance, String string, List tokens, Boolean 
caseInsensitive) { + List parsedTokens = caseInstance.parse(string); + if (caseInsensitive) { + assertEqualsIgnoreCase(tokens, parsedTokens); + } else { + Assertions.assertEquals(tokens, parsedTokens); + } + String formatted = caseInstance.format(tokens); + Assertions.assertEquals(string, formatted); + } + + public void assertEqualsIgnoreCase(List expected, List actual) { + Assertions.assertEquals(expected.size(), actual.size()); + Iterator itEx = expected.iterator(); + Iterator itAc = actual.iterator(); + for (; itEx.hasNext();) { + Assertions.assertEquals(itEx.next().toLowerCase(), itAc.next().toLowerCase()); + } + } + +} + From 0de50a2639c2036c2cd4c764c30272731e06c1e3 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 07:09:26 -0400 Subject: [PATCH 02/52] spelling --- .../org/apache/commons/text/cases/PascalCase.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index eb5af9feda..c2e0fd92ba 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -25,14 +25,14 @@ /** * Case implementation which parses and formats strings of the form 'MyPascalString' *

- * PascalCase is a case where tokens are delimited by uppercase characters. Each parsed token + * PascalCase is a case where tokens are delimited by uppercase ASCII characters. Each parsed token * must begin with an uppercase character, but the case of the remaining token characters is * ignored and returned as-is. *

*/ public class PascalCase implements Case { - /** constant reuseable instance of this case. */ + /** constant reusable instance of this case. */ public static final PascalCase INSTANCE = new PascalCase(); /** @@ -44,10 +44,10 @@ public PascalCase() { /** * Parses a PascalCase string into tokens. *

- * String characters are iterated over and any time an upper case ascii character is + * String characters are iterated over and any time an upper case ASCII character is * encountered, that character is considered to be the start of a new token, with the character * itself included in the token. This method should never return empty tokens. The first - * character of the string must be an uppercase ascii character. No further restrictions are + * character of the string must be an uppercase ASCII character. No further restrictions are * placed on string contents. *

* @param string The Pascal Cased string to parse @@ -87,8 +87,8 @@ public List parse(String string) { /** * Formats string tokens into a Pascal Case string. *

- * Iterates the tokens and formates each one into a Pascal Case token. The first character of - * the token must be an ascii alpha character. This character is forced upper case in the + * Iterates the tokens and formats each one into a Pascal Case token. The first character of + * the token must be an ASCII alpha character. This character is forced upper case in the * output. The remaining alpha characters of the token are forced lowercase. Any other * characters in the token are returned as-is. Empty tokens are not supported. *

From 47c244f89142a374237a34e5c9a15382ca9a5ece Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 07:11:26 -0400 Subject: [PATCH 03/52] spelling --- .../apache/commons/text/cases/CamelCase.java | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index 95ad46ee2b..567345b84d 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -25,13 +25,13 @@ /** * Case implementation that parses and formats strings of the form 'myCamelCase' *

- * This case separates tokens on uppercase ascii alpha characters, with the exception - * that the first token begin with a lowercase ascii alpha character. + * This case separates tokens on uppercase ASCII alpha characters, with the exception + * that the first token begin with a lowercase ASCII alpha character. *

*/ public class CamelCase implements Case { - /** constant reuseable instance of this case. */ + /** constant reusable instance of this case. */ public static final CamelCase INSTANCE = new CamelCase(); /** @@ -44,9 +44,9 @@ public CamelCase() { /** * Parses string tokens from a Camel Case formatted string. *

- * Parses each character of the string parameter and creates new tokens when uppercase ascii - * letters are encountered. The upppercase letter is considered part of the new token. The very - * first character of the string is an exception to this rule and must be a lowercase ascii + * Parses each character of the string parameter and creates new tokens when uppercase ASCII + * letters are encountered. The uppercase letter is considered part of the new token. The very + * first character of the string is an exception to this rule and must be a lowercase ASCII * character. This method places no other restrictions on the content of the string.
* Note: This method should never produce empty tokens. *

@@ -62,16 +62,6 @@ public List parse(String string) { if (!CharUtils.isAsciiAlphaLower(string.charAt(0))) { throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be an ascii lowercase letter"); } - /*StringBuilder tokenBuilder = new StringBuilder(); - for (int i = 0; i < string.length(); i++) { - char c = string.charAt(i); - if (CharUtils.isAsciiAlphaUpper(c)) { - tokens.add(tokenBuilder.toString()); - tokenBuilder.setLength(0); - } - tokenBuilder.append(c); - } - tokens.add(tokenBuilder.toString());*/ int strLen = string.length(); int[] tokenCodePoints = new int[strLen]; int tokenCodePointsOffset = 0; @@ -98,13 +88,13 @@ public List parse(String string) { * Formats tokens into a Camel Case string. *

* Iterates each token and creates a camel case formatted string. Each token must begin with an - * ascii letter, which will be forced uppercase in the output, except for the very first token, + * ASCII letter, which will be forced uppercase in the output, except for the very first token, * which will have a lowercase first character. The remaining characters in all tokens will be * forced lowercase. This Case does not support empty tokens.
* No other restrictions are placed on token contents. *

- * @param tokens String tokens to format into CamelCase - * @return Camel Case formatted string + * @param tokens String tokens to format into camel case + * @return camel case formatted string */ @Override public String format(Iterable tokens) { From ff8c791b32ccdd4d319eda61ca065dfff011d96c Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 07:13:56 -0400 Subject: [PATCH 04/52] private util method, spelling --- .../org/apache/commons/text/cases/CasesTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index ef0887f6fe..4713bd3de7 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -68,7 +68,7 @@ public void testPascalCase() { assertFormatAndParse(PascalCase.INSTANCE, "MyTokensA1D", Arrays.asList("My", "Tokens", "A1", "D")); assertFormatAndParse(PascalCase.INSTANCE, "", Arrays.asList()); - // first character must be ascii alpha upper + // first character must be ASCII alpha upper Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.parse("lowerFirst")); Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("1"))); Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList(""))); @@ -83,9 +83,9 @@ public void testCamelCase() { // empty token not supported Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "b", ""))); - // must begin with ascii alpha + // must begin with ASCII alpha Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "1b"))); - // must begin with ascii alpha lower + // must begin with ASCII alpha lower Assertions.assertThrows(IllegalArgumentException.class, () -> 
CamelCase.INSTANCE.parse("MyTokens")); } @@ -134,7 +134,7 @@ public void testEmptyTokens() { Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(tokens)); } - public void assertFormatAndParse(Case caseInstance, String string, List tokens) { + private void assertFormatAndParse(Case caseInstance, String string, List tokens) { assertFormatAndParse(caseInstance, string, tokens, false); } @@ -147,7 +147,7 @@ public void assertFormatAndParse(Case caseInstance, String string, List * @param tokens the expected tokens * @param caseInsensitive whether to not to validate tokens case insensitively */ - public void assertFormatAndParse(Case caseInstance, String string, List tokens, Boolean caseInsensitive) { + private void assertFormatAndParse(Case caseInstance, String string, List tokens, Boolean caseInsensitive) { List parsedTokens = caseInstance.parse(string); if (caseInsensitive) { assertEqualsIgnoreCase(tokens, parsedTokens); @@ -158,7 +158,7 @@ public void assertFormatAndParse(Case caseInstance, String string, List Assertions.assertEquals(string, formatted); } - public void assertEqualsIgnoreCase(List expected, List actual) { + private void assertEqualsIgnoreCase(List expected, List actual) { Assertions.assertEquals(expected.size(), actual.size()); Iterator itEx = expected.iterator(); Iterator itAc = actual.iterator(); From 31bd8961700815105ffd22e36aa46cfcc95400e9 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 07:25:39 -0400 Subject: [PATCH 05/52] class description --- src/main/java/org/apache/commons/text/cases/CamelCase.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index 567345b84d..a58123718a 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -25,8 +25,9 @@ /** * Case 
implementation that parses and formats strings of the form 'myCamelCase' *

- * This case separates tokens on uppercase ASCII alpha characters, with the exception - * that the first token begin with a lowercase ASCII alpha character. + * This case separates tokens on uppercase ASCII alpha characters. Each token begins with an + * uppercase ASCII alpha character, except the first token, which begins with a lowercase ASCII + * alpha character. *

*/ public class CamelCase implements Case { From f8cbd57dbfae903bc258f6ed1bb8d93bf7be9d5e Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 07:27:10 -0400 Subject: [PATCH 06/52] loop --- src/test/java/org/apache/commons/text/cases/CasesTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index 4713bd3de7..e99c5c6380 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -160,9 +160,8 @@ private void assertFormatAndParse(Case caseInstance, String string, List private void assertEqualsIgnoreCase(List expected, List actual) { Assertions.assertEquals(expected.size(), actual.size()); - Iterator itEx = expected.iterator(); Iterator itAc = actual.iterator(); - for (; itEx.hasNext();) { + for (Iterator itEx = expected.iterator(); itEx.hasNext();) { Assertions.assertEquals(itEx.next().toLowerCase(), itAc.next().toLowerCase()); } } From d741a00037a9805e105fb4163361a5099709fe9a Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 08:04:04 -0400 Subject: [PATCH 07/52] private/protected methods, spelling --- src/main/java/org/apache/commons/text/cases/CamelCase.java | 4 ++-- .../java/org/apache/commons/text/cases/DelimitedCase.java | 4 ++-- src/main/java/org/apache/commons/text/cases/KebabCase.java | 6 +++--- src/main/java/org/apache/commons/text/cases/PascalCase.java | 2 +- src/main/java/org/apache/commons/text/cases/SnakeCase.java | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index a58123718a..b1df5a848b 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -38,7 +38,7 @@ public 
class CamelCase implements Case { /** * Constructs new CamelCase instance. */ - public CamelCase() { + private CamelCase() { super(); } @@ -88,7 +88,7 @@ public List parse(String string) { /** * Formats tokens into a Camel Case string. *

- * Iterates each token and creates a camel case formatted string. Each token must begin with an + * Iterates over tokens and creates a camel case formatted string. Each token must begin with an * ASCII letter, which will be forced uppercase in the output, except for the very first token, * which will have a lowercase first character. The remaining characters in all tokens will be * forced lowercase. This Case does not support empty tokens.
diff --git a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java index 8070504ad2..f13988318e 100644 --- a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java @@ -39,7 +39,7 @@ public class DelimitedCase implements Case { * Constructs a new Delimited Case. * @param delimiter the character to use as both the parse and format delimiter */ - public DelimitedCase(char delimiter) { + protected DelimitedCase(char delimiter) { this(new char[] { delimiter }, CharUtils.toString(delimiter)); } @@ -48,7 +48,7 @@ public DelimitedCase(char delimiter) { * @param parseDelimiters The array of delimiters to use when parsing * @param formatDelimiter The delimiter to use when formatting */ - public DelimitedCase(char[] parseDelimiters, String formatDelimiter) { + protected DelimitedCase(char[] parseDelimiters, String formatDelimiter) { super(); if (parseDelimiters == null || parseDelimiters.length == 0) { throw new IllegalArgumentException("Parse Delimiters cannot be null or empty"); diff --git a/src/main/java/org/apache/commons/text/cases/KebabCase.java b/src/main/java/org/apache/commons/text/cases/KebabCase.java index af1860e1f9..485774cd16 100644 --- a/src/main/java/org/apache/commons/text/cases/KebabCase.java +++ b/src/main/java/org/apache/commons/text/cases/KebabCase.java @@ -25,15 +25,15 @@ public class KebabCase extends DelimitedCase { /** constant for delimiter. */ - public static final char DELIMITER = '-'; + private static final char DELIMITER = '-'; - /** constant reuseable instance of this case. */ + /** constant reusable instance of this case. */ public static final KebabCase INSTANCE = new KebabCase(); /** * Constructs a new KebabCase instance. 
*/ - public KebabCase() { + private KebabCase() { super(DELIMITER); } diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index c2e0fd92ba..1f6c9ebc43 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -38,7 +38,7 @@ public class PascalCase implements Case { /** * Constructs a new PascalCase instance. */ - public PascalCase() { + private PascalCase() { } /** diff --git a/src/main/java/org/apache/commons/text/cases/SnakeCase.java b/src/main/java/org/apache/commons/text/cases/SnakeCase.java index db14ac4cfb..63b0266ad0 100644 --- a/src/main/java/org/apache/commons/text/cases/SnakeCase.java +++ b/src/main/java/org/apache/commons/text/cases/SnakeCase.java @@ -25,15 +25,15 @@ public class SnakeCase extends DelimitedCase { /** constant for delimiter. */ - public static final char DELIMITER = '_'; + private static final char DELIMITER = '_'; - /** constant reuseable instance of this case. */ + /** constant reusable instance of this case. */ public static final SnakeCase INSTANCE = new SnakeCase(); /** * Constructs a new SnakeCase instance. 
*/ - public SnakeCase() { + private SnakeCase() { super(DELIMITER); } From 67ccbb8921ceb0ddc8d628f289bd1c006e924942 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 08:06:47 -0400 Subject: [PATCH 08/52] use ArrayList --- .../java/org/apache/commons/text/cases/CamelCase.java | 8 ++++---- .../java/org/apache/commons/text/cases/DelimitedCase.java | 4 ++-- .../java/org/apache/commons/text/cases/PascalCase.java | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index b1df5a848b..297e278f55 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -16,7 +16,7 @@ */ package org.apache.commons.text.cases; -import java.util.LinkedList; +import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.CharUtils; @@ -51,12 +51,12 @@ private CamelCase() { * character. This method places no other restrictions on the content of the string.
* Note: This method should never produce empty tokens. *

- * @param string Camel Case formatted string to parse + * @param string camel case formatted string to parse * @return list of tokens parsed from the string */ @Override public List parse(String string) { - List tokens = new LinkedList<>(); + List tokens = new ArrayList<>(); if (string.length() == 0) { return tokens; } @@ -94,7 +94,7 @@ public List parse(String string) { * forced lowercase. This Case does not support empty tokens.
* No other restrictions are placed on token contents. *

- * @param tokens String tokens to format into camel case + * @param tokens string tokens to format into camel case * @return camel case formatted string */ @Override diff --git a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java index f13988318e..0302d3da6a 100644 --- a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java @@ -16,8 +16,8 @@ */ package org.apache.commons.text.cases; +import java.util.ArrayList; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Set; @@ -103,7 +103,7 @@ public String format(Iterable tokens) { */ @Override public List parse(String string) { - List tokens = new LinkedList<>(); + List tokens = new ArrayList<>(); if (string.length() == 0) { return tokens; } diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index 1f6c9ebc43..f6c82e4c99 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -16,7 +16,7 @@ */ package org.apache.commons.text.cases; -import java.util.LinkedList; +import java.util.ArrayList; import java.util.List; import org.apache.commons.lang3.CharUtils; @@ -55,7 +55,7 @@ private PascalCase() { */ @Override public List parse(String string) { - List tokens = new LinkedList<>(); + List tokens = new ArrayList<>(); if (string.length() == 0) { return tokens; } From b3a33685e2149b164fe52eabcd9e9b789d886927 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 08:27:37 -0400 Subject: [PATCH 09/52] doc formatting --- .../org/apache/commons/text/cases/DelimitedCase.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java 
b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java index 0302d3da6a..e9c770f22d 100644 --- a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java @@ -45,8 +45,8 @@ protected DelimitedCase(char delimiter) { /** * Constructs a new delimited case. - * @param parseDelimiters The array of delimiters to use when parsing - * @param formatDelimiter The delimiter to use when formatting + * @param parseDelimiters the array of delimiters to use when parsing + * @param formatDelimiter the delimiter to use when formatting */ protected DelimitedCase(char[] parseDelimiters, String formatDelimiter) { super(); @@ -70,7 +70,7 @@ protected DelimitedCase(char[] parseDelimiters, String formatDelimiter) { * Note: This Case does support empty tokens.
*

* @param tokens the tokens to be formatted into a delimited string - * @return The delimited string + * @return the delimited string */ @Override public String format(Iterable tokens) { @@ -98,8 +98,8 @@ public String format(Iterable tokens) { * considered reserved, and is omitted from the returned parsed tokens.
* No other restrictions are placed on the contents of the input string.
*

- * @param string The delimited string to be parsed - * @return The list of tokens found in the string + * @param string the delimited string to be parsed + * @return the list of tokens found in the string */ @Override public List parse(String string) { @@ -130,7 +130,7 @@ public List parse(String string) { * Converts an array of delimiters to a hash set of code points. The generated hash set provides O(1) lookup time. * * @param delimiters set of characters to determine capitalization, null means whitespace - * @return Set + * @return the Set of delimiter characters in the input array */ private static Set generateDelimiterSet(final char[] delimiters) { final Set delimiterHashSet = new HashSet<>(); From f98d277ba9d1b8b898dd638616e9529827b36ff2 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 08:27:51 -0400 Subject: [PATCH 10/52] doc spelling --- src/main/java/org/apache/commons/text/cases/CamelCase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index 297e278f55..dfc410690a 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -89,9 +89,9 @@ public List parse(String string) { * Formats tokens into a Camel Case string. *

* Iterates over tokens and creates a camel case formatted string. Each token must begin with an - * ASCII letter, which will be forced uppercase in the output, except for the very first token, + * ASCII letter, which will be converted to uppercase in the output, except for the very first token, * which will have a lowercase first character. The remaining characters in all tokens will be - * forced lowercase. This Case does not support empty tokens.
+ * converted to lowercase. This Case does not support empty tokens.
* No other restrictions are placed on token contents. *

* @param tokens string tokens to format into camel case From 76de8652a52213e133427c0d74bf31ea9b228e3f Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 08:36:47 -0400 Subject: [PATCH 11/52] @throws docs, spelling --- src/main/java/org/apache/commons/text/cases/CamelCase.java | 6 ++++-- src/main/java/org/apache/commons/text/cases/Case.java | 6 ++++-- .../java/org/apache/commons/text/cases/DelimitedCase.java | 1 + src/main/java/org/apache/commons/text/cases/PascalCase.java | 6 ++++-- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index dfc410690a..b274760dd8 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -53,6 +53,7 @@ private CamelCase() { *

* @param string camel case formatted string to parse * @return list of tokens parsed from the string + * @throws IllegalArgumentException if the string does not begin with a lowercase ASCII alpha character */ @Override public List parse(String string) { @@ -61,7 +62,7 @@ public List parse(String string) { return tokens; } if (!CharUtils.isAsciiAlphaLower(string.charAt(0))) { - throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be an ascii lowercase letter"); + throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be an ASCII lowercase letter"); } int strLen = string.length(); int[] tokenCodePoints = new int[strLen]; @@ -96,6 +97,7 @@ public List parse(String string) { *

* @param tokens string tokens to format into camel case * @return camel case formatted string + * @throws IllegalArgumentException if any tokens are empty String or do not begin with ASCII alpha characters */ @Override public String format(Iterable tokens) { @@ -106,7 +108,7 @@ public String format(Iterable tokens) { throw new IllegalArgumentException("Unsupported empty token at index " + i); } if (!CharUtils.isAsciiAlpha(token.charAt(0))) { - throw new IllegalArgumentException("First character '" + token.charAt(0) + "' in token " + i + " must be an ascii letter"); + throw new IllegalArgumentException("First character '" + token.charAt(0) + "' in token " + i + " must be an ASCII letter"); } String formattedToken = (i == 0 ? token.substring(0, 1).toLowerCase() : token.substring(0, 1).toUpperCase()) + (token.length() > 1 ? token.substring(1).toLowerCase() : StringUtils.EMPTY); diff --git a/src/main/java/org/apache/commons/text/cases/Case.java b/src/main/java/org/apache/commons/text/cases/Case.java index 6efe857f5a..163a13cdb0 100644 --- a/src/main/java/org/apache/commons/text/cases/Case.java +++ b/src/main/java/org/apache/commons/text/cases/Case.java @@ -35,6 +35,7 @@ public interface Case { * * @param tokens string tokens to be formatted by this Case * @return the formatted string + * @throws IllegalArgumentException if tokens cannot be formatted */ String format(Iterable tokens); @@ -42,8 +43,9 @@ public interface Case { * Parses a string into a series of tokens. The string must abide by certain restrictions, * dependent on each Case implementation. 
* - * @param string The string to be parsed by the Case into a list of tokens - * @return The list of parsed tokens + * @param string the string to be parsed by the Case into a list of tokens + * @return the list of parsed tokens + * @throws IllegalArgumentException if the string cannot be parsed */ List parse(String string); diff --git a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java index e9c770f22d..fed48e4976 100644 --- a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java @@ -71,6 +71,7 @@ protected DelimitedCase(char[] parseDelimiters, String formatDelimiter) { *

* @param tokens the tokens to be formatted into a delimited string * @return the delimited string + * @throws IllegalArgumentException if any tokens contain the delimiter character */ @Override public String format(Iterable tokens) { diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index f6c82e4c99..ac430a8b56 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -52,6 +52,7 @@ private PascalCase() { *

* @param string The Pascal Cased string to parse * @return the list of tokens found in the string + * @throws IllegalArgumentException if the string does not begin with an uppercase ASCII alpha character */ @Override public List parse(String string) { @@ -60,7 +61,7 @@ public List parse(String string) { return tokens; } if (!CharUtils.isAsciiAlphaUpper(string.charAt(0))) { - throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be ascii uppercase"); + throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be ASCII uppercase"); } int strLen = string.length(); int[] tokenCodePoints = new int[strLen]; @@ -94,6 +95,7 @@ public List parse(String string) { *

* @param tokens The string tokens to be formatted into Pascal Case * @return The Pascal Case formatted string + * @throws IllegalArgumentException if any token is empty String or does not begin with an ASCII alpha character */ @Override public String format(Iterable tokens) { @@ -104,7 +106,7 @@ public String format(Iterable tokens) { throw new IllegalArgumentException("Unsupported empty token at index " + i); } if (!CharUtils.isAsciiAlpha(token.charAt(0))) { - throw new IllegalArgumentException("First character '" + token.charAt(0) + "' in token " + i + " must be an ascii letter"); + throw new IllegalArgumentException("First character '" + token.charAt(0) + "' in token " + i + " must be an ASCII letter"); } String formattedToken = token.substring(0, 1).toUpperCase() + (token.length() > 1 ? token.substring(1).toLowerCase() : StringUtils.EMPTY); i++; From de03d81561e8a0a29706648de26727523b8c5efe Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 08:41:10 -0400 Subject: [PATCH 12/52] checkstyle violations --- src/main/java/org/apache/commons/text/cases/CamelCase.java | 2 +- src/main/java/org/apache/commons/text/cases/KebabCase.java | 2 +- src/main/java/org/apache/commons/text/cases/PascalCase.java | 2 +- src/main/java/org/apache/commons/text/cases/SnakeCase.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index b274760dd8..24a557d297 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -30,7 +30,7 @@ * alpha character. *

*/ -public class CamelCase implements Case { +public final class CamelCase implements Case { /** constant reusable instance of this case. */ public static final CamelCase INSTANCE = new CamelCase(); diff --git a/src/main/java/org/apache/commons/text/cases/KebabCase.java b/src/main/java/org/apache/commons/text/cases/KebabCase.java index 485774cd16..3ba58f5105 100644 --- a/src/main/java/org/apache/commons/text/cases/KebabCase.java +++ b/src/main/java/org/apache/commons/text/cases/KebabCase.java @@ -22,7 +22,7 @@ * KebabCase is a delimited case where the delimiter is a hyphen character '-'. *

*/ -public class KebabCase extends DelimitedCase { +public final class KebabCase extends DelimitedCase { /** constant for delimiter. */ private static final char DELIMITER = '-'; diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index ac430a8b56..b1729888f9 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -30,7 +30,7 @@ * ignored and returned as-is. *

*/ -public class PascalCase implements Case { +public final class PascalCase implements Case { /** constant reusable instance of this case. */ public static final PascalCase INSTANCE = new PascalCase(); diff --git a/src/main/java/org/apache/commons/text/cases/SnakeCase.java b/src/main/java/org/apache/commons/text/cases/SnakeCase.java index 63b0266ad0..c253c98ce7 100644 --- a/src/main/java/org/apache/commons/text/cases/SnakeCase.java +++ b/src/main/java/org/apache/commons/text/cases/SnakeCase.java @@ -22,7 +22,7 @@ * SnakeCase is a delimited case where the delimiter is the underscore character '_'. *

*/ -public class SnakeCase extends DelimitedCase { +public final class SnakeCase extends DelimitedCase { /** constant for delimiter. */ private static final char DELIMITER = '_'; From dd0394b75005f53eb027053ed1ba25e27364644c Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 21 Aug 2023 08:49:59 -0400 Subject: [PATCH 13/52] doc formatting --- src/main/java/org/apache/commons/text/cases/Case.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/text/cases/Case.java b/src/main/java/org/apache/commons/text/cases/Case.java index 163a13cdb0..9523d020bf 100644 --- a/src/main/java/org/apache/commons/text/cases/Case.java +++ b/src/main/java/org/apache/commons/text/cases/Case.java @@ -43,7 +43,7 @@ public interface Case { * Parses a string into a series of tokens. The string must abide by certain restrictions, * dependent on each Case implementation. * - * @param string the string to be parsed by the Case into a list of tokens + * @param string the string to be parsed by this Case into a list of tokens * @return the list of parsed tokens * @throws IllegalArgumentException if the string cannot be parsed */ From 169505101ab20080cc8f0c99ae67e8900cf81907 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 11 Sep 2023 16:18:04 -0400 Subject: [PATCH 14/52] javadoc formatting --- src/main/java/org/apache/commons/text/cases/CamelCase.java | 2 +- src/main/java/org/apache/commons/text/cases/Case.java | 2 +- src/main/java/org/apache/commons/text/cases/PascalCase.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index 24a557d297..942b40d2ec 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -92,7 +92,7 @@ public List parse(String string) { * Iterates over tokens and creates a camel case 
formatted string. Each token must begin with an * ASCII letter, which will be converted to uppercase in the output, except for the very first token, * which will have a lowercase first character. The remaining characters in all tokens will be - * converted to lowercase. This Case does not support empty tokens.
+ * converted to lowercase. This Case does not support empty tokens. * No other restrictions are placed on token contents. *

* @param tokens string tokens to format into camel case diff --git a/src/main/java/org/apache/commons/text/cases/Case.java b/src/main/java/org/apache/commons/text/cases/Case.java index 9523d020bf..99b7f9a0ed 100644 --- a/src/main/java/org/apache/commons/text/cases/Case.java +++ b/src/main/java/org/apache/commons/text/cases/Case.java @@ -20,7 +20,7 @@ /** * Handles formatting and parsing tokens to/from a String. For most implementations tokens returned - * by the parse method should abide by any restrictions present in the format method. i.e. Calling + * by the parse method should abide by any restrictions present in the format method. i.e. calling * format() with the results of a call to parse() on the same Case instance should return a * matching String. * diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index b1729888f9..388a3c064b 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -94,7 +94,7 @@ public List parse(String string) { * characters in the token are returned as-is. Empty tokens are not supported. *

* @param tokens The string tokens to be formatted into Pascal Case - * @return The Pascal Case formatted string + * @return the Pascal Case formatted string * @throws IllegalArgumentException if any token is empty String or does not begin with an ASCII alpha character */ @Override From 1cb481ffaa8b0ec4ee0d10b2f65bf6dff227343e Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 2 Oct 2023 10:52:57 -0400 Subject: [PATCH 15/52] implemented unicode casing logic --- .../apache/commons/text/cases/CamelCase.java | 77 ++++++++++----- .../apache/commons/text/cases/PascalCase.java | 45 ++++++--- .../apache/commons/text/cases/CasesTest.java | 93 ++++++++++++++++++- 3 files changed, 176 insertions(+), 39 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index 942b40d2ec..4ab601e5c0 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -19,15 +19,14 @@ import java.util.ArrayList; import java.util.List; -import org.apache.commons.lang3.CharUtils; -import org.apache.commons.lang3.StringUtils; - /** * Case implementation that parses and formats strings of the form 'myCamelCase' *

- * This case separates tokens on uppercase ASCII alpha characters. Each token begins with an - * uppercase ASCII alpha character, except the first token, which begins with a lowercase ASCII - * alpha character. + * This case separates tokens on uppercase Unicode letter characters, according to the logic in {@link java.lang.Character#toUpperCase} + * and {@link java.lang.Character#toLowerCase} which should following the mapping present in + * the Unicode data file. + * Each token begins with an + * uppercase unicode letter, except the first token, which begins with a lowercase unicode letter character. *

*/ public final class CamelCase implements Case { @@ -45,15 +44,15 @@ private CamelCase() { /** * Parses string tokens from a Camel Case formatted string. *

- * Parses each character of the string parameter and creates new tokens when uppercase ASCII + * Parses each character of the string parameter and creates new tokens when uppercase Unicode * letters are encountered. The uppercase letter is considered part of the new token. The very - * first character of the string is an exception to this rule and must be a lowercase ASCII - * character. This method places no other restrictions on the content of the string.
+ * first character of the string is an exception to this rule and must be a lowercase Unicode + * letter. This method places no other restrictions on the content of the string.
* Note: This method should never produce empty tokens. *

* @param string camel case formatted string to parse * @return list of tokens parsed from the string - * @throws IllegalArgumentException if the string does not begin with a lowercase ASCII alpha character + * @throws IllegalArgumentException if the string does not begin with a Unicode lowercase letter character */ @Override public List parse(String string) { @@ -61,15 +60,15 @@ public List parse(String string) { if (string.length() == 0) { return tokens; } - if (!CharUtils.isAsciiAlphaLower(string.charAt(0))) { - throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be an ASCII lowercase letter"); + if (!Character.isLowerCase(string.codePointAt(0))) { + throw new IllegalArgumentException(createExceptionString(string.codePointAt(0), 0, "must be a Unicode lowercase letter")); } int strLen = string.length(); int[] tokenCodePoints = new int[strLen]; int tokenCodePointsOffset = 0; for (int i = 0; i < string.length();) { final int codePoint = string.codePointAt(i); - if (CharUtils.isAsciiAlphaUpper((char) codePoint)) { + if (Character.isUpperCase(codePoint)) { if (tokenCodePointsOffset > 0) { tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); tokenCodePoints = new int[strLen]; @@ -89,33 +88,63 @@ public List parse(String string) { /** * Formats tokens into a Camel Case string. *

- * Iterates over tokens and creates a camel case formatted string. Each token must begin with an - * ASCII letter, which will be converted to uppercase in the output, except for the very first token, + * Iterates over tokens and creates a camel case formatted string. Each token must begin with a + * Unicode lower/upper cased letter, which will be converted to uppercase in the output, except for the very first token, * which will have a lowercase first character. The remaining characters in all tokens will be * converted to lowercase. This Case does not support empty tokens. * No other restrictions are placed on token contents. *

* @param tokens string tokens to format into camel case * @return camel case formatted string - * @throws IllegalArgumentException if any tokens are empty String or do not begin with ASCII alpha characters + * @throws IllegalArgumentException if any tokens are empty String or do not begin with Unicode upper/lower letter characters */ @Override public String format(Iterable tokens) { StringBuilder formattedString = new StringBuilder(); - int i = 0; + int tokenIndex = 0; for (String token : tokens) { if (token.length() == 0) { - throw new IllegalArgumentException("Unsupported empty token at index " + i); + throw new IllegalArgumentException("Unsupported empty token at index " + tokenIndex); } - if (!CharUtils.isAsciiAlpha(token.charAt(0))) { - throw new IllegalArgumentException("First character '" + token.charAt(0) + "' in token " + i + " must be an ASCII letter"); + for (int i = 0; i < token.length();) { + final int codePoint = token.codePointAt(i); + int codePointFormatted = codePoint; + if (i == 0) { + if (tokenIndex == 0) { + //token must be lowercase or lowercaseable + if (!Character.isLowerCase(codePoint)) { + codePointFormatted = Character.toLowerCase(codePoint); + if (codePoint == codePointFormatted) { + throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); + } + } + } else { + //token must be uppercase or uppercaseable + if (!Character.isUpperCase(codePoint)) { + codePointFormatted = Character.toUpperCase(codePoint); + if (codePoint == codePointFormatted || !Character.isUpperCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to uppercase")); + } + } + } + } else { + //only need to force lowercase if the letter is uppercase, otherwise just add it + if (Character.isUpperCase(codePoint)) { + codePointFormatted = Character.toLowerCase(codePoint); + if (codePoint == codePointFormatted) { + throw new 
IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); + } + } + } + formattedString.appendCodePoint(codePointFormatted); + i += Character.charCount(codePoint); } - String formattedToken = (i == 0 ? token.substring(0, 1).toLowerCase() : token.substring(0, 1).toUpperCase()) - + (token.length() > 1 ? token.substring(1).toLowerCase() : StringUtils.EMPTY); - i++; - formattedString.append(formattedToken); + tokenIndex++; } return formattedString.toString(); } + private static String createExceptionString(int codePoint, int index, String suffix) { + return "Character '" + new String(new int[] {codePoint}, 0, 1) + "' with code point " + codePoint + " at index " + index + " " + suffix; + } } diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index 388a3c064b..477959ba29 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -19,9 +19,6 @@ import java.util.ArrayList; import java.util.List; -import org.apache.commons.lang3.CharUtils; -import org.apache.commons.lang3.StringUtils; - /** * Case implementation which parses and formats strings of the form 'MyPascalString' *

@@ -60,15 +57,15 @@ public List parse(String string) { if (string.length() == 0) { return tokens; } - if (!CharUtils.isAsciiAlphaUpper(string.charAt(0))) { - throw new IllegalArgumentException("Character '" + string.charAt(0) + "' at index 0 must be ASCII uppercase"); + if (!Character.isUpperCase(string.codePointAt(0))) { + throw new IllegalArgumentException(createExceptionString(string.codePointAt(0), 0, "must be a Unicode uppercase letter")); } int strLen = string.length(); int[] tokenCodePoints = new int[strLen]; int tokenCodePointsOffset = 0; for (int i = 0; i < string.length();) { final int codePoint = string.codePointAt(i); - if (CharUtils.isAsciiAlphaUpper((char) codePoint)) { + if (Character.isUpperCase(codePoint)) { if (tokenCodePointsOffset > 0) { tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); tokenCodePoints = new int[strLen]; @@ -100,20 +97,40 @@ public List parse(String string) { @Override public String format(Iterable tokens) { StringBuilder formattedString = new StringBuilder(); - int i = 0; + int tokenIndex = 0; for (String token : tokens) { if (token.length() == 0) { - throw new IllegalArgumentException("Unsupported empty token at index " + i); + throw new IllegalArgumentException("Unsupported empty token at index " + tokenIndex); } - if (!CharUtils.isAsciiAlpha(token.charAt(0))) { - throw new IllegalArgumentException("First character '" + token.charAt(0) + "' in token " + i + " must be an ASCII letter"); + for (int i = 0; i < token.length();) { + final int codePoint = token.codePointAt(i); + int codePointFormatted = codePoint; + if (i == 0) { + //must uppercase + if (!Character.isUpperCase(codePoint)) { + codePointFormatted = Character.toUpperCase(codePoint); + if (codePoint == codePointFormatted || !Character.isUpperCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to uppercase")); + } + } + } else { + //only need to force lowercase if the letter is 
uppercase, otherwise just add it + if (Character.isUpperCase(codePoint)) { + codePointFormatted = Character.toLowerCase(codePoint); + if (codePoint == codePointFormatted || !Character.isLowerCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); + } + } + } + formattedString.appendCodePoint(codePointFormatted); + i += Character.charCount(codePoint); } - String formattedToken = token.substring(0, 1).toUpperCase() + (token.length() > 1 ? token.substring(1).toLowerCase() : StringUtils.EMPTY); - i++; - formattedString.append(formattedToken); + tokenIndex++; } return formattedString.toString(); - } + private static String createExceptionString(int codePoint, int index, String suffix) { + return "Character '" + new String(new int[] {codePoint}, 0, 1) + "' with code point " + codePoint + " at index " + index + " " + suffix; + } } diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index e99c5c6380..8787d13aec 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -134,6 +134,98 @@ public void testEmptyTokens() { Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(tokens)); } + @Test + public void testUnicodeUncaseableLetter() { + + // LATIN SMALL LETTER SHARP S + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("uncaseable", "\u00DFabc", "token"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("uncaseable", "\u00DFabc", "token"))); + + // LATIN SMALL LETTER KRA + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("uncaseable", "\u0138abc", "token"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> 
PascalCase.INSTANCE.format(Arrays.asList("uncaseable", "\u0138abc", "token"))); + + } + + /** + * This method iterates through all unicode characters and confirms: 1. If the character is + * uppercase or lowercase, that it can be parsed successfully 2. If the character can be + * converted to the opposite case, that this conversion can be parsed successfully 3. If the + * opposite case character can be converted BACK to the initial character, that this conversion + * can be parsed successfully + */ + @Test + public void testUnicodeCasing() { + + for (int i = 0; i < Character.MAX_CODE_POINT; i++) { + + // this if block weeds out titlecase characters + if (Character.isLowerCase(i) || Character.isUpperCase(i)) { + + // if char is lowercase + boolean lower = Character.isLowerCase(i); + + // find the lowercase code point, if it exists + int lowerCode = lower ? i : Character.toLowerCase(i); + if (!Character.isLowerCase(lowerCode)) { + lowerCode = i; + } + + // find the uppercase code point, if it exists + int upperCode = !lower ? i : Character.toUpperCase(i); + if (!Character.isUpperCase(upperCode)) { + upperCode = i; + } + + // if char has a valid conversion + boolean canConvert = lowerCode != upperCode; + + // if opposite case converts back to original char + // this is sometimes false when an uppercase character has a many-to-one + // relationship with lower cases, and thus there is no single upper-to-lower + // conversion that be selected + boolean canReverse = false; + if (canConvert) { + if (lower) { + canReverse = Character.toLowerCase(upperCode) == lowerCode; + } else { + canReverse = Character.toUpperCase(lowerCode) == upperCode; + } + } + + String lowerCodeString = new String(new int[] { lowerCode }, 0, 1); + String upperCodeString = new String(new int[] { upperCode }, 0, 1); + + // confirm the token can be handled by format and parse methods + String camelCaseString = lower ? lowerCodeString : "a" + upperCodeString; + List camelTokens = lower ? 
Arrays.asList(lowerCodeString) : Arrays.asList("a", upperCodeString); + assertFormatAndParse(CamelCase.INSTANCE, camelCaseString, camelTokens); + + String pascalCaseString = lower ? "A" + lowerCodeString : upperCodeString; + List pascalTokens = lower ? Arrays.asList("A" + lowerCodeString) : Arrays.asList(upperCodeString); + assertFormatAndParse(PascalCase.INSTANCE, pascalCaseString, pascalTokens); + + if (canConvert) { + + if (lower || canReverse) { + // confirm we can convert lower to upper or reverse + Assertions.assertEquals(upperCodeString, PascalCase.INSTANCE.format(Arrays.asList(lowerCodeString))); + Assertions.assertEquals("a" + upperCodeString, CamelCase.INSTANCE.format(Arrays.asList("a", lowerCodeString))); + } + if (!lower || canReverse) { + // confirm we can convert upper to lower or reverse + Assertions.assertEquals("A" + lowerCodeString, PascalCase.INSTANCE.format(Arrays.asList("A" + upperCodeString))); + Assertions.assertEquals(lowerCodeString, CamelCase.INSTANCE.format(Arrays.asList(upperCodeString))); + } + + } + + } + + } + + } + private void assertFormatAndParse(Case caseInstance, String string, List tokens) { assertFormatAndParse(caseInstance, string, tokens, false); } @@ -167,4 +259,3 @@ private void assertEqualsIgnoreCase(List expected, List actual) } } - From 8833ae20217e652e30af69b541345c99ae09d2df Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 2 Oct 2023 10:55:50 -0400 Subject: [PATCH 16/52] order of if/else --- .../apache/commons/text/cases/PascalCase.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index 477959ba29..42535cf69d 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -105,15 +105,7 @@ public String format(Iterable tokens) { for (int i = 0; i < token.length();) { final 
int codePoint = token.codePointAt(i); int codePointFormatted = codePoint; - if (i == 0) { - //must uppercase - if (!Character.isUpperCase(codePoint)) { - codePointFormatted = Character.toUpperCase(codePoint); - if (codePoint == codePointFormatted || !Character.isUpperCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to uppercase")); - } - } - } else { + if (i != 0) { //only need to force lowercase if the letter is uppercase, otherwise just add it if (Character.isUpperCase(codePoint)) { codePointFormatted = Character.toLowerCase(codePoint); @@ -121,6 +113,15 @@ public String format(Iterable tokens) { throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); } } + } + else { + //must uppercase + if (!Character.isUpperCase(codePoint)) { + codePointFormatted = Character.toUpperCase(codePoint); + if (codePoint == codePointFormatted || !Character.isUpperCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to uppercase")); + } + } } formattedString.appendCodePoint(codePointFormatted); i += Character.charCount(codePoint); From c3282330c38cb7ea1c556da38874cfeb9f0f06af Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 2 Oct 2023 10:59:28 -0400 Subject: [PATCH 17/52] Revert "order of if/else" This reverts commit 8833ae20217e652e30af69b541345c99ae09d2df. 
--- .../apache/commons/text/cases/PascalCase.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index 42535cf69d..477959ba29 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -105,16 +105,7 @@ public String format(Iterable tokens) { for (int i = 0; i < token.length();) { final int codePoint = token.codePointAt(i); int codePointFormatted = codePoint; - if (i != 0) { - //only need to force lowercase if the letter is uppercase, otherwise just add it - if (Character.isUpperCase(codePoint)) { - codePointFormatted = Character.toLowerCase(codePoint); - if (codePoint == codePointFormatted || !Character.isLowerCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); - } - } - } - else { + if (i == 0) { //must uppercase if (!Character.isUpperCase(codePoint)) { codePointFormatted = Character.toUpperCase(codePoint); @@ -122,6 +113,14 @@ public String format(Iterable tokens) { throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to uppercase")); } } + } else { + //only need to force lowercase if the letter is uppercase, otherwise just add it + if (Character.isUpperCase(codePoint)) { + codePointFormatted = Character.toLowerCase(codePoint); + if (codePoint == codePointFormatted || !Character.isLowerCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); + } + } } formattedString.appendCodePoint(codePointFormatted); i += Character.charCount(codePoint); From eff05297a797f1c9bf172b2a065182b77fae8af0 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 3 Oct 2023 09:10:21 -0400 Subject: [PATCH 18/52] spelling --- 
src/test/java/org/apache/commons/text/cases/CasesTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index 8787d13aec..b7a96d6d39 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -231,8 +231,8 @@ private void assertFormatAndParse(Case caseInstance, String string, List } /** - * Test Util method for ensuring that a case instance produces the expecting string and tokens - * upon formatting and parsing + * Test Util method for ensuring that a case instance parses and formats the expected string and tokens + * to one another * * @param case Instance the case instance to use * @param string the expected formatted string From b32035f4cb372d052a59d0eb9f25eca9d2f74da4 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Wed, 4 Oct 2023 09:44:31 -0400 Subject: [PATCH 19/52] grammar --- src/main/java/org/apache/commons/text/cases/DelimitedCase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java index fed48e4976..549ea47537 100644 --- a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/DelimitedCase.java @@ -32,7 +32,7 @@ public class DelimitedCase implements Case { /** delimiters to be used when parsing. */ private Set parseDelimiters; - /** delimited to be used when formatting. */ + /** delimiter to be used when formatting. 
*/ private String formatDelimiter; /** From 0280d333beb29871305b6baf8db494f89a69a6bb Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Thu, 19 Oct 2023 09:29:10 -0400 Subject: [PATCH 20/52] Improved implementation to better handle unicode cases --- .../apache/commons/text/cases/CamelCase.java | 121 +----------- .../apache/commons/text/cases/CasesUtils.java | 61 +++++++ .../apache/commons/text/cases/PascalCase.java | 106 +---------- .../text/cases/UpperCaseDelimitedCase.java | 116 ++++++++++++ .../apache/commons/text/cases/CasesTest.java | 172 +++++++++--------- 5 files changed, 273 insertions(+), 303 deletions(-) create mode 100644 src/main/java/org/apache/commons/text/cases/CasesUtils.java create mode 100644 src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index 4ab601e5c0..d91bd95085 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -16,20 +16,15 @@ */ package org.apache.commons.text.cases; -import java.util.ArrayList; -import java.util.List; - /** * Case implementation that parses and formats strings of the form 'myCamelCase' *

- * This case separates tokens on uppercase Unicode letter characters, according to the logic in {@link java.lang.Character#toUpperCase} - * and {@link java.lang.Character#toLowerCase} which should following the mapping present in - * the Unicode data file. - * Each token begins with an - * uppercase unicode letter, except the first token, which begins with a lowercase unicode letter character. + * CamelCase is a case where tokens are delimited by upper case unicode characters. The very first + * token should begin with lower or non cased character, and any subsequent tokens begin with an + * upper case character. All remaining characters will be lower cased or non cased. *

*/ -public final class CamelCase implements Case { +public final class CamelCase extends UpperCaseDelimitedCase { /** constant reusable instance of this case. */ public static final CamelCase INSTANCE = new CamelCase(); @@ -38,113 +33,7 @@ public final class CamelCase implements Case { * Constructs new CamelCase instance. */ private CamelCase() { - super(); - } - - /** - * Parses string tokens from a Camel Case formatted string. - *

- * Parses each character of the string parameter and creates new tokens when uppercase Unicode - * letters are encountered. The uppercase letter is considered part of the new token. The very - * first character of the string is an exception to this rule and must be a lowercase Unicode - * letter. This method places no other restrictions on the content of the string.
- * Note: This method should never produce empty tokens. - *

- * @param string camel case formatted string to parse - * @return list of tokens parsed from the string - * @throws IllegalArgumentException if the string does not begin with a Unicode lowercase letter character - */ - @Override - public List parse(String string) { - List tokens = new ArrayList<>(); - if (string.length() == 0) { - return tokens; - } - if (!Character.isLowerCase(string.codePointAt(0))) { - throw new IllegalArgumentException(createExceptionString(string.codePointAt(0), 0, "must be a Unicode lowercase letter")); - } - int strLen = string.length(); - int[] tokenCodePoints = new int[strLen]; - int tokenCodePointsOffset = 0; - for (int i = 0; i < string.length();) { - final int codePoint = string.codePointAt(i); - if (Character.isUpperCase(codePoint)) { - if (tokenCodePointsOffset > 0) { - tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); - tokenCodePoints = new int[strLen]; - tokenCodePointsOffset = 0; - } - tokenCodePoints[tokenCodePointsOffset++] = codePoint; - i += Character.charCount(codePoint); - } else { - tokenCodePoints[tokenCodePointsOffset++] = codePoint; - i += Character.charCount(codePoint); - } - } - tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); - return tokens; + super(true); } - /** - * Formats tokens into a Camel Case string. - *

- * Iterates over tokens and creates a camel case formatted string. Each token must begin with a - * Unicode lower/upper cased letter, which will be converted to uppercase in the output, except for the very first token, - * which will have a lowercase first character. The remaining characters in all tokens will be - * converted to lowercase. This Case does not support empty tokens. - * No other restrictions are placed on token contents. - *

- * @param tokens string tokens to format into camel case - * @return camel case formatted string - * @throws IllegalArgumentException if any tokens are empty String or do not begin with Unicode upper/lower letter characters - */ - @Override - public String format(Iterable tokens) { - StringBuilder formattedString = new StringBuilder(); - int tokenIndex = 0; - for (String token : tokens) { - if (token.length() == 0) { - throw new IllegalArgumentException("Unsupported empty token at index " + tokenIndex); - } - for (int i = 0; i < token.length();) { - final int codePoint = token.codePointAt(i); - int codePointFormatted = codePoint; - if (i == 0) { - if (tokenIndex == 0) { - //token must be lowercase or lowercaseable - if (!Character.isLowerCase(codePoint)) { - codePointFormatted = Character.toLowerCase(codePoint); - if (codePoint == codePointFormatted) { - throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); - } - } - } else { - //token must be uppercase or uppercaseable - if (!Character.isUpperCase(codePoint)) { - codePointFormatted = Character.toUpperCase(codePoint); - if (codePoint == codePointFormatted || !Character.isUpperCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to uppercase")); - } - } - } - } else { - //only need to force lowercase if the letter is uppercase, otherwise just add it - if (Character.isUpperCase(codePoint)) { - codePointFormatted = Character.toLowerCase(codePoint); - if (codePoint == codePointFormatted) { - throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); - } - } - } - formattedString.appendCodePoint(codePointFormatted); - i += Character.charCount(codePoint); - } - tokenIndex++; - } - return formattedString.toString(); - } - - private static String createExceptionString(int codePoint, int index, String suffix) { - return "Character '" + new String(new int[] 
{codePoint}, 0, 1) + "' with code point " + codePoint + " at index " + index + " " + suffix; - } } diff --git a/src/main/java/org/apache/commons/text/cases/CasesUtils.java b/src/main/java/org/apache/commons/text/cases/CasesUtils.java new file mode 100644 index 0000000000..a79d922857 --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/CasesUtils.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.cases; + +public class CasesUtils { + + /** + * Transforms a unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the + * result is upper case. 
+ * @param codePoint + * @return the transformed code point + * @throws IllegalArgumentException if the converted code point cannot be mapped into an upper case character + */ + protected static int toUpperCase(int codePoint) { + int codePointFormatted = Character.toUpperCase(codePoint); + if (!Character.isUpperCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to upper case")); + } + return codePointFormatted; + } + + /** + * Transforms a unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the + * result is lower case. + * @param codePoint the code point to transform + * @return the lower case code point that corresponds to the input parameter + * @throws IllegalArgumentException if the converted code point cannot be mapped into a lower case character + */ + protected static int toLowerCase(int codePoint) { + int codePointFormatted = Character.toLowerCase(codePoint); + if (!Character.isLowerCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to lower case")); + } + return codePointFormatted; + } + + /** + * Creates an exception message that displays the unicode character as well as the hex value for clarity. 
+ * @param codePoint the unicode code point + * @param suffix a string suffix for the message + * @return the message + */ + protected static String createExceptionMessage(int codePoint, String suffix) { + return "Character '" + new String(new int[] { codePoint }, 0, 1) + "' with value 0x" + Integer.toHexString(codePoint) + suffix; + } + +} diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index 477959ba29..96dd7b2a5c 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -16,18 +16,14 @@ */ package org.apache.commons.text.cases; -import java.util.ArrayList; -import java.util.List; - /** * Case implementation which parses and formats strings of the form 'MyPascalString' *

- * PascalCase is a case where tokens are delimited by uppercase ASCII characters. Each parsed token - * must begin with an uppercase character, but the case of the remaining token characters is - * ignored and returned as-is. + * PascalCase is a case where tokens are delimited by upper case unicode characters. Each parsed token + * begins with an upper case character, and remaining token characters are either lower case or non cased. *

*/ -public final class PascalCase implements Case { +public final class PascalCase extends UpperCaseDelimitedCase { /** constant reusable instance of this case. */ public static final PascalCase INSTANCE = new PascalCase(); @@ -36,101 +32,7 @@ public final class PascalCase implements Case { * Constructs a new PascalCase instance. */ private PascalCase() { + super(false); } - /** - * Parses a PascalCase string into tokens. - *

- * String characters are iterated over and any time an upper case ASCII character is - * encountered, that character is considered to be the start of a new token, with the character - * itself included in the token. This method should never return empty tokens. The first - * character of the string must be an uppercase ASCII character. No further restrictions are - * placed on string contents. - *

- * @param string The Pascal Cased string to parse - * @return the list of tokens found in the string - * @throws IllegalArgumentException if the string does not begin with an uppercase ASCII alpha character - */ - @Override - public List parse(String string) { - List tokens = new ArrayList<>(); - if (string.length() == 0) { - return tokens; - } - if (!Character.isUpperCase(string.codePointAt(0))) { - throw new IllegalArgumentException(createExceptionString(string.codePointAt(0), 0, "must be a Unicode uppercase letter")); - } - int strLen = string.length(); - int[] tokenCodePoints = new int[strLen]; - int tokenCodePointsOffset = 0; - for (int i = 0; i < string.length();) { - final int codePoint = string.codePointAt(i); - if (Character.isUpperCase(codePoint)) { - if (tokenCodePointsOffset > 0) { - tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); - tokenCodePoints = new int[strLen]; - tokenCodePointsOffset = 0; - } - tokenCodePoints[tokenCodePointsOffset++] = codePoint; - i += Character.charCount(codePoint); - } else { - tokenCodePoints[tokenCodePointsOffset++] = codePoint; - i += Character.charCount(codePoint); - } - } - tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); - return tokens; - } - - /** - * Formats string tokens into a Pascal Case string. - *

- * Iterates the tokens and formats each one into a Pascal Case token. The first character of - * the token must be an ASCII alpha character. This character is forced upper case in the - * output. The remaining alpha characters of the token are forced lowercase. Any other - * characters in the token are returned as-is. Empty tokens are not supported. - *

- * @param tokens The string tokens to be formatted into Pascal Case - * @return the Pascal Case formatted string - * @throws IllegalArgumentException if any token is empty String or does not begin with an ASCII alpha character - */ - @Override - public String format(Iterable tokens) { - StringBuilder formattedString = new StringBuilder(); - int tokenIndex = 0; - for (String token : tokens) { - if (token.length() == 0) { - throw new IllegalArgumentException("Unsupported empty token at index " + tokenIndex); - } - for (int i = 0; i < token.length();) { - final int codePoint = token.codePointAt(i); - int codePointFormatted = codePoint; - if (i == 0) { - //must uppercase - if (!Character.isUpperCase(codePoint)) { - codePointFormatted = Character.toUpperCase(codePoint); - if (codePoint == codePointFormatted || !Character.isUpperCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to uppercase")); - } - } - } else { - //only need to force lowercase if the letter is uppercase, otherwise just add it - if (Character.isUpperCase(codePoint)) { - codePointFormatted = Character.toLowerCase(codePoint); - if (codePoint == codePointFormatted || !Character.isLowerCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionString(codePoint, i, "cannot be mapped to lowercase")); - } - } - } - formattedString.appendCodePoint(codePointFormatted); - i += Character.charCount(codePoint); - } - tokenIndex++; - } - return formattedString.toString(); - } - - private static String createExceptionString(int codePoint, int index, String suffix) { - return "Character '" + new String(new int[] {codePoint}, 0, 1) + "' with code point " + codePoint + " at index " + index + " " + suffix; - } } diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java new file mode 100644 index 0000000000..48503b1d26 --- /dev/null 
+++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.cases; + +import java.util.ArrayList; +import java.util.List; + +/** + * Case implementation which parses and formats strings where tokens are delimited by upper case characters. + */ +public class UpperCaseDelimitedCase implements Case { + + /** flag to indicate whether the first character of the first token should be upper cased. */ + boolean upperCaseFirstCharacter = false; + + /** + * Constructs a new UpperCaseDelimitedCase instance. + */ + protected UpperCaseDelimitedCase(boolean upperCaseFirstCharacter) { + this.upperCaseFirstCharacter = upperCaseFirstCharacter; + } + + /** + * Parses a string into tokens. + *

+ * String characters are iterated over and when an upper case unicode character is + * encountered, that character is considered to be the start of a new token, with the character + * itself included in the token. This method will never return empty tokens. + *

+ * @param string the string to parse + * @return the list of tokens found in the string + */ + @Override + public List parse(String string) { + List tokens = new ArrayList<>(); + if (string.length() == 0) { + return tokens; + } + int strLen = string.length(); + int[] tokenCodePoints = new int[strLen]; + int tokenCodePointsOffset = 0; + for (int i = 0; i < string.length();) { + final int codePoint = string.codePointAt(i); + if (Character.isUpperCase(codePoint)) { + if (tokenCodePointsOffset > 0) { + tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); + tokenCodePoints = new int[strLen]; + tokenCodePointsOffset = 0; + } + tokenCodePoints[tokenCodePointsOffset++] = codePoint; + i += Character.charCount(codePoint); + } else { + tokenCodePoints[tokenCodePointsOffset++] = codePoint; + i += Character.charCount(codePoint); + } + } + tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); + return tokens; + } + + /** + * Formats string tokens into a single string where each token begins with an upper case + * character, followed by lower case or non cased characters. + *

+ * Iterates the tokens and formats each one into a token where the first character of the token + * is forced upper case in the output. The remaining characters of the token will be lower case + * or non cased. Conversions to lower case are attempted and any conversion that is not possible + * throws an exception. Any other characters in the token are returned as-is. Empty tokens are + * not supported and will cause an exception to be thrown. + *

+ * @param tokens The string tokens to be formatted + * @return the formatted string + * @throws IllegalArgumentException if 1) any token is empty 2) any token begins with a + * character that cannot be upper cased, or 3) any token contains an upper or title case + * character that cannot be converted to lower case. + */ + @Override + public String format(Iterable tokens) { + StringBuilder formattedString = new StringBuilder(); + int tokenIndex = 0; + for (String token : tokens) { + if (token.length() == 0) { + throw new IllegalArgumentException("Unsupported empty token at index " + tokenIndex); + } + for (int i = 0; i < token.length();) { + final int codePoint = token.codePointAt(i); + int codePointFormatted = codePoint; + if (i == 0 && (!upperCaseFirstCharacter || tokenIndex > 0)) { + codePointFormatted = CasesUtils.toUpperCase(codePoint); + } else if (Character.isUpperCase(codePointFormatted) || Character.isTitleCase(codePointFormatted)) { + //if character is title or upper case, it must be converted to lower + codePointFormatted = CasesUtils.toLowerCase(codePoint); + } + formattedString.appendCodePoint(codePointFormatted); + i += Character.charCount(codePoint); + } + tokenIndex++; + } + return formattedString.toString(); + } + +} diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index b7a96d6d39..3617a3437a 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -48,10 +48,12 @@ public void testUtf32() { Arrays.asList("\uD800\uDF00", "\uD800\uDF01\uD800\uDF14", "\uD800\uDF02\uD800\uDF03")); assertFormatAndParse(SnakeCase.INSTANCE, "\uD800\uDF00_\uD800\uDF01\uD800\uDF14_\uD800\uDF02\uD800\uDF03", Arrays.asList("\uD800\uDF00", "\uD800\uDF01\uD800\uDF14", "\uD800\uDF02\uD800\uDF03")); + assertFormatAndParse(PascalCase.INSTANCE, "A\uD800\uDF00B\uD800\uDF01\uD800\uDF14C\uD800\uDF02\uD800\uDF03", 
Arrays.asList("A\uD800\uDF00", "B\uD800\uDF01\uD800\uDF14", "C\uD800\uDF02\uD800\uDF03")); assertFormatAndParse(CamelCase.INSTANCE, "a\uD800\uDF00B\uD800\uDF01\uD800\uDF14C\uD800\uDF02\uD800\uDF03", Arrays.asList("a\uD800\uDF00", "B\uD800\uDF01\uD800\uDF14", "C\uD800\uDF02\uD800\uDF03")); + } @Test @@ -67,13 +69,31 @@ public void testPascalCase() { assertFormatAndParse(PascalCase.INSTANCE, "MyVarName", Arrays.asList("My", "Var", "Name")); assertFormatAndParse(PascalCase.INSTANCE, "MyTokensA1D", Arrays.asList("My", "Tokens", "A1", "D")); assertFormatAndParse(PascalCase.INSTANCE, "", Arrays.asList()); + assertParse(PascalCase.INSTANCE, "lowerFirst", Arrays.asList("lower", "First")); + assertFormat(PascalCase.INSTANCE, "LowerFirst", Arrays.asList("lower", "First")); - // first character must be ASCII alpha upper - Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.parse("lowerFirst")); Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("1"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("a1", "2c"))); Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList(""))); } + @Test + public void testNumberLetters() { + + // roman numerals - have an upper/lower case but are numbers + + assertFormatAndParse(PascalCase.INSTANCE, "A\u2170\u2160c", Arrays.asList("A\u2170", "\u2160c")); + + assertFormat(PascalCase.INSTANCE, "A\u2170Bc", Arrays.asList("a\u2160", "bc")); + assertParse(PascalCase.INSTANCE, "A\u2170Bc", Arrays.asList("A\u2170", "Bc")); + assertFormat(PascalCase.INSTANCE, "A\u2170", Arrays.asList("a\u2170")); + assertParse(PascalCase.INSTANCE, "A\u2170Bc", Arrays.asList("A\u2170", "Bc")); + + assertFormat(CamelCase.INSTANCE, "a\u2170Bc", Arrays.asList("a\u2160", "bc")); + assertParse(CamelCase.INSTANCE, "\u2160Bc", Arrays.asList("\u2160", "Bc")); + + } + @Test public void 
testCamelCase() { @@ -81,12 +101,13 @@ public void testCamelCase() { assertFormatAndParse(CamelCase.INSTANCE, "myTokensAbc123", Arrays.asList("my", "Tokens", "Abc123")); assertFormatAndParse(CamelCase.INSTANCE, "specChar-Token+", Arrays.asList("spec", "Char-", "Token+")); + assertParse(CamelCase.INSTANCE, "MyTokens", Arrays.asList("My", "Tokens")); + assertFormat(CamelCase.INSTANCE, "myTokens", Arrays.asList("My", "Tokens")); + // empty token not supported Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "b", ""))); - // must begin with ASCII alpha + // must begin with character that can be uppercased Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "1b"))); - // must begin with ASCII alpha lower - Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.parse("MyTokens")); } @Test @@ -134,96 +155,50 @@ public void testEmptyTokens() { Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(tokens)); } - @Test - public void testUnicodeUncaseableLetter() { - // LATIN SMALL LETTER SHARP S - Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("uncaseable", "\u00DFabc", "token"))); - Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("uncaseable", "\u00DFabc", "token"))); - // LATIN SMALL LETTER KRA - Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("uncaseable", "\u0138abc", "token"))); - Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("uncaseable", "\u0138abc", "token"))); - - } - - /** - * This method iterates through all unicode characters and confirms: 1. If the character is - * uppercase or lowercase, that it can be parsed successfully 2. 
If the character can be - * converted to the opposite case, that this conversion can be parsed successfully 3. If the - * opposite case character can be converted BACK to the initial character, that this conversion - * can be parsed successfully - */ @Test - public void testUnicodeCasing() { + public void testUnicodeCases() { - for (int i = 0; i < Character.MAX_CODE_POINT; i++) { + // LATIN SMALL LETTER SHARP S - lower case, no upper case + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "\u00DFabc"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("\u00DFabc"))); - // this if block weeds out titlecase characters - if (Character.isLowerCase(i) || Character.isUpperCase(i)) { + // LATIN CAPITAL LETTER L WITH SMALL LETTER J - title case, has upper and lower + assertFormatAndParse(CamelCase.INSTANCE, "\u01CCbc", Arrays.asList("\u01CBbc"), true); + assertFormatAndParse(CamelCase.INSTANCE, "a\u01CAbc", Arrays.asList("a", "\u01CBbc"), true); - // if char is lowercase - boolean lower = Character.isLowerCase(i); + // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI - title case , no upper case + assertFormatAndParse(PascalCase.INSTANCE, "A\u1f80", Arrays.asList("a\u1f88"), true); + assertFormatAndParse(CamelCase.INSTANCE, "\u1f80", Arrays.asList("\u1f88"), true); + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("\u1f88"))); - // find the lowercase code point, if it exists - int lowerCode = lower ? i : Character.toLowerCase(i); - if (!Character.isLowerCase(lowerCode)) { - lowerCode = i; - } - - // find the uppercase code point, if it exists - int upperCode = !lower ? 
i : Character.toUpperCase(i); - if (!Character.isUpperCase(upperCode)) { - upperCode = i; - } - - // if char has a valid conversion - boolean canConvert = lowerCode != upperCode; - - // if opposite case converts back to original char - // this is sometimes false when an uppercase character has a many-to-one - // relationship with lower cases, and thus there is no single upper-to-lower - // conversion that be selected - boolean canReverse = false; - if (canConvert) { - if (lower) { - canReverse = Character.toLowerCase(upperCode) == lowerCode; - } else { - canReverse = Character.toUpperCase(lowerCode) == upperCode; - } + //scan all titlecase characters + for (int i = 0; i < Character.MAX_CODE_POINT; i++) { + if (Character.isTitleCase(i)) { + String codeString = new String(new int[] { i }, 0, 1); + + int upperCode = Character.toUpperCase(i); + int lowerCode = Character.toLowerCase(i); + + //if upper exists, ensure it gets upper cased to it + if (upperCode != i) { + String upperCodeString = new String(new int[] { upperCode }, 0, 1); + Assertions.assertEquals(PascalCase.INSTANCE.format(Arrays.asList(codeString + "bc")), upperCodeString + "bc"); + } else { + // if there is no uppercase value + Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList(codeString))); } - String lowerCodeString = new String(new int[] { lowerCode }, 0, 1); - String upperCodeString = new String(new int[] { upperCode }, 0, 1); - - // confirm the token can be handled by format and parse methods - String camelCaseString = lower ? lowerCodeString : "a" + upperCodeString; - List camelTokens = lower ? Arrays.asList(lowerCodeString) : Arrays.asList("a", upperCodeString); - assertFormatAndParse(CamelCase.INSTANCE, camelCaseString, camelTokens); - - String pascalCaseString = lower ? "A" + lowerCodeString : upperCodeString; - List pascalTokens = lower ? 
Arrays.asList("A" + lowerCodeString) : Arrays.asList(upperCodeString); - assertFormatAndParse(PascalCase.INSTANCE, pascalCaseString, pascalTokens); - - if (canConvert) { - - if (lower || canReverse) { - // confirm we can convert lower to upper or reverse - Assertions.assertEquals(upperCodeString, PascalCase.INSTANCE.format(Arrays.asList(lowerCodeString))); - Assertions.assertEquals("a" + upperCodeString, CamelCase.INSTANCE.format(Arrays.asList("a", lowerCodeString))); - } - if (!lower || canReverse) { - // confirm we can convert upper to lower or reverse - Assertions.assertEquals("A" + lowerCodeString, PascalCase.INSTANCE.format(Arrays.asList("A" + upperCodeString))); - Assertions.assertEquals(lowerCodeString, CamelCase.INSTANCE.format(Arrays.asList(upperCodeString))); - } - + //if lower exists, ensure it gets lower cased to it + if (lowerCode != i) { + String lowerCodeString = new String(new int[] { lowerCode }, 0, 1); + Assertions.assertEquals(CamelCase.INSTANCE.format(Arrays.asList(codeString + "bc")), lowerCodeString + "bc"); + } else { + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a" + codeString))); } - } - } - } private void assertFormatAndParse(Case caseInstance, String string, List tokens) { @@ -240,14 +215,41 @@ private void assertFormatAndParse(Case caseInstance, String string, List * @param caseInsensitive whether to not to validate tokens case insensitively */ private void assertFormatAndParse(Case caseInstance, String string, List tokens, Boolean caseInsensitive) { + assertFormat(caseInstance, string, tokens, caseInsensitive); + assertParse(caseInstance, string, tokens, caseInsensitive); + } + + private void assertFormat(Case caseInstance, String string, List tokens) { + assertFormat(caseInstance, string, tokens, false); + } + + private void assertFormat(Case caseInstance, String string, List tokens, boolean caseInsensitive) { + String formatted = caseInstance.format(tokens); + if 
(caseInsensitive) { + Assertions.assertEquals(string.toLowerCase(), formatted.toLowerCase()); + } else { + Assertions.assertEquals(string, formatted); + } + } + + private void assertParse(Case caseInstance, String string, List tokens) { + assertParse(caseInstance, string, tokens, false); + } + + /** + * Asserts that string parses into the expected tokens, ignoring case if the caseInsensitive parameter is true + * @param caseInstance + * @param string + * @param tokens + * @param caseInsensitive + */ + private void assertParse(Case caseInstance, String string, List tokens, Boolean caseInsensitive) { List parsedTokens = caseInstance.parse(string); if (caseInsensitive) { assertEqualsIgnoreCase(tokens, parsedTokens); } else { Assertions.assertEquals(tokens, parsedTokens); } - String formatted = caseInstance.format(tokens); - Assertions.assertEquals(string, formatted); } private void assertEqualsIgnoreCase(List expected, List actual) { From 52e64416ac49089aa2e476b73904f1a42179c50b Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Thu, 19 Oct 2023 09:48:49 -0400 Subject: [PATCH 21/52] renamed class for clarity --- ...{DelimitedCase.java => CharacterDelimitedCase.java} | 6 +++--- .../java/org/apache/commons/text/cases/KebabCase.java | 2 +- .../java/org/apache/commons/text/cases/SnakeCase.java | 2 +- .../java/org/apache/commons/text/cases/CasesTest.java | 10 ++++++---- 4 files changed, 11 insertions(+), 9 deletions(-) rename src/main/java/org/apache/commons/text/cases/{DelimitedCase.java => CharacterDelimitedCase.java} (96%) diff --git a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java similarity index 96% rename from src/main/java/org/apache/commons/text/cases/DelimitedCase.java rename to src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index 549ea47537..7f7fe51c6f 100644 --- a/src/main/java/org/apache/commons/text/cases/DelimitedCase.java +++ 
b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -27,7 +27,7 @@ * DelimitedCase is a case in which the true alphabetic case of the characters is ignored by default * and tokens themselves are determined by the presence of a delimiter between each token. */ -public class DelimitedCase implements Case { +public class CharacterDelimitedCase implements Case { /** delimiters to be used when parsing. */ private Set parseDelimiters; @@ -39,7 +39,7 @@ public class DelimitedCase implements Case { * Constructs a new Delimited Case. * @param delimiter the character to use as both the parse and format delimiter */ - protected DelimitedCase(char delimiter) { + protected CharacterDelimitedCase(char delimiter) { this(new char[] { delimiter }, CharUtils.toString(delimiter)); } @@ -48,7 +48,7 @@ protected DelimitedCase(char delimiter) { * @param parseDelimiters the array of delimiters to use when parsing * @param formatDelimiter the delimiter to use when formatting */ - protected DelimitedCase(char[] parseDelimiters, String formatDelimiter) { + protected CharacterDelimitedCase(char[] parseDelimiters, String formatDelimiter) { super(); if (parseDelimiters == null || parseDelimiters.length == 0) { throw new IllegalArgumentException("Parse Delimiters cannot be null or empty"); diff --git a/src/main/java/org/apache/commons/text/cases/KebabCase.java b/src/main/java/org/apache/commons/text/cases/KebabCase.java index 3ba58f5105..cee209f301 100644 --- a/src/main/java/org/apache/commons/text/cases/KebabCase.java +++ b/src/main/java/org/apache/commons/text/cases/KebabCase.java @@ -22,7 +22,7 @@ * KebabCase is a delimited case where the delimiter is a hyphen character '-'. *

*/ -public final class KebabCase extends DelimitedCase { +public final class KebabCase extends CharacterDelimitedCase { /** constant for delimiter. */ private static final char DELIMITER = '-'; diff --git a/src/main/java/org/apache/commons/text/cases/SnakeCase.java b/src/main/java/org/apache/commons/text/cases/SnakeCase.java index c253c98ce7..4a33e2dce8 100644 --- a/src/main/java/org/apache/commons/text/cases/SnakeCase.java +++ b/src/main/java/org/apache/commons/text/cases/SnakeCase.java @@ -22,7 +22,7 @@ * SnakeCase is a delimited case where the delimiter is the underscore character '_'. *

*/ -public final class SnakeCase extends DelimitedCase { +public final class SnakeCase extends CharacterDelimitedCase { /** constant for delimiter. */ private static final char DELIMITER = '_'; diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index 3617a3437a..cdf73b08b9 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -29,10 +29,10 @@ public class CasesTest { public void testDelimiterCharacterException() { Assertions.assertThrows(IllegalArgumentException.class, () -> KebabCase.INSTANCE.format(Arrays.asList("a", "-"))); Assertions.assertThrows(IllegalArgumentException.class, () -> SnakeCase.INSTANCE.format(Arrays.asList("a", "_"))); - Assertions.assertThrows(IllegalArgumentException.class, () -> new DelimitedCase(null, ",")); - Assertions.assertThrows(IllegalArgumentException.class, () -> new DelimitedCase(new char[1], null)); - Assertions.assertThrows(IllegalArgumentException.class, () -> new DelimitedCase(new char[0], ",")); - Assertions.assertThrows(IllegalArgumentException.class, () -> new DelimitedCase(new char[0], "")); + Assertions.assertThrows(IllegalArgumentException.class, () -> new CharacterDelimitedCase(null, ",")); + Assertions.assertThrows(IllegalArgumentException.class, () -> new CharacterDelimitedCase(new char[1], null)); + Assertions.assertThrows(IllegalArgumentException.class, () -> new CharacterDelimitedCase(new char[0], ",")); + Assertions.assertThrows(IllegalArgumentException.class, () -> new CharacterDelimitedCase(new char[0], "")); } @Test @@ -74,6 +74,7 @@ public void testPascalCase() { Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("1"))); Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList("a1", "2c"))); + Assertions.assertThrows(IllegalArgumentException.class, 
() -> PascalCase.INSTANCE.format(Arrays.asList("1a"))); Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList(""))); } @@ -108,6 +109,7 @@ public void testCamelCase() { Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "b", ""))); // must begin with character that can be uppercased Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "1b"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("1a"))); } @Test From 23912de581fb1f11d81fa327a646833ab69ddfb4 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Thu, 19 Oct 2023 09:49:03 -0400 Subject: [PATCH 22/52] javadoc comment --- src/main/java/org/apache/commons/text/cases/CasesUtils.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/org/apache/commons/text/cases/CasesUtils.java b/src/main/java/org/apache/commons/text/cases/CasesUtils.java index a79d922857..011ec3cba0 100644 --- a/src/main/java/org/apache/commons/text/cases/CasesUtils.java +++ b/src/main/java/org/apache/commons/text/cases/CasesUtils.java @@ -16,6 +16,10 @@ */ package org.apache.commons.text.cases; +/** + * Util class for {@link org.apache.commons.text.cases.Case} implementations to force case + * conversion and throw consistent and clear exceptions. 
+ */ public class CasesUtils { /** From 670f84ed7eb7d277c7dfd5325ab9ad1be0e50eae Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Thu, 19 Oct 2023 09:50:00 -0400 Subject: [PATCH 23/52] renamed variable for clarity --- .../text/cases/UpperCaseDelimitedCase.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java index 48503b1d26..ac47d126de 100644 --- a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -25,13 +25,13 @@ public class UpperCaseDelimitedCase implements Case { /** flag to indicate whether the first character of the first token should be upper cased. */ - boolean upperCaseFirstCharacter = false; + boolean lowerCaseFirstCharacter = false; /** * Constructs a new UpperCaseDelimitedCase instance. */ - protected UpperCaseDelimitedCase(boolean upperCaseFirstCharacter) { - this.upperCaseFirstCharacter = upperCaseFirstCharacter; + protected UpperCaseDelimitedCase(boolean lowerCaseFirstCharacter) { + this.lowerCaseFirstCharacter = lowerCaseFirstCharacter; } /** @@ -50,6 +50,11 @@ public List parse(String string) { if (string.length() == 0) { return tokens; } + if (lowerCaseFirstCharacter) { + CasesUtils.toLowerCase(string.codePointAt(0)); + } else { + CasesUtils.toUpperCase(string.codePointAt(0)); + } int strLen = string.length(); int[] tokenCodePoints = new int[strLen]; int tokenCodePointsOffset = 0; @@ -99,7 +104,9 @@ public String format(Iterable tokens) { for (int i = 0; i < token.length();) { final int codePoint = token.codePointAt(i); int codePointFormatted = codePoint; - if (i == 0 && (!upperCaseFirstCharacter || tokenIndex > 0)) { + if (i == 0 && tokenIndex == 0 && lowerCaseFirstCharacter) { + codePointFormatted = CasesUtils.toLowerCase(codePoint); + } else if (i == 0) { 
codePointFormatted = CasesUtils.toUpperCase(codePoint); } else if (Character.isUpperCase(codePointFormatted) || Character.isTitleCase(codePointFormatted)) { //if character is title or upper case, it must be converted to lower From c24c8023e94b4a1cbd32f19acb54e15110225df2 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Thu, 19 Oct 2023 10:00:10 -0400 Subject: [PATCH 24/52] Util class no longer needed, can place methods directly in base class --- .../apache/commons/text/cases/CasesUtils.java | 65 ------------------- .../text/cases/UpperCaseDelimitedCase.java | 52 +++++++++++++-- 2 files changed, 46 insertions(+), 71 deletions(-) delete mode 100644 src/main/java/org/apache/commons/text/cases/CasesUtils.java diff --git a/src/main/java/org/apache/commons/text/cases/CasesUtils.java b/src/main/java/org/apache/commons/text/cases/CasesUtils.java deleted file mode 100644 index 011ec3cba0..0000000000 --- a/src/main/java/org/apache/commons/text/cases/CasesUtils.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.commons.text.cases; - -/** - * Util class for {@link org.apache.commons.text.cases.Case} implementations to force case - * conversion and throw consistent and clear exceptions. - */ -public class CasesUtils { - - /** - * Transforms a unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the - * result is upper case. - * @param codePoint - * @return the transformed code point - * @throws IllegalArgumentException if the converted code point cannot be mapped into an upper case character - */ - protected static int toUpperCase(int codePoint) { - int codePointFormatted = Character.toUpperCase(codePoint); - if (!Character.isUpperCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to upper case")); - } - return codePointFormatted; - } - - /** - * Transforms a unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the - * result is lower case. - * @param codePoint the code point to transform - * @return the lower case code point that corresponds to the input parameter - * @throws IllegalArgumentException if the converted code point cannot be mapped into a lower case character - */ - protected static int toLowerCase(int codePoint) { - int codePointFormatted = Character.toLowerCase(codePoint); - if (!Character.isLowerCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to lower case")); - } - return codePointFormatted; - } - - /** - * Creates an exception message that displays the unicode character as well as the hex value for clarity. 
- * @param codePoint the unicode code point - * @param suffix a string suffix for the message - * @return the message - */ - protected static String createExceptionMessage(int codePoint, String suffix) { - return "Character '" + new String(new int[] { codePoint }, 0, 1) + "' with value 0x" + Integer.toHexString(codePoint) + suffix; - } - -} diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java index ac47d126de..17b313396e 100644 --- a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -25,7 +25,7 @@ public class UpperCaseDelimitedCase implements Case { /** flag to indicate whether the first character of the first token should be upper cased. */ - boolean lowerCaseFirstCharacter = false; + private boolean lowerCaseFirstCharacter = false; /** * Constructs a new UpperCaseDelimitedCase instance. 
@@ -51,9 +51,9 @@ public List parse(String string) { return tokens; } if (lowerCaseFirstCharacter) { - CasesUtils.toLowerCase(string.codePointAt(0)); + toLowerCase(string.codePointAt(0)); } else { - CasesUtils.toUpperCase(string.codePointAt(0)); + toUpperCase(string.codePointAt(0)); } int strLen = string.length(); int[] tokenCodePoints = new int[strLen]; @@ -105,12 +105,12 @@ public String format(Iterable tokens) { final int codePoint = token.codePointAt(i); int codePointFormatted = codePoint; if (i == 0 && tokenIndex == 0 && lowerCaseFirstCharacter) { - codePointFormatted = CasesUtils.toLowerCase(codePoint); + codePointFormatted = toLowerCase(codePoint); } else if (i == 0) { - codePointFormatted = CasesUtils.toUpperCase(codePoint); + codePointFormatted = toUpperCase(codePoint); } else if (Character.isUpperCase(codePointFormatted) || Character.isTitleCase(codePointFormatted)) { //if character is title or upper case, it must be converted to lower - codePointFormatted = CasesUtils.toLowerCase(codePoint); + codePointFormatted = toLowerCase(codePoint); } formattedString.appendCodePoint(codePointFormatted); i += Character.charCount(codePoint); @@ -120,4 +120,44 @@ public String format(Iterable tokens) { return formattedString.toString(); } + /** + * Transforms a unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the + * result is upper case. 
+ * @param codePoint + * @return the transformed code point + * @throws IllegalArgumentException if the converted code point cannot be mapped into an upper case character + */ + private static int toUpperCase(int codePoint) { + int codePointFormatted = Character.toUpperCase(codePoint); + if (!Character.isUpperCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to upper case")); + } + return codePointFormatted; + } + + /** + * Transforms a unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the + * result is lower case. + * @param codePoint the code point to transform + * @return the lower case code point that corresponds to the input parameter + * @throws IllegalArgumentException if the converted code point cannot be mapped into a lower case character + */ + private static int toLowerCase(int codePoint) { + int codePointFormatted = Character.toLowerCase(codePoint); + if (!Character.isLowerCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to lower case")); + } + return codePointFormatted; + } + + /** + * Creates an exception message that displays the unicode character as well as the hex value for clarity. 
+ * @param codePoint the unicode code point + * @param suffix a string suffix for the message + * @return the message + */ + private static String createExceptionMessage(int codePoint, String suffix) { + return "Character '" + new String(new int[] { codePoint }, 0, 1) + "' with value 0x" + Integer.toHexString(codePoint) + suffix; + } + } From 3c1d7a881b863c7e191993ec3ec0f101f975f50e Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Thu, 19 Oct 2023 13:06:34 -0400 Subject: [PATCH 25/52] javadocs --- .../java/org/apache/commons/text/cases/CamelCase.java | 4 ++-- .../commons/text/cases/CharacterDelimitedCase.java | 4 ++-- .../java/org/apache/commons/text/cases/KebabCase.java | 4 ++-- .../java/org/apache/commons/text/cases/PascalCase.java | 4 ++-- .../java/org/apache/commons/text/cases/SnakeCase.java | 4 ++-- .../commons/text/cases/UpperCaseDelimitedCase.java | 10 +++++----- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index d91bd95085..acbc6646a6 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -20,13 +20,13 @@ * Case implementation that parses and formats strings of the form 'myCamelCase' *

* CamelCase is a case where tokens are delimited by upper case unicode characters. The very first - * token should begin with lower or non cased character, and any subsequent tokens begin with an + * token should begin with a lower case character, and any subsequent tokens begin with an * upper case character. All remaining characters will be lower cased or non cased. *

*/ public final class CamelCase extends UpperCaseDelimitedCase { - /** constant reusable instance of this case. */ + /** Constant reusable instance of this case. */ public static final CamelCase INSTANCE = new CamelCase(); /** diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index 7f7fe51c6f..a3ed9d1048 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -29,10 +29,10 @@ */ public class CharacterDelimitedCase implements Case { - /** delimiters to be used when parsing. */ + /** Delimiters to be used when parsing. */ private Set parseDelimiters; - /** delimiter to be used when formatting. */ + /** Delimiter to be used when formatting. */ private String formatDelimiter; /** diff --git a/src/main/java/org/apache/commons/text/cases/KebabCase.java b/src/main/java/org/apache/commons/text/cases/KebabCase.java index cee209f301..91995f5b67 100644 --- a/src/main/java/org/apache/commons/text/cases/KebabCase.java +++ b/src/main/java/org/apache/commons/text/cases/KebabCase.java @@ -24,10 +24,10 @@ */ public final class KebabCase extends CharacterDelimitedCase { - /** constant for delimiter. */ + /** Constant for delimiter. */ private static final char DELIMITER = '-'; - /** constant reusable instance of this case. */ + /** Constant reusable instance of this case. */ public static final KebabCase INSTANCE = new KebabCase(); /** diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index 96dd7b2a5c..d3fe08190a 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -19,13 +19,13 @@ /** * Case implementation which parses and formats strings of the form 'MyPascalString' *

- * PascalCase is a case where tokens are delimited by upper case unicode characters. Each parsed token + * PascalCase tokens are delimited by upper case unicode characters. Each parsed token * begins with an upper case character, and remaining token characters are either lower case or non cased. *

*/ public final class PascalCase extends UpperCaseDelimitedCase { - /** constant reusable instance of this case. */ + /** Constant reusable instance of this case. */ public static final PascalCase INSTANCE = new PascalCase(); /** diff --git a/src/main/java/org/apache/commons/text/cases/SnakeCase.java b/src/main/java/org/apache/commons/text/cases/SnakeCase.java index 4a33e2dce8..b6e1ae74d3 100644 --- a/src/main/java/org/apache/commons/text/cases/SnakeCase.java +++ b/src/main/java/org/apache/commons/text/cases/SnakeCase.java @@ -24,10 +24,10 @@ */ public final class SnakeCase extends CharacterDelimitedCase { - /** constant for delimiter. */ + /** Constant for delimiter. */ private static final char DELIMITER = '_'; - /** constant reusable instance of this case. */ + /** Constant reusable instance of this case. */ public static final SnakeCase INSTANCE = new SnakeCase(); /** diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java index 17b313396e..17526daaa6 100644 --- a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -24,7 +24,7 @@ */ public class UpperCaseDelimitedCase implements Case { - /** flag to indicate whether the first character of the first token should be upper cased. */ + /** Flag to indicate whether the first character of the first token should be upper cased. */ private boolean lowerCaseFirstCharacter = false; /** @@ -87,7 +87,7 @@ public List parse(String string) { * throws an exception. Any other characters in the token are returned as-is. Empty tokens are * not supported and will cause an exception to be thrown. *

- * @param tokens The string tokens to be formatted + * @param tokens the string tokens to be formatted * @return the formatted string * @throws IllegalArgumentException if 1) any token is empty 2) any token begins with a * character that cannot be upper cased, or 3) any token contains an upper or title case @@ -123,7 +123,7 @@ public String format(Iterable tokens) { /** * Transforms a unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the * result is upper case. - * @param codePoint + * @param codePoint the code point to upper case * @return the transformed code point * @throws IllegalArgumentException if the converted code point cannot be mapped into an upper case character */ @@ -138,7 +138,7 @@ private static int toUpperCase(int codePoint) { /** * Transforms a unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the * result is lower case. - * @param codePoint the code point to transform + * @param codePoint the code point to lower case * @return the lower case code point that corresponds to the input parameter * @throws IllegalArgumentException if the converted code point cannot be mapped into a lower case character */ @@ -152,7 +152,7 @@ private static int toLowerCase(int codePoint) { /** * Creates an exception message that displays the unicode character as well as the hex value for clarity. 
- * @param codePoint the unicode code point + * @param codePoint the unicode code point to transform * @param suffix a string suffix for the message * @return the message */ From 8cb9291c1372b2d01de28e091210493954123a45 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Fri, 20 Oct 2023 08:24:30 -0400 Subject: [PATCH 26/52] grammar and spelling --- .../apache/commons/text/cases/CamelCase.java | 4 ++-- .../org/apache/commons/text/cases/Case.java | 4 ++-- .../apache/commons/text/cases/PascalCase.java | 2 +- .../text/cases/UpperCaseDelimitedCase.java | 20 +++++++++---------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index acbc6646a6..583d95c1f0 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -19,9 +19,9 @@ /** * Case implementation that parses and formats strings of the form 'myCamelCase' *

- * CamelCase is a case where tokens are delimited by upper case unicode characters. The very first + * CamelCase is a case where tokens are delimited by upper case Unicode characters. The very first * token should begin with a lower case character, and any subsequent tokens begin with an - * upper case character. All remaining characters will be lower cased or non cased. + * upper case character. All remaining characters will be lower case or non cased. *

*/ public final class CamelCase extends UpperCaseDelimitedCase { diff --git a/src/main/java/org/apache/commons/text/cases/Case.java b/src/main/java/org/apache/commons/text/cases/Case.java index 99b7f9a0ed..5f4f089452 100644 --- a/src/main/java/org/apache/commons/text/cases/Case.java +++ b/src/main/java/org/apache/commons/text/cases/Case.java @@ -19,8 +19,8 @@ import java.util.List; /** - * Handles formatting and parsing tokens to/from a String. For most implementations tokens returned - * by the parse method should abide by any restrictions present in the format method. i.e. calling + * Formats and parses tokens to/from a String. In most implementations tokens returned + * by the parse method abide by any restrictions present in the format method. That is, calling * format() with the results of a call to parse() on the same Case instance should return a * matching String. * diff --git a/src/main/java/org/apache/commons/text/cases/PascalCase.java b/src/main/java/org/apache/commons/text/cases/PascalCase.java index d3fe08190a..3d298cdb37 100644 --- a/src/main/java/org/apache/commons/text/cases/PascalCase.java +++ b/src/main/java/org/apache/commons/text/cases/PascalCase.java @@ -19,7 +19,7 @@ /** * Case implementation which parses and formats strings of the form 'MyPascalString' *

- * PascalCase tokens are delimited by upper case unicode characters. Each parsed token + * PascalCase tokens are delimited by upper case Unicode characters. Each parsed token * begins with an upper case character, and remaining token characters are either lower case or non cased. *

*/ diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java index 17526daaa6..d8764ecd43 100644 --- a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -24,7 +24,7 @@ */ public class UpperCaseDelimitedCase implements Case { - /** Flag to indicate whether the first character of the first token should be upper cased. */ + /** Flag to indicate whether the first character of the first token should be upper case. */ private boolean lowerCaseFirstCharacter = false; /** @@ -37,9 +37,9 @@ protected UpperCaseDelimitedCase(boolean lowerCaseFirstCharacter) { /** * Parses a string into tokens. *

- * String characters are iterated over and when an upper case unicode character is - * encountered, that character is considered to be the start of a new token, with the character - * itself included in the token. This method will never return empty tokens. + * String characters are iterated over and when an upper case Unicode character is + * encountered, that character starts a new token, with the character + * itself included in the token. This method never returns empty tokens. *

* @param string the string to parse * @return the list of tokens found in the string @@ -90,8 +90,8 @@ public List parse(String string) { * @param tokens the string tokens to be formatted * @return the formatted string * @throws IllegalArgumentException if 1) any token is empty 2) any token begins with a - * character that cannot be upper cased, or 3) any token contains an upper or title case - * character that cannot be converted to lower case. + * character that cannot be mapped to upper case, or 3) any token contains an upper or title case + * character that cannot be mapped to lower case. */ @Override public String format(Iterable tokens) { @@ -121,7 +121,7 @@ public String format(Iterable tokens) { } /** - * Transforms a unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the + * Transforms a Unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the * result is upper case. * @param codePoint the code point to upper case * @return the transformed code point @@ -136,7 +136,7 @@ private static int toUpperCase(int codePoint) { } /** - * Transforms a unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the + * Transforms a Unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the * result is lower case. * @param codePoint the code point to lower case * @return the lower case code point that corresponds to the input parameter @@ -151,8 +151,8 @@ private static int toLowerCase(int codePoint) { } /** - * Creates an exception message that displays the unicode character as well as the hex value for clarity. - * @param codePoint the unicode code point to transform + * Creates an exception message that displays the Unicode character as well as the hex value for clarity. 
+ * @param codePoint the Unicode code point to transform * @param suffix a string suffix for the message * @return the message */ From cbfbad7a9e7bd7c55f6e1ebc6929c5a7fb031c95 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Fri, 20 Oct 2023 08:31:11 -0400 Subject: [PATCH 27/52] separate error messages --- .../commons/text/cases/CharacterDelimitedCase.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index a3ed9d1048..605f008743 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -50,11 +50,17 @@ protected CharacterDelimitedCase(char delimiter) { */ protected CharacterDelimitedCase(char[] parseDelimiters, String formatDelimiter) { super(); - if (parseDelimiters == null || parseDelimiters.length == 0) { - throw new IllegalArgumentException("Parse Delimiters cannot be null or empty"); + if (parseDelimiters == null) { + throw new IllegalArgumentException("Parse Delimiters cannot be null"); } - if (formatDelimiter == null || formatDelimiter.length() == 0) { - throw new IllegalArgumentException("Format Delimiters cannot be null or empty"); + if (parseDelimiters.length == 0) { + throw new IllegalArgumentException("Parse Delimiters cannot be empty"); + } + if (formatDelimiter == null) { + throw new IllegalArgumentException("Format Delimiters cannot be null"); + } + if (formatDelimiter.length() == 0) { + throw new IllegalArgumentException("Format Delimiters cannot be empty"); } this.parseDelimiters = generateDelimiterSet(parseDelimiters); this.formatDelimiter = formatDelimiter; From bbde93f2672dd0d801cc8a83790e912def9af86f Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 24 Oct 2023 07:18:11 -0400 Subject: [PATCH 28/52] use list for delimiters since collection size is 
likely small --- .../commons/text/cases/CharacterDelimitedCase.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index 605f008743..f86eb9c44b 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -17,9 +17,7 @@ package org.apache.commons.text.cases; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; -import java.util.Set; import org.apache.commons.lang3.CharUtils; @@ -30,7 +28,7 @@ public class CharacterDelimitedCase implements Case { /** Delimiters to be used when parsing. */ - private Set parseDelimiters; + private List parseDelimiters; /** Delimiter to be used when formatting. */ private String formatDelimiter; @@ -62,7 +60,7 @@ protected CharacterDelimitedCase(char[] parseDelimiters, String formatDelimiter) if (formatDelimiter.length() == 0) { throw new IllegalArgumentException("Format Delimiters cannot be empty"); } - this.parseDelimiters = generateDelimiterSet(parseDelimiters); + this.parseDelimiters = generateDelimiterList(parseDelimiters); this.formatDelimiter = formatDelimiter; } @@ -139,8 +137,8 @@ public List parse(String string) { * @param delimiters set of characters to determine capitalization, null means whitespace * @return the Set of delimiter characters in the input array */ - private static Set generateDelimiterSet(final char[] delimiters) { - final Set delimiterHashSet = new HashSet<>(); + private static List generateDelimiterList(final char[] delimiters) { + final List delimiterHashSet = new ArrayList<>(); for (int index = 0; index < delimiters.length; index++) { delimiterHashSet.add(Character.codePointAt(delimiters, index)); } From 8f5acb0d441ba9d5e0c219cf8ce42f4de73d47ae Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 24 
Oct 2023 07:19:13 -0400 Subject: [PATCH 29/52] use isempty --- .../org/apache/commons/text/cases/CharacterDelimitedCase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index f86eb9c44b..c991757673 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -109,7 +109,7 @@ public String format(Iterable tokens) { @Override public List parse(String string) { List tokens = new ArrayList<>(); - if (string.length() == 0) { + if (string.isEmpty()) { return tokens; } int strLen = string.length(); From 9aba7a30775b41faeeba99b5d3386a5757655f47 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 24 Oct 2023 07:40:27 -0400 Subject: [PATCH 30/52] allow null delimiters --- .../text/cases/CharacterDelimitedCase.java | 45 +++++++++---------- .../apache/commons/text/cases/CasesTest.java | 9 ++-- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index c991757673..ae6245986f 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -33,6 +33,13 @@ public class CharacterDelimitedCase implements Case { /** Delimiter to be used when formatting. */ private String formatDelimiter; + /** + * Constructs a new Delimited Case with null delimiters. + */ + protected CharacterDelimitedCase() { + this(null, null); + } + /** * Constructs a new Delimited Case. 
* @param delimiter the character to use as both the parse and format delimiter @@ -48,18 +55,6 @@ protected CharacterDelimitedCase(char delimiter) { */ protected CharacterDelimitedCase(char[] parseDelimiters, String formatDelimiter) { super(); - if (parseDelimiters == null) { - throw new IllegalArgumentException("Parse Delimiters cannot be null"); - } - if (parseDelimiters.length == 0) { - throw new IllegalArgumentException("Parse Delimiters cannot be empty"); - } - if (formatDelimiter == null) { - throw new IllegalArgumentException("Format Delimiters cannot be null"); - } - if (formatDelimiter.length() == 0) { - throw new IllegalArgumentException("Format Delimiters cannot be empty"); - } this.parseDelimiters = generateDelimiterList(parseDelimiters); this.formatDelimiter = formatDelimiter; } @@ -82,12 +77,14 @@ public String format(Iterable tokens) { StringBuilder formattedString = new StringBuilder(); int i = 0; for (String token : tokens) { - int delimiterFoundIndex = token.indexOf(formatDelimiter); - if (delimiterFoundIndex > -1) { - throw new IllegalArgumentException("Token " + i + " contains delimiter character '" + formatDelimiter + "' at index " + delimiterFoundIndex); - } - if (i > 0) { - formattedString.append(formatDelimiter); + if (formatDelimiter != null) { + int delimiterFoundIndex = token.indexOf(formatDelimiter); + if (delimiterFoundIndex > -1) { + throw new IllegalArgumentException("Token " + i + " contains delimiter character '" + formatDelimiter + "' at index " + delimiterFoundIndex); + } + if (i > 0) { + formattedString.append(formatDelimiter); + } } i++; formattedString.append(token); @@ -132,15 +129,17 @@ public List parse(String string) { } /** - * Converts an array of delimiters to a hash set of code points. The generated hash set provides O(1) lookup time. + * Converts an array of delimiters to a hash set of code points. 
* - * @param delimiters set of characters to determine capitalization, null means whitespace - * @return the Set of delimiter characters in the input array + * @param delimiters array of characters to add to list + * @return the List of delimiter characters in the input array */ private static List generateDelimiterList(final char[] delimiters) { final List delimiterHashSet = new ArrayList<>(); - for (int index = 0; index < delimiters.length; index++) { - delimiterHashSet.add(Character.codePointAt(delimiters, index)); + if (delimiters != null) { + for (int index = 0; index < delimiters.length; index++) { + delimiterHashSet.add(Character.codePointAt(delimiters, index)); + } } return delimiterHashSet; } diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index cdf73b08b9..fd0d5cee80 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -26,13 +26,12 @@ public class CasesTest { @Test - public void testDelimiterCharacterException() { + public void testCharacterDelimitedCase() { Assertions.assertThrows(IllegalArgumentException.class, () -> KebabCase.INSTANCE.format(Arrays.asList("a", "-"))); Assertions.assertThrows(IllegalArgumentException.class, () -> SnakeCase.INSTANCE.format(Arrays.asList("a", "_"))); - Assertions.assertThrows(IllegalArgumentException.class, () -> new CharacterDelimitedCase(null, ",")); - Assertions.assertThrows(IllegalArgumentException.class, () -> new CharacterDelimitedCase(new char[1], null)); - Assertions.assertThrows(IllegalArgumentException.class, () -> new CharacterDelimitedCase(new char[0], ",")); - Assertions.assertThrows(IllegalArgumentException.class, () -> new CharacterDelimitedCase(new char[0], "")); + CharacterDelimitedCase nullDelimiters = new CharacterDelimitedCase(); + assertFormat(nullDelimiters, "abc", Arrays.asList("a", "b", "c")); + assertParse(nullDelimiters, 
"abc", Arrays.asList("abc")); } @Test From fe070e89e275805df1e9ed0d8414073383acac51 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 24 Oct 2023 09:30:00 -0400 Subject: [PATCH 31/52] allow null when parsing --- .../org/apache/commons/text/cases/CharacterDelimitedCase.java | 2 +- .../org/apache/commons/text/cases/UpperCaseDelimitedCase.java | 2 +- src/test/java/org/apache/commons/text/cases/CasesTest.java | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index ae6245986f..d38f4f7cb2 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -106,7 +106,7 @@ public String format(Iterable tokens) { @Override public List parse(String string) { List tokens = new ArrayList<>(); - if (string.isEmpty()) { + if (string == null || string.isEmpty()) { return tokens; } int strLen = string.length(); diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java index d8764ecd43..c35ac7ffe0 100644 --- a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -47,7 +47,7 @@ protected UpperCaseDelimitedCase(boolean lowerCaseFirstCharacter) { @Override public List parse(String string) { List tokens = new ArrayList<>(); - if (string.length() == 0) { + if (string == null || string.isEmpty()) { return tokens; } if (lowerCaseFirstCharacter) { diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index fd0d5cee80..cf97545897 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ 
b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -37,6 +37,7 @@ public void testCharacterDelimitedCase() { @Test public void testKebabCase() { assertFormatAndParse(KebabCase.INSTANCE, "", Arrays.asList()); + assertParse(KebabCase.INSTANCE, null, Arrays.asList()); assertFormatAndParse(KebabCase.INSTANCE, "my-Tokens-123-a1", Arrays.asList("my", "Tokens", "123", "a1")); assertFormatAndParse(KebabCase.INSTANCE, "blank--token", Arrays.asList("blank", "", "token")); } @@ -58,6 +59,7 @@ public void testUtf32() { @Test public void testSnakeCase() { assertFormatAndParse(SnakeCase.INSTANCE, "", Arrays.asList()); + assertParse(SnakeCase.INSTANCE, null, Arrays.asList()); assertFormatAndParse(SnakeCase.INSTANCE, "my_Tokens_123_a1", Arrays.asList("my", "Tokens", "123", "a1")); assertFormatAndParse(SnakeCase.INSTANCE, "blank__token", Arrays.asList("blank", "", "token")); } From f92cae9c101f83cac89a334d0f788259d0a77f98 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 24 Oct 2023 09:39:17 -0400 Subject: [PATCH 32/52] Updated package info --- .../apache/commons/text/cases/package-info.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/package-info.java b/src/main/java/org/apache/commons/text/cases/package-info.java index 3dff24de0c..711f27db4a 100644 --- a/src/main/java/org/apache/commons/text/cases/package-info.java +++ b/src/main/java/org/apache/commons/text/cases/package-info.java @@ -16,14 +16,16 @@ */ /** *

Provides algorithms for parsing and formatting various programming "Cases".

- *

The provided implementations are for the four most common cases:
- * CamelCase - delimited by ascii uppercase alpha characters and always beginning with a lowercase ascii alpha
- * PascalCase - Similar to CamelCase but always begins with an uppercase ascii alpha
- * DelimitedCase - delimited by a constant character, which is omitted from parsed tokens
- * SnakeCase - implementation of DelimitedCase in which the delimiter is an underscore '_'
- * KebabCase - implementation of DelimitedCase in which the delimiter is a hyphen '-'
+ *

Two base classes are provided to hold functionality common to multiple cases:
+ * UpperCaseDelimitedCase - delimited by upper case characters.
+ * CharacterDelimitedCase - delimited by a constant character, which is omitted from parsed tokens.
+ * Four full implementations are provided for the most widely used cases:
+ * CamelCase - extension of UpperCaseDelimitedCase where first character must be lower case.
+ * PascalCase - extension of UpperCaseDelimitedCase where first character must be upper case.
+ * SnakeCase - extension of CharacterDelimitedCase in which the delimiter is an underscore '_'.
+ * KebabCase - extension of CharacterDelimitedCase in which the delimiter is a hyphen '-'.
*

* - * @since 1.0 + * @since 1.11 */ package org.apache.commons.text.cases; From 6613073a0d272c4f3341fe911c8c4a3ed7d807bf Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 24 Oct 2023 09:44:48 -0400 Subject: [PATCH 33/52] javadoc formatting --- .../apache/commons/text/cases/CharacterDelimitedCase.java | 4 ++++ .../apache/commons/text/cases/UpperCaseDelimitedCase.java | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index d38f4f7cb2..ff5b14fe59 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -42,6 +42,7 @@ protected CharacterDelimitedCase() { /** * Constructs a new Delimited Case. + * * @param delimiter the character to use as both the parse and format delimiter */ protected CharacterDelimitedCase(char delimiter) { @@ -50,6 +51,7 @@ protected CharacterDelimitedCase(char delimiter) { /** * Constructs a new delimited case. + * * @param parseDelimiters the array of delimiters to use when parsing * @param formatDelimiter the delimiter to use when formatting */ @@ -68,6 +70,7 @@ protected CharacterDelimitedCase(char[] parseDelimiters, String formatDelimiter) * No other restrictions are placed on the contents of the tokens. * Note: This Case does support empty tokens.
*

+ * * @param tokens the tokens to be formatted into a delimited string * @return the delimited string * @throws IllegalArgumentException if any tokens contain the delimiter character @@ -100,6 +103,7 @@ public String format(Iterable tokens) { * considered reserved, and is omitted from the returned parsed tokens.
* No other restrictions are placed on the contents of the input string.
*

+ * * @param string the delimited string to be parsed * @return the list of tokens found in the string */ diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java index c35ac7ffe0..8741befed4 100644 --- a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -41,6 +41,7 @@ protected UpperCaseDelimitedCase(boolean lowerCaseFirstCharacter) { * encountered, that character starts a new token, with the character * itself included in the token. This method never returns empty tokens. *

+ * * @param string the string to parse * @return the list of tokens found in the string */ @@ -87,6 +88,7 @@ public List parse(String string) { * throws an exception. Any other characters in the token are returned as-is. Empty tokens are * not supported and will cause an exception to be thrown. *

+ * * @param tokens the string tokens to be formatted * @return the formatted string * @throws IllegalArgumentException if 1) any token is empty 2) any token begins with a @@ -123,6 +125,7 @@ public String format(Iterable tokens) { /** * Transforms a Unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the * result is upper case. + * * @param codePoint the code point to upper case * @return the transformed code point * @throws IllegalArgumentException if the converted code point cannot be mapped into an upper case character @@ -138,6 +141,7 @@ private static int toUpperCase(int codePoint) { /** * Transforms a Unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the * result is lower case. + * * @param codePoint the code point to lower case * @return the lower case code point that corresponds to the input parameter * @throws IllegalArgumentException if the converted code point cannot be mapped into a lower case character @@ -152,6 +156,7 @@ private static int toLowerCase(int codePoint) { /** * Creates an exception message that displays the Unicode character as well as the hex value for clarity. 
+ * * @param codePoint the Unicode code point to transform * @param suffix a string suffix for the message * @return the message From d3964ee1554dcf02de66f87a993058ebd9768b3d Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 24 Oct 2023 09:53:58 -0400 Subject: [PATCH 34/52] package protected constructors --- .../apache/commons/text/cases/CharacterDelimitedCase.java | 6 +++--- .../apache/commons/text/cases/UpperCaseDelimitedCase.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index ff5b14fe59..14d5f16e68 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -36,7 +36,7 @@ public class CharacterDelimitedCase implements Case { /** * Constructs a new Delimited Case with null delimiters. */ - protected CharacterDelimitedCase() { + CharacterDelimitedCase() { this(null, null); } @@ -45,7 +45,7 @@ protected CharacterDelimitedCase() { * * @param delimiter the character to use as both the parse and format delimiter */ - protected CharacterDelimitedCase(char delimiter) { + CharacterDelimitedCase(char delimiter) { this(new char[] { delimiter }, CharUtils.toString(delimiter)); } @@ -55,7 +55,7 @@ protected CharacterDelimitedCase(char delimiter) { * @param parseDelimiters the array of delimiters to use when parsing * @param formatDelimiter the delimiter to use when formatting */ - protected CharacterDelimitedCase(char[] parseDelimiters, String formatDelimiter) { + CharacterDelimitedCase(char[] parseDelimiters, String formatDelimiter) { super(); this.parseDelimiters = generateDelimiterList(parseDelimiters); this.formatDelimiter = formatDelimiter; diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java 
b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java index 8741befed4..f70e8897c8 100644 --- a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -30,7 +30,7 @@ public class UpperCaseDelimitedCase implements Case { /** * Constructs a new UpperCaseDelimitedCase instance. */ - protected UpperCaseDelimitedCase(boolean lowerCaseFirstCharacter) { + UpperCaseDelimitedCase(boolean lowerCaseFirstCharacter) { this.lowerCaseFirstCharacter = lowerCaseFirstCharacter; } From 145b4e4c691cc630683415465a4c51d630dcd645 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 24 Oct 2023 09:57:00 -0400 Subject: [PATCH 35/52] javadoc --- src/test/java/org/apache/commons/text/cases/CasesTest.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index cf97545897..9634b04a78 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -241,10 +241,6 @@ private void assertParse(Case caseInstance, String string, List tokens) /** * Asserts that string parses into the expected tokens, ignoring case if the caseInsensitive parameter is true - * @param caseInstance - * @param string - * @param tokens - * @param caseInsensitive */ private void assertParse(Case caseInstance, String string, List tokens, Boolean caseInsensitive) { List parsedTokens = caseInstance.parse(string); From d585716967afbfaf132f594515e2cbf0c205db19 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Mon, 30 Oct 2023 10:12:02 -0400 Subject: [PATCH 36/52] utils class to hold static references --- .../apache/commons/text/cases/CasesUtils.java | 44 +++++++++++++++++++ .../apache/commons/text/cases/CasesTest.java | 6 +++ 2 files changed, 50 insertions(+) create mode 100644 
src/main/java/org/apache/commons/text/cases/CasesUtils.java diff --git a/src/main/java/org/apache/commons/text/cases/CasesUtils.java b/src/main/java/org/apache/commons/text/cases/CasesUtils.java new file mode 100644 index 0000000000..2f20cceae4 --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/CasesUtils.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.cases; + +/** + * Util methods for the Cases API. + */ +public class CasesUtils { + + /** Constant reusable instance of KebabCase. */ + public static final KebabCase KEBAB = KebabCase.INSTANCE; + /** Constant reusable instance of SnakeCase. */ + public static final SnakeCase SNAKE = SnakeCase.INSTANCE; + /** Constant reusable instance of CamelCase. */ + public static final CamelCase CAMEL = CamelCase.INSTANCE; + /** Constant reusable instance of PascalCase. */ + public static final PascalCase PASCAL = PascalCase.INSTANCE; + + /** + * Utility method for converting between cases. 
+ * @param string the cased string to parse + * @param from the case of the existing string + * @param to the case to convert to + * @return the converted string + */ + public static String convert(String string, Case from, Case to) { + return to.format(from.parse(string)); + } + +} diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index 9634b04a78..f29d6f3eca 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -25,6 +25,12 @@ public class CasesTest { + @Test + public void testCasesUtils() { + Assertions.assertEquals("testUtils", CasesUtils.convert("test-utils", CasesUtils.KEBAB, CasesUtils.CAMEL)); + Assertions.assertEquals("Test_Utils", CasesUtils.convert("TestUtils", CasesUtils.PASCAL, CasesUtils.SNAKE)); + } + @Test public void testCharacterDelimitedCase() { Assertions.assertThrows(IllegalArgumentException.class, () -> KebabCase.INSTANCE.format(Arrays.asList("a", "-"))); From daa58d2cf4ebb428e81e00173d9305ca9963c9e5 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 31 Oct 2023 07:07:18 -0400 Subject: [PATCH 37/52] moved tests to correct case methods --- .../apache/commons/text/cases/CasesTest.java | 25 +++---------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index f29d6f3eca..fa42d5c4de 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -66,7 +66,7 @@ public void testUtf32() { public void testSnakeCase() { assertFormatAndParse(SnakeCase.INSTANCE, "", Arrays.asList()); assertParse(SnakeCase.INSTANCE, null, Arrays.asList()); - assertFormatAndParse(SnakeCase.INSTANCE, "my_Tokens_123_a1", Arrays.asList("my", "Tokens", "123", "a1")); + 
assertFormatAndParse(SnakeCase.INSTANCE, "My_var_NAME__mIXED_a1_c|=+", Arrays.asList("My", "var", "NAME", "", "mIXED", "a1", "c|=+")); assertFormatAndParse(SnakeCase.INSTANCE, "blank__token", Arrays.asList("blank", "", "token")); } @@ -74,7 +74,7 @@ public void testSnakeCase() { public void testPascalCase() { assertFormatAndParse(PascalCase.INSTANCE, "MyVarName", Arrays.asList("My", "Var", "Name")); - assertFormatAndParse(PascalCase.INSTANCE, "MyTokensA1D", Arrays.asList("My", "Tokens", "A1", "D")); + assertFormatAndParse(PascalCase.INSTANCE, "MyVarNameMixedA1C|=+", Arrays.asList("My", "var", "NAME", "mIXED", "a1", "c|=+"), true); assertFormatAndParse(PascalCase.INSTANCE, "", Arrays.asList()); assertParse(PascalCase.INSTANCE, "lowerFirst", Arrays.asList("lower", "First")); assertFormat(PascalCase.INSTANCE, "LowerFirst", Arrays.asList("lower", "First")); @@ -106,7 +106,7 @@ public void testNumberLetters() { public void testCamelCase() { assertFormatAndParse(CamelCase.INSTANCE, "", Arrays.asList()); - assertFormatAndParse(CamelCase.INSTANCE, "myTokensAbc123", Arrays.asList("my", "Tokens", "Abc123")); + assertFormatAndParse(CamelCase.INSTANCE, "myVarNameMixedA1C|=+", Arrays.asList("My", "var", "NAME", "mIXED", "a1", "c|=+"), true); assertFormatAndParse(CamelCase.INSTANCE, "specChar-Token+", Arrays.asList("spec", "Char-", "Token+")); assertParse(CamelCase.INSTANCE, "MyTokens", Arrays.asList("My", "Tokens")); @@ -131,25 +131,6 @@ public void testConversionsDelimited() { assertFormatAndParse(SnakeCase.INSTANCE, snakeString, tokens); } - @Test - public void testConversions() { - - List tokens = Arrays.asList("My", "var", "NAME", "mIXED", "a1", "c|=+"); - - String kebabString = "My-var-NAME-mIXED-a1-c|=+"; - assertFormatAndParse(KebabCase.INSTANCE, kebabString, tokens); - - String snakeString = "My_var_NAME_mIXED_a1_c|=+"; - assertFormatAndParse(SnakeCase.INSTANCE, snakeString, tokens); - - String camelString = "myVarNameMixedA1C|=+"; - 
assertFormatAndParse(CamelCase.INSTANCE, camelString, tokens, true); - - String pascalString = "MyVarNameMixedA1C|=+"; - assertFormatAndParse(PascalCase.INSTANCE, pascalString, tokens, true); - - } - @Test public void testEmptyTokens() { List tokens = Arrays.asList("HAS", "", "empty", "Tokens", ""); From 8db51ddc3f3a7321816b9487e115868d120e65f6 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 31 Oct 2023 07:07:36 -0400 Subject: [PATCH 38/52] javadocs --- .../text/cases/CharacterDelimitedCase.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index 14d5f16e68..4e40ef2636 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -22,8 +22,8 @@ import org.apache.commons.lang3.CharUtils; /** - * DelimitedCase is a case in which the true alphabetic case of the characters is ignored by default - * and tokens themselves are determined by the presence of a delimiter between each token. + * DelimitedCase is a case in which the true alphabetic case of the characters is ignored by default, + * and tokens are determined by the presence of a delimiter between each token. */ public class CharacterDelimitedCase implements Case { @@ -64,11 +64,11 @@ public class CharacterDelimitedCase implements Case { /** * Formats tokens into Delimited Case. *

- * Tokens are iterated on and appended to an output stream, with an instance of a - * delimiter character between them. This method validates that the delimiter character is not - * part of the token. If it is found within the token an exception is thrown.
- * No other restrictions are placed on the contents of the tokens. - * Note: This Case does support empty tokens.
+ * Tokens are appended to a string, with a delimiter between them. This method + * validates that the delimiter character is not part of the token. If it is found within the + * token an exception is thrown.
+ * No other restrictions are placed on the contents of the tokens. Note: This Case does support + * empty tokens.
*

* * @param tokens the tokens to be formatted into a delimited string @@ -98,7 +98,7 @@ public String format(Iterable tokens) { /** * Parses delimited string into tokens. *

- * Input string is parsed one character at a time until a delimiter character is reached. + * Input string is parsed one character at a time until a delimiter is reached. * When a delimiter character is reached a new token begins. The delimiter character is * considered reserved, and is omitted from the returned parsed tokens.
* No other restrictions are placed on the contents of the input string.
@@ -133,9 +133,9 @@ public List parse(String string) { } /** - * Converts an array of delimiters to a hash set of code points. + * Converts an array of delimiters to a List of code points. * - * @param delimiters array of characters to add to list + * @param delimiters array of characters to add to List * @return the List of delimiter characters in the input array */ private static List generateDelimiterList(final char[] delimiters) { From 7fc87701b0e359bebbd46d57a86dcac957629cf1 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 31 Oct 2023 07:07:43 -0400 Subject: [PATCH 39/52] javadocs --- src/main/java/org/apache/commons/text/cases/Case.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/text/cases/Case.java b/src/main/java/org/apache/commons/text/cases/Case.java index 5f4f089452..c9be5e2d48 100644 --- a/src/main/java/org/apache/commons/text/cases/Case.java +++ b/src/main/java/org/apache/commons/text/cases/Case.java @@ -45,7 +45,7 @@ public interface Case { * * @param string the string to be parsed by this Case into a list of tokens * @return the list of parsed tokens - * @throws IllegalArgumentException if the string cannot be parsed + * @throws IllegalArgumentException if the string cannot be parsed by this implementation */ List parse(String string); From 89eca002184981a95325f191fae30926d8b4f3ff Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Tue, 31 Oct 2023 07:08:44 -0400 Subject: [PATCH 40/52] renamed class --- .../commons/text/cases/{CasesUtils.java => Cases.java} | 2 +- src/test/java/org/apache/commons/text/cases/CasesTest.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) rename src/main/java/org/apache/commons/text/cases/{CasesUtils.java => Cases.java} (98%) diff --git a/src/main/java/org/apache/commons/text/cases/CasesUtils.java b/src/main/java/org/apache/commons/text/cases/Cases.java similarity index 98% rename from 
src/main/java/org/apache/commons/text/cases/CasesUtils.java rename to src/main/java/org/apache/commons/text/cases/Cases.java index 2f20cceae4..1ca7d29f0e 100644 --- a/src/main/java/org/apache/commons/text/cases/CasesUtils.java +++ b/src/main/java/org/apache/commons/text/cases/Cases.java @@ -19,7 +19,7 @@ /** * Util methods for the Cases API. */ -public class CasesUtils { +public class Cases { /** Constant reusable instance of KebabCase. */ public static final KebabCase KEBAB = KebabCase.INSTANCE; diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index fa42d5c4de..6d30fe202a 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -26,9 +26,9 @@ public class CasesTest { @Test - public void testCasesUtils() { - Assertions.assertEquals("testUtils", CasesUtils.convert("test-utils", CasesUtils.KEBAB, CasesUtils.CAMEL)); - Assertions.assertEquals("Test_Utils", CasesUtils.convert("TestUtils", CasesUtils.PASCAL, CasesUtils.SNAKE)); + public void testCases() { + Assertions.assertEquals("testUtils", Cases.convert("test-utils", Cases.KEBAB, Cases.CAMEL)); + Assertions.assertEquals("Test_Utils", Cases.convert("TestUtils", Cases.PASCAL, Cases.SNAKE)); } @Test From 8aa6edba9af4f7bac9c0373e7a4b7b0de5c6c792 Mon Sep 17 00:00:00 2001 From: "daniel.watson" Date: Wed, 1 Nov 2023 07:32:54 -0400 Subject: [PATCH 41/52] added string matcher that matches on any Unicode upper case tokens --- .../text/matcher/AbstractStringMatcher.java | 53 +++++++++++++++++++ .../text/matcher/StringMatcherFactory.java | 14 +++++ 2 files changed, 67 insertions(+) diff --git a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java index 1646b0e735..5438a46ab9 100644 --- a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java +++ 
b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java @@ -428,6 +428,59 @@ public int size() { } } + /** + * Matches Uppercase characters as determined by {@link java.lang.Character#isUpperCase(int)} + *

+ * Thread-safe. + *

+ */ + static final class UppercaseMatcher extends AbstractStringMatcher { + + /** + * Constructs a new instance of {@code UppercaseMatcher}. + */ + UppercaseMatcher() { + } + + /** + * Returns the {@code char} count of the matched upper case code point, or {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(char[] buffer, int start, int bufferStart, int bufferEnd) { + int codePoint = Character.codePointAt(buffer, start); + return Character.isUpperCase(codePoint) ? Character.charCount(codePoint) : 0; + } + + /** + * Returns the {@code char} count of the matched upper case code point, or {@code 0} if there is no match. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return The number of matching characters, zero for no match + */ + @Override + public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + int codePoint = Character.codePointAt(buffer, start); + return Character.isUpperCase(codePoint) ? Character.charCount(codePoint) : 0; + } + + /** + * Returns 1. + */ + @Override + public int size() { + return 1; + } + } + /** * Constructor. 
*/ diff --git a/src/main/java/org/apache/commons/text/matcher/StringMatcherFactory.java b/src/main/java/org/apache/commons/text/matcher/StringMatcherFactory.java index c08b79553b..b6d16e1974 100644 --- a/src/main/java/org/apache/commons/text/matcher/StringMatcherFactory.java +++ b/src/main/java/org/apache/commons/text/matcher/StringMatcherFactory.java @@ -81,6 +81,11 @@ public final class StringMatcherFactory { */ private static final AbstractStringMatcher.TrimMatcher TRIM_MATCHER = new AbstractStringMatcher.TrimMatcher(); + /** + * Matches Unicode upper case characters. + */ + private static final AbstractStringMatcher.UppercaseMatcher UPPERCASE_MATCHER = new AbstractStringMatcher.UppercaseMatcher(); + /** * No need to build instances for now. */ @@ -255,4 +260,13 @@ public StringMatcher trimMatcher() { return TRIM_MATCHER; } + /** + * Matches Unicode uppercase characters. + * + * @return The upper case matcher + */ + public StringMatcher uppercaseMatcher() { + return UPPERCASE_MATCHER; + } + } From 8b2ced4a00bdce150c34135e32bfb995dd5c051f Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Tue, 28 Nov 2023 16:00:04 -0500 Subject: [PATCH 42/52] allow setting whether to omit the delimiter from the token --- .../apache/commons/text/StringTokenizer.java | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/apache/commons/text/StringTokenizer.java b/src/main/java/org/apache/commons/text/StringTokenizer.java index 9e355115b3..f9bd25a4d8 100644 --- a/src/main/java/org/apache/commons/text/StringTokenizer.java +++ b/src/main/java/org/apache/commons/text/StringTokenizer.java @@ -239,6 +239,9 @@ public static StringTokenizer getTSVInstance(final String input) { /** Whether to ignore empty tokens. */ private boolean ignoreEmptyTokens = true; + /** Whether to omit delimiter matches from output. 
*/ + private boolean omitDelimiterMatches = true; + /** * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer, but with no text to * tokenize. @@ -751,8 +754,11 @@ private int readNextToken(final char[] srcChars, int start, final int len, final // handle empty token final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len); if (delimLen > 0) { - addToken(tokenList, StringUtils.EMPTY); - return start + delimLen; + //empty token is not possible if we are including delimiters in token + if (omitDelimiterMatches) { + addToken(tokenList, StringUtils.EMPTY); + return start + delimLen; + } } // handle found token @@ -826,7 +832,14 @@ private int readWithQuotes(final char[] srcChars, final int start, final int len if (delimLen > 0) { // return condition when end of token found addToken(tokenList, workArea.substring(0, trimStart)); - return pos + delimLen; + if (omitDelimiterMatches) { + return pos + delimLen; + } else { + //increment position only if we found a new delimiter + if (pos > start) { + return pos; + } + } } // check for quote, and thus back into quoting mode @@ -1021,6 +1034,17 @@ public StringTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) { return this; } + /** + * Sets whether the tokenizer should omit the delimiter matches from the output tokens. Default is true. + * + * @param omitDelimiterMatches whether delimiter matches are omitted + * @return this, to enable chaining + */ + public StringTokenizer setOmitDelimiterMatches(final boolean omitDelimiterMatches) { + this.omitDelimiterMatches = omitDelimiterMatches; + return this; + } + /** * Sets the quote character to use. *

From 992ac13b0d03ceeedce78857dd1a0ac1bd57a450 Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Tue, 28 Nov 2023 16:05:40 -0500 Subject: [PATCH 43/52] test omit delimiter flag --- .../commons/text/StringTokenizerTest.java | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/test/java/org/apache/commons/text/StringTokenizerTest.java b/src/test/java/org/apache/commons/text/StringTokenizerTest.java index 458cc813f6..57f0ad2b39 100644 --- a/src/test/java/org/apache/commons/text/StringTokenizerTest.java +++ b/src/test/java/org/apache/commons/text/StringTokenizerTest.java @@ -375,6 +375,40 @@ public void testBasicIgnoreTrimmed4() { assertFalse(tok.hasNext()); } + @Test + public void testOmitDelimiter1() { + final String input = "AbcDefGhi"; + final StringTokenizer tok = new StringTokenizer(input, StringMatcherFactory.INSTANCE.uppercaseMatcher()); + tok.setOmitDelimiterMatches(false); + assertEquals("Abc", tok.next()); + assertEquals("Def", tok.next()); + assertEquals("Ghi", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testOmitDelimiter2() { + final String input = "Abc:Def:Ghi"; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setOmitDelimiterMatches(false); + assertEquals("Abc", tok.next()); + assertEquals(":Def", tok.next()); + assertEquals(":Ghi", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testOmitDelimiter3() { + final String input = "Abc :Def :Ghi "; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setOmitDelimiterMatches(false); + assertEquals("Abc", tok.next()); + assertEquals(":Def", tok.next()); + assertEquals(":Ghi", tok.next()); + assertFalse(tok.hasNext()); + } + @Test public void testBasicQuoted1() { final String input = "a 'b' c"; From a50975ca245e7f1b1db814d88c7c198f14e760b6 Mon Sep 17 00:00:00 2001 From: theshoeshiner 
<2922868+theshoeshiner@users.noreply.github.com> Date: Tue, 28 Nov 2023 17:43:48 -0500 Subject: [PATCH 44/52] converted cases api to use StringTokenizer and TokenStringifier logic --- .../apache/commons/text/TokenFormatter.java | 5 + .../commons/text/TokenFormatterFactory.java | 114 ++++++++++++++++ .../apache/commons/text/TokenStringifier.java | 81 ++++++++++++ .../apache/commons/text/cases/CamelCase.java | 7 +- .../text/cases/CharacterDelimitedCase.java | 125 +++--------------- .../text/cases/PascalTokenFormatter.java | 103 +++++++++++++++ .../text/cases/UpperCaseDelimitedCase.java | 120 +++-------------- .../apache/commons/text/cases/CasesTest.java | 74 +++++++++-- 8 files changed, 407 insertions(+), 222 deletions(-) create mode 100644 src/main/java/org/apache/commons/text/TokenFormatter.java create mode 100644 src/main/java/org/apache/commons/text/TokenFormatterFactory.java create mode 100644 src/main/java/org/apache/commons/text/TokenStringifier.java create mode 100644 src/main/java/org/apache/commons/text/cases/PascalTokenFormatter.java diff --git a/src/main/java/org/apache/commons/text/TokenFormatter.java b/src/main/java/org/apache/commons/text/TokenFormatter.java new file mode 100644 index 0000000000..5dd90613c0 --- /dev/null +++ b/src/main/java/org/apache/commons/text/TokenFormatter.java @@ -0,0 +1,5 @@ +package org.apache.commons.text; + +public interface TokenFormatter { + String format(char[] prior, int tokenIndex, char[] token); +} diff --git a/src/main/java/org/apache/commons/text/TokenFormatterFactory.java b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java new file mode 100644 index 0000000000..e3789a0077 --- /dev/null +++ b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java @@ -0,0 +1,114 @@ +package org.apache.commons.text; + +import org.apache.commons.lang3.StringUtils; + +public class TokenFormatterFactory { + + /** + * Token formatter that returns the token as is. 
+ */ + public static class NoOpFormatter implements TokenFormatter { + @Override + public String format(char[] prior, int tokenIndex, char[] token) { + return new String(token); + } + + } + + /** + * Token formatter that always returns a constant string, and optionally checks the passed in token + * for the constant and throws an error when found. + */ + public static class ConstantTokenFormatter implements TokenFormatter { + + /** + * The constant to return. + */ + private char[] constant; + + /** + * Whether or not to throw an exception if the constant is found. + */ + private boolean failOnConstantFound = true; + + public ConstantTokenFormatter(char constant) { + this(new char[] {constant}, true); + } + + public ConstantTokenFormatter(char constant, boolean failOnConstantFound) { + this(new char[] {constant}, failOnConstantFound); + } + + public ConstantTokenFormatter(String constant) { + this(constant, true); + } + + public ConstantTokenFormatter(String constant, boolean failOnConstantFound) { + this(constant.toCharArray(), failOnConstantFound); + } + + public ConstantTokenFormatter(char[] constant, boolean failOnConstantFound) { + this.constant = constant; + this.failOnConstantFound = failOnConstantFound; + } + + @Override + public String format(char[] prior, int tokenIndex, char[] token) { + if (failOnConstantFound) { + for (int i = 0; i < token.length; i++) { + boolean match = false; + int t = i; + for (int j = 0; j < constant.length; j++) { + if (token[t] == constant[j]) { + match = true; + } else { + match = false; + break; + } + t++; + } + if (match) { + throw new IllegalArgumentException("Token " + tokenIndex + " contains illegal character '" + new String(constant) + "' at index " + t); + } + } + } + + return new String(constant); + } + + /** + * Set whether to check the token for the constant. + * @param checkTokenForConstant whether to check. 
+ */ + public void setFailOnConstantFound(boolean checkTokenForConstant) { + this.failOnConstantFound = checkTokenForConstant; + } + + } + + /** + * Reusable NoOpFormatter instance. + */ + private static final NoOpFormatter NOOP_FORMATTER = new NoOpFormatter(); + + /** + * Reusable Empty String formatter instance. + */ + private static final ConstantTokenFormatter EMPTY_STRING_FORMATTER = new ConstantTokenFormatter(StringUtils.EMPTY, false); + + public static NoOpFormatter noOpFormatter() { + return NOOP_FORMATTER; + } + + public static ConstantTokenFormatter constantFormatter(char[] constant, boolean failOnConstant) { + return new ConstantTokenFormatter(constant, failOnConstant); + } + + public static ConstantTokenFormatter constantFormatter(char constant, boolean failOnConstant) { + return new ConstantTokenFormatter(constant, failOnConstant); + } + + public static ConstantTokenFormatter emptyFormatter() { + return EMPTY_STRING_FORMATTER; + } +} diff --git a/src/main/java/org/apache/commons/text/TokenStringifier.java b/src/main/java/org/apache/commons/text/TokenStringifier.java new file mode 100644 index 0000000000..424b277e6b --- /dev/null +++ b/src/main/java/org/apache/commons/text/TokenStringifier.java @@ -0,0 +1,81 @@ +package org.apache.commons.text; + +/** + * Takes a collection of String tokens and combines them into a single String. + *

+ * This class functions as the inverse of {@link org.apache.commons.text.StringTokenizer}. All tokens are formatted + * by a {@link TokenFormatter} which allows fine grained control over the final output. + *

+ */ +public class TokenStringifier { + + /** + * The formatter for the delimiter. + */ + private TokenFormatter delimiterFormatter; + + /** + * The formatter for the tokens. + */ + private TokenFormatter tokenFormatter; + + /** + * Builder used to hold formatted tokens. + */ + private StringBuilder builder; + + /** + * The final string. + */ + private String string; + + /** + * The tokens to turn into a String. + */ + private Iterable tokens; + + public TokenStringifier(TokenFormatter delimiterFormatter, TokenFormatter tokenFormatter) { + super(); + this.delimiterFormatter = delimiterFormatter; + this.tokenFormatter = tokenFormatter; + } + + public void reset(Iterable tokens) { + this.tokens = tokens; + this.string = null; + this.builder = null; + } + + public TokenStringifier() { + tokenFormatter = TokenFormatterFactory.noOpFormatter(); + delimiterFormatter = TokenFormatterFactory.noOpFormatter(); + } + + private void stringify() { + builder = new StringBuilder(); + char[] priorToken = null; + int i = 0; + for (String token : tokens) { + char[] tokenChars = token.toCharArray(); + if (i > 0) { + String delimiter = delimiterFormatter.format(priorToken, i, tokenChars); + if (delimiter != null) { + builder.append(delimiter); + } + } + String formatted = tokenFormatter.format(priorToken, i, tokenChars); + if (formatted != null) { + builder.append(formatted); + } + i++; + } + string = builder.toString(); + } + + public String getString() { + if (string == null) { + stringify(); + } + return string; + } +} diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index 583d95c1f0..27e6494be8 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -32,8 +32,9 @@ public final class CamelCase extends UpperCaseDelimitedCase { /** * Constructs new CamelCase instance. 
*/ - private CamelCase() { - super(true); - } + + private CamelCase() { + super(true); + } } diff --git a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java index 4e40ef2636..51f366cdfc 100644 --- a/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/CharacterDelimitedCase.java @@ -16,136 +16,43 @@ */ package org.apache.commons.text.cases; -import java.util.ArrayList; import java.util.List; -import org.apache.commons.lang3.CharUtils; +import org.apache.commons.text.StringTokenizer; +import org.apache.commons.text.TokenFormatterFactory; +import org.apache.commons.text.TokenStringifier; -/** - * DelimitedCase is a case in which the true alphabetic case of the characters is ignored by default, - * and tokens are determined by the presence of a delimiter between each token. - */ public class CharacterDelimitedCase implements Case { - /** Delimiters to be used when parsing. */ - private List parseDelimiters; - - /** Delimiter to be used when formatting. */ - private String formatDelimiter; - /** - * Constructs a new Delimited Case with null delimiters. + * The tokenizer. */ - CharacterDelimitedCase() { - this(null, null); - } + private StringTokenizer tokenizer; /** - * Constructs a new Delimited Case. - * - * @param delimiter the character to use as both the parse and format delimiter + * The stringifier. */ - CharacterDelimitedCase(char delimiter) { - this(new char[] { delimiter }, CharUtils.toString(delimiter)); - } + private TokenStringifier stringifier; /** - * Constructs a new delimited case. - * - * @param parseDelimiters the array of delimiters to use when parsing - * @param formatDelimiter the delimiter to use when formatting + * Constructs a new CharacterDelimitedCase instance. 
*/ - CharacterDelimitedCase(char[] parseDelimiters, String formatDelimiter) { - super(); - this.parseDelimiters = generateDelimiterList(parseDelimiters); - this.formatDelimiter = formatDelimiter; + protected CharacterDelimitedCase(char delimiter) { + tokenizer = new StringTokenizer((String) null, delimiter); + tokenizer.setIgnoreEmptyTokens(false); + stringifier = new TokenStringifier(TokenFormatterFactory.constantFormatter(delimiter, true), TokenFormatterFactory.noOpFormatter()); } - /** - * Formats tokens into Delimited Case. - *

- * Tokens are appended to a string, with a delimiter between them. This method - * validates that the delimiter character is not part of the token. If it is found within the - * token an exception is thrown.
- * No other restrictions are placed on the contents of the tokens. Note: This Case does support - * empty tokens.
- *

- * - * @param tokens the tokens to be formatted into a delimited string - * @return the delimited string - * @throws IllegalArgumentException if any tokens contain the delimiter character - */ @Override public String format(Iterable tokens) { - StringBuilder formattedString = new StringBuilder(); - int i = 0; - for (String token : tokens) { - if (formatDelimiter != null) { - int delimiterFoundIndex = token.indexOf(formatDelimiter); - if (delimiterFoundIndex > -1) { - throw new IllegalArgumentException("Token " + i + " contains delimiter character '" + formatDelimiter + "' at index " + delimiterFoundIndex); - } - if (i > 0) { - formattedString.append(formatDelimiter); - } - } - i++; - formattedString.append(token); - } - return formattedString.toString(); + stringifier.reset(tokens); + return stringifier.getString(); } - /** - * Parses delimited string into tokens. - *

- * Input string is parsed one character at a time until a delimiter is reached. - * When a delimiter character is reached a new token begins. The delimiter character is - * considered reserved, and is omitted from the returned parsed tokens.
- * No other restrictions are placed on the contents of the input string.
- *

- * - * @param string the delimited string to be parsed - * @return the list of tokens found in the string - */ @Override public List parse(String string) { - List tokens = new ArrayList<>(); - if (string == null || string.isEmpty()) { - return tokens; - } - int strLen = string.length(); - int[] tokenCodePoints = new int[strLen]; - int tokenCodePointsOffset = 0; - for (int i = 0; i < string.length();) { - final int codePoint = string.codePointAt(i); - if (parseDelimiters.contains(codePoint)) { - tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); - tokenCodePoints = new int[strLen]; - tokenCodePointsOffset = 0; - i++; - } else { - tokenCodePoints[tokenCodePointsOffset++] = codePoint; - i += Character.charCount(codePoint); - } - } - tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); - return tokens; - } - - /** - * Converts an array of delimiters to a List of code points. - * - * @param delimiters array of characters to add to List - * @return the List of delimiter characters in the input array - */ - private static List generateDelimiterList(final char[] delimiters) { - final List delimiterHashSet = new ArrayList<>(); - if (delimiters != null) { - for (int index = 0; index < delimiters.length; index++) { - delimiterHashSet.add(Character.codePointAt(delimiters, index)); - } - } - return delimiterHashSet; + tokenizer.reset(string); + return tokenizer.getTokenList(); } } diff --git a/src/main/java/org/apache/commons/text/cases/PascalTokenFormatter.java b/src/main/java/org/apache/commons/text/cases/PascalTokenFormatter.java new file mode 100644 index 0000000000..d9874a9f77 --- /dev/null +++ b/src/main/java/org/apache/commons/text/cases/PascalTokenFormatter.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.cases; + +import org.apache.commons.text.TokenFormatter; + +public class PascalTokenFormatter implements TokenFormatter { + + /** + * Whether or not to set the first character of the first token as lower case. + */ + private boolean lowerCaseFirstCharacter = false; + + public PascalTokenFormatter(boolean lowerCaseFirstCharacter) { + this.lowerCaseFirstCharacter = lowerCaseFirstCharacter; + } + + public PascalTokenFormatter() { } + + @Override + public String format(char[] prior, int tokenIndex, char[] token) { + if (token == null || token.length == 0) { + throw new IllegalArgumentException("Unsupported empty token at index " + tokenIndex); + } + StringBuilder formattedString = new StringBuilder(); + + for (int i = 0; i < token.length;) { + final int codePoint = Character.codePointAt(token, i); + //final int codePoint = token.codePointAt(i); + int codePointFormatted = codePoint; + if (i == 0 && tokenIndex == 0 && lowerCaseFirstCharacter) { + codePointFormatted = toLowerCase(codePoint); + } else if (i == 0) { + codePointFormatted = toUpperCase(codePoint); + } else if (Character.isUpperCase(codePointFormatted) || Character.isTitleCase(codePointFormatted)) { + //if character is title or upper case, it must be converted to lower + codePointFormatted = toLowerCase(codePoint); + } + formattedString.appendCodePoint(codePointFormatted); + i += 
Character.charCount(codePoint); + } + + return formattedString.toString(); + } + + /** + * Transforms a Unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the + * result is upper case. + * + * @param codePoint the code point to upper case + * @return the transformed code point + * @throws IllegalArgumentException if the converted code point cannot be mapped into an upper case character + */ + private static int toUpperCase(int codePoint) { + int codePointFormatted = Character.toUpperCase(codePoint); + if (!Character.isUpperCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to upper case")); + } + return codePointFormatted; + } + + /** + * Transforms a Unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the + * result is lower case. + * + * @param codePoint the code point to lower case + * @return the lower case code point that corresponds to the input parameter + * @throws IllegalArgumentException if the converted code point cannot be mapped into a lower case character + */ + private static int toLowerCase(int codePoint) { + int codePointFormatted = Character.toLowerCase(codePoint); + if (!Character.isLowerCase(codePointFormatted)) { + throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to lower case")); + } + return codePointFormatted; + } + + /** + * Creates an exception message that displays the Unicode character as well as the hex value for clarity. 
+ * + * @param codePoint the Unicode code point to transform + * @param suffix a string suffix for the message + * @return the message + */ + private static String createExceptionMessage(int codePoint, String suffix) { + return "Character '" + new String(new int[] { codePoint }, 0, 1) + "' with value 0x" + Integer.toHexString(codePoint) + suffix; + } + +} diff --git a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java index f70e8897c8..940522fec5 100644 --- a/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java +++ b/src/main/java/org/apache/commons/text/cases/UpperCaseDelimitedCase.java @@ -16,22 +16,36 @@ */ package org.apache.commons.text.cases; -import java.util.ArrayList; import java.util.List; +import org.apache.commons.text.StringTokenizer; +import org.apache.commons.text.TokenFormatterFactory; +import org.apache.commons.text.TokenStringifier; +import org.apache.commons.text.matcher.StringMatcherFactory; + + /** * Case implementation which parses and formats strings where tokens are delimited by upper case characters. */ public class UpperCaseDelimitedCase implements Case { - /** Flag to indicate whether the first character of the first token should be upper case. */ - private boolean lowerCaseFirstCharacter = false; + /** + * The tokenizer. + */ + private StringTokenizer tokenizer; + + /** + * The stringifier. + */ + private TokenStringifier stringifier; /** * Constructs a new UpperCaseDelimitedCase instance. 
*/ UpperCaseDelimitedCase(boolean lowerCaseFirstCharacter) { - this.lowerCaseFirstCharacter = lowerCaseFirstCharacter; + tokenizer = new StringTokenizer((String) null, StringMatcherFactory.INSTANCE.uppercaseMatcher()); + tokenizer.setOmitDelimiterMatches(false); + stringifier = new TokenStringifier(TokenFormatterFactory.emptyFormatter(), new PascalTokenFormatter(lowerCaseFirstCharacter)); } /** @@ -47,35 +61,8 @@ public class UpperCaseDelimitedCase implements Case { */ @Override public List parse(String string) { - List tokens = new ArrayList<>(); - if (string == null || string.isEmpty()) { - return tokens; - } - if (lowerCaseFirstCharacter) { - toLowerCase(string.codePointAt(0)); - } else { - toUpperCase(string.codePointAt(0)); - } - int strLen = string.length(); - int[] tokenCodePoints = new int[strLen]; - int tokenCodePointsOffset = 0; - for (int i = 0; i < string.length();) { - final int codePoint = string.codePointAt(i); - if (Character.isUpperCase(codePoint)) { - if (tokenCodePointsOffset > 0) { - tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); - tokenCodePoints = new int[strLen]; - tokenCodePointsOffset = 0; - } - tokenCodePoints[tokenCodePointsOffset++] = codePoint; - i += Character.charCount(codePoint); - } else { - tokenCodePoints[tokenCodePointsOffset++] = codePoint; - i += Character.charCount(codePoint); - } - } - tokens.add(new String(tokenCodePoints, 0, tokenCodePointsOffset)); - return tokens; + tokenizer.reset(string); + return tokenizer.getTokenList(); } /** @@ -97,72 +84,9 @@ public List parse(String string) { */ @Override public String format(Iterable tokens) { - StringBuilder formattedString = new StringBuilder(); - int tokenIndex = 0; - for (String token : tokens) { - if (token.length() == 0) { - throw new IllegalArgumentException("Unsupported empty token at index " + tokenIndex); - } - for (int i = 0; i < token.length();) { - final int codePoint = token.codePointAt(i); - int codePointFormatted = codePoint; - if (i == 0 && 
tokenIndex == 0 && lowerCaseFirstCharacter) { - codePointFormatted = toLowerCase(codePoint); - } else if (i == 0) { - codePointFormatted = toUpperCase(codePoint); - } else if (Character.isUpperCase(codePointFormatted) || Character.isTitleCase(codePointFormatted)) { - //if character is title or upper case, it must be converted to lower - codePointFormatted = toLowerCase(codePoint); - } - formattedString.appendCodePoint(codePointFormatted); - i += Character.charCount(codePoint); - } - tokenIndex++; - } - return formattedString.toString(); + stringifier.reset(tokens); + return stringifier.getString(); } - /** - * Transforms a Unicode code point into upper case using {@link java.lang.Character#toUpperCase} and confirms the - * result is upper case. - * - * @param codePoint the code point to upper case - * @return the transformed code point - * @throws IllegalArgumentException if the converted code point cannot be mapped into an upper case character - */ - private static int toUpperCase(int codePoint) { - int codePointFormatted = Character.toUpperCase(codePoint); - if (!Character.isUpperCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to upper case")); - } - return codePointFormatted; - } - - /** - * Transforms a Unicode code point into lower case using {@link java.lang.Character#toLowerCase} and confirms the - * result is lower case. 
- * - * @param codePoint the code point to lower case - * @return the lower case code point that corresponds to the input parameter - * @throws IllegalArgumentException if the converted code point cannot be mapped into a lower case character - */ - private static int toLowerCase(int codePoint) { - int codePointFormatted = Character.toLowerCase(codePoint); - if (!Character.isLowerCase(codePointFormatted)) { - throw new IllegalArgumentException(createExceptionMessage(codePoint, " cannot be mapped to lower case")); - } - return codePointFormatted; - } - - /** - * Creates an exception message that displays the Unicode character as well as the hex value for clarity. - * - * @param codePoint the Unicode code point to transform - * @param suffix a string suffix for the message - * @return the message - */ - private static String createExceptionMessage(int codePoint, String suffix) { - return "Character '" + new String(new int[] { codePoint }, 0, 1) + "' with value 0x" + Integer.toHexString(codePoint) + suffix; - } } diff --git a/src/test/java/org/apache/commons/text/cases/CasesTest.java b/src/test/java/org/apache/commons/text/cases/CasesTest.java index 6d30fe202a..c2f0725511 100644 --- a/src/test/java/org/apache/commons/text/cases/CasesTest.java +++ b/src/test/java/org/apache/commons/text/cases/CasesTest.java @@ -32,22 +32,21 @@ public void testCases() { } @Test - public void testCharacterDelimitedCase() { - Assertions.assertThrows(IllegalArgumentException.class, () -> KebabCase.INSTANCE.format(Arrays.asList("a", "-"))); - Assertions.assertThrows(IllegalArgumentException.class, () -> SnakeCase.INSTANCE.format(Arrays.asList("a", "_"))); - CharacterDelimitedCase nullDelimiters = new CharacterDelimitedCase(); - assertFormat(nullDelimiters, "abc", Arrays.asList("a", "b", "c")); - assertParse(nullDelimiters, "abc", Arrays.asList("abc")); - } - - @Test - public void testKebabCase() { + public void testKebabFormat() { assertFormatAndParse(KebabCase.INSTANCE, "", 
Arrays.asList()); assertParse(KebabCase.INSTANCE, null, Arrays.asList()); assertFormatAndParse(KebabCase.INSTANCE, "my-Tokens-123-a1", Arrays.asList("my", "Tokens", "123", "a1")); assertFormatAndParse(KebabCase.INSTANCE, "blank--token", Arrays.asList("blank", "", "token")); } + @Test + public void testKebabParse() { + assertParse(KebabCase.INSTANCE, "", Arrays.asList()); + assertParse(KebabCase.INSTANCE, null, Arrays.asList()); + assertParse(KebabCase.INSTANCE, "my-Tokens-123-a1", Arrays.asList("my", "Tokens", "123", "a1")); + assertParse(KebabCase.INSTANCE, "blank--token", Arrays.asList("blank", "", "token")); + } + @Test public void testUtf32() { assertFormatAndParse(KebabCase.INSTANCE, "\uD800\uDF00-\uD800\uDF01\uD800\uDF14-\uD800\uDF02\uD800\uDF03", @@ -63,13 +62,21 @@ public void testUtf32() { } @Test - public void testSnakeCase() { + public void testSnakeFormat() { assertFormatAndParse(SnakeCase.INSTANCE, "", Arrays.asList()); assertParse(SnakeCase.INSTANCE, null, Arrays.asList()); assertFormatAndParse(SnakeCase.INSTANCE, "My_var_NAME__mIXED_a1_c|=+", Arrays.asList("My", "var", "NAME", "", "mIXED", "a1", "c|=+")); assertFormatAndParse(SnakeCase.INSTANCE, "blank__token", Arrays.asList("blank", "", "token")); } + @Test + public void testSnakeParse() { + assertParse(SnakeCase.INSTANCE, "", Arrays.asList()); + assertParse(SnakeCase.INSTANCE, null, Arrays.asList()); + assertParse(SnakeCase.INSTANCE, "My_var_NAME__mIXED_a1_c|=+", Arrays.asList("My", "var", "NAME", "", "mIXED", "a1", "c|=+")); + assertParse(SnakeCase.INSTANCE, "blank__token", Arrays.asList("blank", "", "token")); + } + @Test public void testPascalCase() { @@ -85,6 +92,32 @@ public void testPascalCase() { Assertions.assertThrows(IllegalArgumentException.class, () -> PascalCase.INSTANCE.format(Arrays.asList(""))); } + @Test + public void testPascalFormat() { + + assertFormat(PascalCase.INSTANCE, "MyVarName", Arrays.asList("My", "Var", "Name")); + assertFormat(PascalCase.INSTANCE, 
"MyVarNameMixedA1C|=+", Arrays.asList("My", "var", "NAME", "mIXED", "a1", "c|=+"), true); + assertFormat(PascalCase.INSTANCE, "", Arrays.asList()); + assertFormat(PascalCase.INSTANCE, "LowerFirst", Arrays.asList("lower", "FIRST")); + + Assertions.assertThrows(IllegalArgumentException.class, () -> + PascalCase.INSTANCE.format(Arrays.asList("1"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> + PascalCase.INSTANCE.format(Arrays.asList("a1", "2c"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> + PascalCase.INSTANCE.format(Arrays.asList("1a"))); + Assertions.assertThrows(IllegalArgumentException.class, () -> + PascalCase.INSTANCE.format(Arrays.asList(""))); + } + + @Test + public void testPascalParse() { + assertParse(PascalCase.INSTANCE, "MyVarName", Arrays.asList("My", "Var", "Name")); + assertParse(PascalCase.INSTANCE, "MyVarNameMixedA1C|=+", Arrays.asList("My", "var", "NAME", "mIXED", "a1", "c|=+"), true); + assertParse(PascalCase.INSTANCE, "", Arrays.asList()); + assertParse(PascalCase.INSTANCE, "lowerFirst", Arrays.asList("lower", "First")); + } + @Test public void testNumberLetters() { @@ -103,7 +136,7 @@ public void testNumberLetters() { } @Test - public void testCamelCase() { + public void testCamelFormat() { assertFormatAndParse(CamelCase.INSTANCE, "", Arrays.asList()); assertFormatAndParse(CamelCase.INSTANCE, "myVarNameMixedA1C|=+", Arrays.asList("My", "var", "NAME", "mIXED", "a1", "c|=+"), true); @@ -119,6 +152,23 @@ public void testCamelCase() { Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("1a"))); } + @Test + public void testCamelParse() { + + assertParse(CamelCase.INSTANCE, "", Arrays.asList()); + assertParse(CamelCase.INSTANCE, "myVarNameMixedA1C|=+", Arrays.asList("My", "var", "NAME", "mIXED", "a1", "c|=+"), true); + assertParse(CamelCase.INSTANCE, "specChar-Token+", Arrays.asList("spec", "Char-", "Token+")); + + assertParse(CamelCase.INSTANCE, "MyTokens", 
Arrays.asList("My", "Tokens")); + //assertFormat(CamelCase.INSTANCE, "myTokens", Arrays.asList("My", "Tokens")); + + // empty token not supported + //Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "b", ""))); + // must begin with character that can be uppercased + //Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("a", "1b"))); + //Assertions.assertThrows(IllegalArgumentException.class, () -> CamelCase.INSTANCE.format(Arrays.asList("1a"))); + } + @Test public void testConversionsDelimited() { From 1406d30ca52c8e87a37e2edeac5b41c33199769d Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:04:59 -0500 Subject: [PATCH 45/52] throw unsupported from uppercase matcher --- .../org/apache/commons/text/matcher/AbstractStringMatcher.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java index 5438a46ab9..59ec582865 100644 --- a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java +++ b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java @@ -477,7 +477,7 @@ public int isMatch(final CharSequence buffer, final int start, final int bufferS */ @Override public int size() { - return 1; + throw new UnsupportedOperationException("Uppercase Matcher doesn't support size() method"); } } From 674734848ad7fca6db21195582d148f2a196d879 Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:05:07 -0500 Subject: [PATCH 46/52] licenses --- .../org/apache/commons/text/TokenFormatter.java | 16 ++++++++++++++++ .../commons/text/TokenFormatterFactory.java | 16 ++++++++++++++++ .../apache/commons/text/TokenStringifier.java | 16 ++++++++++++++++ 3 files 
changed, 48 insertions(+) diff --git a/src/main/java/org/apache/commons/text/TokenFormatter.java b/src/main/java/org/apache/commons/text/TokenFormatter.java index 5dd90613c0..45609e837e 100644 --- a/src/main/java/org/apache/commons/text/TokenFormatter.java +++ b/src/main/java/org/apache/commons/text/TokenFormatter.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.commons.text; public interface TokenFormatter { diff --git a/src/main/java/org/apache/commons/text/TokenFormatterFactory.java b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java index e3789a0077..5314c33006 100644 --- a/src/main/java/org/apache/commons/text/TokenFormatterFactory.java +++ b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.commons.text; import org.apache.commons.lang3.StringUtils; diff --git a/src/main/java/org/apache/commons/text/TokenStringifier.java b/src/main/java/org/apache/commons/text/TokenStringifier.java index 424b277e6b..ee4e902de1 100644 --- a/src/main/java/org/apache/commons/text/TokenStringifier.java +++ b/src/main/java/org/apache/commons/text/TokenStringifier.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.commons.text; /** From 61bf2e5046e9027b45b8df6b0cc35e13b8d2e3f4 Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:42:23 -0500 Subject: [PATCH 47/52] dont need to check when there is not enough room left for constant --- .../java/org/apache/commons/text/TokenFormatterFactory.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/text/TokenFormatterFactory.java b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java index 5314c33006..77fe197bcc 100644 --- a/src/main/java/org/apache/commons/text/TokenFormatterFactory.java +++ b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java @@ -71,7 +71,8 @@ public ConstantTokenFormatter(char[] constant, boolean failOnConstantFound) { @Override public String format(char[] prior, int tokenIndex, char[] token) { if (failOnConstantFound) { - for (int i = 0; i < token.length; i++) { + int end = token.length - (constant.length - 1); + for (int i = 0; i < end; i++) { boolean match = false; int t = i; for (int j = 0; j < constant.length; j++) { From 05422f1048e96b871733bca25a63edd513003f73 Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:42:42 -0500 Subject: [PATCH 48/52] tests for token factory and stringifier --- .../text/TokenFormatterFactoryTest.java | 55 +++++++++++++++++++ .../commons/text/TokenStringifierTest.java | 25 +++++++++ 2 files changed, 80 insertions(+) create mode 100644 src/test/java/org/apache/commons/text/TokenFormatterFactoryTest.java create mode 100644 src/test/java/org/apache/commons/text/TokenStringifierTest.java diff --git a/src/test/java/org/apache/commons/text/TokenFormatterFactoryTest.java b/src/test/java/org/apache/commons/text/TokenFormatterFactoryTest.java new file mode 100644 index 0000000000..dc94ae6224 --- /dev/null +++ 
b/src/test/java/org/apache/commons/text/TokenFormatterFactoryTest.java @@ -0,0 +1,55 @@ +package org.apache.commons.text; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.apache.commons.text.TokenFormatterFactory.ConstantTokenFormatter; +import org.apache.commons.text.TokenFormatterFactory.NoOpFormatter; +import org.junit.jupiter.api.Test; + +public class TokenFormatterFactoryTest { + + @Test + public void testConstantTokenFormatterFailOnConstant() { + ConstantTokenFormatter formatter = TokenFormatterFactory.constantFormatter("abc".toCharArray(), true); + assertThrows(IllegalArgumentException.class, () -> formatter.format(null, 0, "dabc".toCharArray())); + assertThrows(IllegalArgumentException.class, () -> formatter.format(null, 0, "abc".toCharArray())); + assertThrows(IllegalArgumentException.class, () -> formatter.format(null, 0, "abcd".toCharArray())); + + ConstantTokenFormatter unicode = TokenFormatterFactory.constantFormatter("\uD801\uDC00".toCharArray(), true); + assertThrows(IllegalArgumentException.class, () -> unicode.format(null, 0, "\uD801\uDC00".toCharArray())); + assertThrows(IllegalArgumentException.class, () -> unicode.format(null, 0, "a\uD801\uDC00".toCharArray())); + assertThrows(IllegalArgumentException.class, () -> unicode.format(null, 0, "\uD801\uDC00b".toCharArray())); + } + + @Test + public void testConstantTokenFormatter() { + ConstantTokenFormatter formatter = TokenFormatterFactory.constantFormatter("abc".toCharArray(), false); + assertEquals("abc", formatter.format(null, 0, new char[0])); + assertEquals("abc", formatter.format(null, 0, "abc".toCharArray())); + assertEquals("abc", formatter.format(null, 0, "abdc".toCharArray())); + assertEquals("abc", formatter.format(null, 0, "".toCharArray())); + + formatter = TokenFormatterFactory.constantFormatter("\uD801\uDC00".toCharArray(), true); + assertEquals("\uD801\uDC00", formatter.format(null, 0, new 
char[0])); + assertEquals("\uD801\uDC00", formatter.format(null, 0, "abc".toCharArray())); + } + + @Test + public void testNoOpFormatter() { + NoOpFormatter formatter = TokenFormatterFactory.noOpFormatter(); + assertEquals("\uD801\uDC00", formatter.format(null, 0, "\uD801\uDC00".toCharArray())); + assertEquals("\uD801\uDC00a", formatter.format(null, 0, "\uD801\uDC00a".toCharArray())); + assertEquals("abc", formatter.format(null, 0, "abc".toCharArray())); + assertEquals("", formatter.format(null, 0, "".toCharArray())); + } + + @Test + public void testEmptyFormatter() { + ConstantTokenFormatter formatter = TokenFormatterFactory.emptyFormatter(); + assertEquals("", formatter.format(null, 0, "\uD801\uDC00".toCharArray())); + assertEquals("", formatter.format(null, 0, "\uD801\uDC00a".toCharArray())); + assertEquals("", formatter.format(null, 0, "abc".toCharArray())); + assertEquals("", formatter.format(null, 0, "".toCharArray())); + } +} diff --git a/src/test/java/org/apache/commons/text/TokenStringifierTest.java b/src/test/java/org/apache/commons/text/TokenStringifierTest.java new file mode 100644 index 0000000000..30f49df9cf --- /dev/null +++ b/src/test/java/org/apache/commons/text/TokenStringifierTest.java @@ -0,0 +1,25 @@ +package org.apache.commons.text; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.Test; + +public class TokenStringifierTest { + + @Test + public void testTokenStringifier() { + TokenStringifier stringifier = new TokenStringifier(TokenFormatterFactory.constantFormatter(',', true), TokenFormatterFactory.noOpFormatter()); + List tokens = Arrays.asList(new String[]{"my", "csv", "tokens"}); + stringifier.reset(tokens); + String csv = stringifier.getString(); + assertEquals("my,csv,tokens", csv); + //double check that csv tokenizer can read the csv string + StringTokenizer csvTokenizer = StringTokenizer.getCSVInstance(csv); + csvTokenizer.reset(csv); + 
List tokenizerTokens = csvTokenizer.getTokenList(); + assertEquals(tokens, tokenizerTokens); + } +} From 58f05e31b51dd93072ea2e56e88191d06b9d1d63 Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:46:23 -0500 Subject: [PATCH 49/52] formatting --- src/main/java/org/apache/commons/text/cases/CamelCase.java | 1 - src/main/java/org/apache/commons/text/cases/Cases.java | 1 + .../org/apache/commons/text/matcher/AbstractStringMatcher.java | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java index 27e6494be8..2e68d000b8 100644 --- a/src/main/java/org/apache/commons/text/cases/CamelCase.java +++ b/src/main/java/org/apache/commons/text/cases/CamelCase.java @@ -32,7 +32,6 @@ public final class CamelCase extends UpperCaseDelimitedCase { /** * Constructs new CamelCase instance. */ - private CamelCase() { super(true); } diff --git a/src/main/java/org/apache/commons/text/cases/Cases.java b/src/main/java/org/apache/commons/text/cases/Cases.java index 1ca7d29f0e..b8b8479ce7 100644 --- a/src/main/java/org/apache/commons/text/cases/Cases.java +++ b/src/main/java/org/apache/commons/text/cases/Cases.java @@ -32,6 +32,7 @@ public class Cases { /** * Utility method for converting between cases. 
+ * * @param string the cased string to parse * @param from the case of the existing string * @param to the case to convert to diff --git a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java index 59ec582865..643e5dcea0 100644 --- a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java +++ b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java @@ -449,7 +449,7 @@ static final class UppercaseMatcher extends AbstractStringMatcher { * @param start the starting position for the match, valid for buffer * @param bufferStart unused * @param bufferEnd unused - * @return The number of matching characters, zero for no match + * @return the number of matching characters, zero for no match */ @Override public int isMatch(char[] buffer, int start, int bufferStart, int bufferEnd) { From 5481f1d67f17bb8681360e7b0834e3c1b125fcc9 Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:46:30 -0500 Subject: [PATCH 50/52] spelling --- .../java/org/apache/commons/text/TokenFormatterFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/text/TokenFormatterFactory.java b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java index 77fe197bcc..b693301527 100644 --- a/src/main/java/org/apache/commons/text/TokenFormatterFactory.java +++ b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java @@ -104,12 +104,12 @@ public void setFailOnConstantFound(boolean checkTokenForConstant) { } /** - * Reuseable NoOpFormatter instance. + * Reusable NoOpFormatter instance. */ private static final NoOpFormatter NOOP_FORMATTER = new NoOpFormatter(); /** - * Reuseable Empty String formatter instance. + * Reusable Empty String formatter instance. 
*/ private static final ConstantTokenFormatter EMPTY_STRING_FORMATTER = new ConstantTokenFormatter(StringUtils.EMPTY, false); From 25236f19d549d8d3a3d4e1d9efc235e8f291e393 Mon Sep 17 00:00:00 2001 From: theshoeshiner <2922868+theshoeshiner@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:49:04 -0500 Subject: [PATCH 51/52] licenses --- .../commons/text/TokenFormatterFactoryTest.java | 16 ++++++++++++++++ .../commons/text/TokenStringifierTest.java | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/test/java/org/apache/commons/text/TokenFormatterFactoryTest.java b/src/test/java/org/apache/commons/text/TokenFormatterFactoryTest.java index dc94ae6224..787e0f7c0c 100644 --- a/src/test/java/org/apache/commons/text/TokenFormatterFactoryTest.java +++ b/src/test/java/org/apache/commons/text/TokenFormatterFactoryTest.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.commons.text; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/src/test/java/org/apache/commons/text/TokenStringifierTest.java b/src/test/java/org/apache/commons/text/TokenStringifierTest.java index 30f49df9cf..33d7dc8ae6 100644 --- a/src/test/java/org/apache/commons/text/TokenStringifierTest.java +++ b/src/test/java/org/apache/commons/text/TokenStringifierTest.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.commons.text; import static org.junit.jupiter.api.Assertions.assertEquals; From f91a1a22e368c13a45f2303fc860e68811cdaafe Mon Sep 17 00:00:00 2001 From: theshoeshiner Date: Wed, 20 Mar 2024 09:34:12 -0400 Subject: [PATCH 52/52] corrected merge --- .../text/matcher/AbstractStringMatcher.java | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java index cf9dcab432..8f055ad73c 100644 --- a/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java +++ b/src/main/java/org/apache/commons/text/matcher/AbstractStringMatcher.java @@ -428,6 +428,59 @@ public int size() { } } + /** + * Matches Uppercase characters as determined by {@link java.lang.Character#isUpperCase(int)} + *

+ * Thread-safe. + *

+ */ + static final class UppercaseMatcher extends AbstractStringMatcher { + + /** + * Constructs a new instance of {@code UppercaseMatcher}. + */ + UppercaseMatcher() { + } + + /** + * Returns the char count (1, or 2 for a supplementary code point) of the code point at {@code start} if it is uppercase, or {@code 0} if it is not. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return the number of matching chars ({@code Character.charCount} of the code point), zero for no match + */ + @Override + public int isMatch(char[] buffer, int start, int bufferStart, int bufferEnd) { + int codePoint = Character.codePointAt(buffer, start); + return Character.isUpperCase(codePoint) ? Character.charCount(codePoint) : 0; + } + + /** + * Returns the char count (1, or 2 for a supplementary code point) of the code point at {@code start} if it is uppercase, or {@code 0} if it is not. + * + * @param buffer the text content to match against, do not change + * @param start the starting position for the match, valid for buffer + * @param bufferStart unused + * @param bufferEnd unused + * @return the number of matching chars ({@code Character.charCount} of the code point), zero for no match + */ + @Override + public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) { + int codePoint = Character.codePointAt(buffer, start); + return Character.isUpperCase(codePoint) ? Character.charCount(codePoint) : 0; + } + + /** + * Always throws {@link UnsupportedOperationException}; this matcher does not report a size. + */ + @Override + public int size() { + throw new UnsupportedOperationException("Uppercase Matcher doesn't support size() method"); + } + } + /** * Constructs a new instance. */