From cf8770749a6200881b0e04569d9bf431fa09a81c Mon Sep 17 00:00:00 2001 From: sagar7162 Date: Wed, 18 Mar 2026 15:34:14 +0530 Subject: [PATCH] feat: add `string/base/distances/hamming-code-points` --- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: passed - task: lint_package_json status: passed - task: lint_repl_help status: passed - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: passed - task: lint_javascript_tests status: passed - task: lint_javascript_benchmarks status: passed - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: passed - task: lint_typescript_tests status: passed - task: lint_license_headers status: passed --- --- .../distances/hamming-code-points/README.md | 128 ++++++++++++++ .../benchmark/benchmark.js | 67 ++++++++ .../hamming-code-points/docs/repl.txt | 37 ++++ .../hamming-code-points/docs/types/index.d.ts | 49 ++++++ .../hamming-code-points/docs/types/test.ts | 60 +++++++ .../hamming-code-points/examples/index.js | 42 +++++ .../hamming-code-points/lib/index.js | 47 ++++++ .../distances/hamming-code-points/lib/main.js | 127 ++++++++++++++ .../hamming-code-points/package.json | 73 ++++++++ .../hamming-code-points/test/test.js | 159 ++++++++++++++++++ 10 files changed, 789 insertions(+) create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/README.md create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/benchmark/benchmark.js create mode 100644 
lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/repl.txt create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/types/index.d.ts create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/types/test.ts create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/examples/index.js create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/lib/index.js create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/lib/main.js create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/package.json create mode 100644 lib/node_modules/@stdlib/string/base/distances/hamming-code-points/test/test.js diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/README.md b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/README.md new file mode 100644 index 000000000000..b44a57ef76ad --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/README.md @@ -0,0 +1,128 @@ + + +# hammingDistanceCodePoints + +> Calculate the [Hamming distance][hamming-distance] between two equal-length strings by comparing Unicode code points. + + + +
+ +## Usage + +```javascript +var hammingDistanceCodePoints = require( '@stdlib/string/base/distances/hamming-code-points' ); +``` + +#### hammingDistanceCodePoints( s1, s2 ) + +Calculates the [Hamming distance][hamming-distance] between two equal-length strings by comparing Unicode code points. + +```javascript +var dist = hammingDistanceCodePoints( 'frog', 'from' ); +// returns 1 + +dist = hammingDistanceCodePoints( 'tooth', 'froth' ); +// returns 2 + +dist = hammingDistanceCodePoints( 'cat', 'cot' ); +// returns 1 + +dist = hammingDistanceCodePoints( '', '' ); +// returns 0 + +// Emoji are treated as single Unicode code points: +dist = hammingDistanceCodePoints( '👋', '🌍' ); +// returns 1 + +dist = hammingDistanceCodePoints( 'a👋b', 'c🌍d' ); +// returns 3 +``` + +
+ + + + + +
+ +## Notes + +- If the two strings differ in the number of Unicode code points, the [Hamming distance][hamming-distance] is not defined. Consequently, when provided two input strings with an unequal number of Unicode code points, the function returns a sentinel value of `-1`. +- Unlike the UTF-16 code unit implementation in `@stdlib/string/base/distances/hamming`, this function iterates over **Unicode code points** rather than UTF-16 code units. This means surrogate pairs (used to encode characters outside the Basic Multilingual Plane, such as most emoji) are treated as a single unit of comparison. For example, the emoji `'👋'` (U+1F44B) is encoded as a UTF-16 surrogate pair `\uD83D\uDC4B` and has a `String.length` of `2`, but this function treats it as a single code point. +- The function is **not** grapheme-cluster aware. Characters composed of multiple Unicode code points (e.g., family emoji built from multiple code points joined by Zero Width Joiners, or letters with combining diacritical marks) are treated as multiple code points. + +
+ + + + + +
+ +## Examples + +```javascript +var hammingDistanceCodePoints = require( '@stdlib/string/base/distances/hamming-code-points' ); + +var dist = hammingDistanceCodePoints( 'algorithms', 'altruistic' ); +// returns 7 + +dist = hammingDistanceCodePoints( 'elephant', 'hippopod' ); +// returns 7 + +dist = hammingDistanceCodePoints( 'javascript', 'typescript' ); +// returns 4 + +dist = hammingDistanceCodePoints( 'hamming', 'ladybug' ); +// returns 5 + +// Emoji strings (each emoji = 1 Unicode code point): +dist = hammingDistanceCodePoints( '👋🌍🎉', '🌟💫✨' ); +// returns 3 + +// Mixed ASCII and emoji: +dist = hammingDistanceCodePoints( 'hello👋', 'hallo🌍' ); +// returns 2 +``` + +
+ + + + + + + + + + + + + + diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/benchmark/benchmark.js b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/benchmark/benchmark.js new file mode 100644 index 000000000000..8127ce36c17f --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/benchmark/benchmark.js @@ -0,0 +1,67 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var bench = require( '@stdlib/bench' ); +var pkg = require( './../package.json' ).name; +var hammingDistanceCodePoints = require( './../lib' ); + + +// MAIN // + +bench( pkg, function benchmark( b ) { + var values; + var value; + var out; + var i; + + values = [ + [ 'algorithms', 'altruistic' ], + [ '1638452297', '4444884447' ], + [ '', '' ], + [ 'z', 'a' ], + [ 'aaappppk', 'aardvark' ], + [ 'frog', 'flog' ], + [ 'fly', 'ant' ], + [ 'elephant', 'hippopod' ], + [ 'hippopod', 'elephant' ], + [ 'hippo', 'zzzzz' ], + [ 'hello', 'hallo' ], + [ 'πŸ‘‹πŸŒπŸŽ‰', 'πŸŒŸπŸ’«βœ¨' ], + [ 'aπŸ‘‹b', 'c🌍d' ], + [ 'congratulations', 'conmgeautlatins' ] + ]; + + b.tic(); + for ( i = 0; i < b.iterations; i++ ) { + value = values[ i%values.length ]; + out = hammingDistanceCodePoints( value[0], value[1] ); + if ( typeof out !== 'number' ) { + b.fail( 'should return a number' ); + } + } + b.toc(); + if ( typeof out !== 'number' ) { + b.fail( 'should return a number' ); + } + b.pass( 'benchmark finished' ); + b.end(); +}); diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/repl.txt b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/repl.txt new file mode 100644 index 000000000000..21c3bab27dff --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/repl.txt @@ -0,0 +1,37 @@ + +{{alias}}( s1, s2 ) + Calculates the Hamming distance between two equal-length strings by + comparing Unicode code points. + + The function returns a sentinel value of -1 if the two input strings differ + in the number of Unicode code points. + + Parameters + ---------- + s1: string + First input string. + + s2: string + Second input string. + + Returns + ------- + out: number + Hamming distance. 
+ + Examples + -------- + > var d = {{alias}}( 'algorithms', 'altruistic' ) + 7 + > d = {{alias}}( 'elephant', 'hippopod' ) + 7 + > d = {{alias}}( 'javascript', 'typescript' ) + 4 + > d = {{alias}}( 'πŸ‘‹', '🌍' ) + 1 + > d = {{alias}}( 'aπŸ‘‹', 'b🌍' ) + 2 + + See Also + -------- + diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/types/index.d.ts b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/types/index.d.ts new file mode 100644 index 000000000000..08e1642c472d --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/types/index.d.ts @@ -0,0 +1,49 @@ +/* +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +// TypeScript Version: 4.1 + +/** +* Calculates the Hamming distance between two equal-length strings by comparing Unicode code points. +* +* ## Notes +* +* - The function returns a sentinel value of `-1` if the two input strings differ in the number of Unicode code points. 
+* +* @param str1 - first input string +* @param str2 - second input string +* @returns Hamming distance +* +* @example +* var dist = hammingDistanceCodePoints( 'fly', 'ant' ); +* // returns 3 +* +* @example +* var dist = hammingDistanceCodePoints( 'πŸ‘‹', '🌍' ); +* // returns 1 +* +* @example +* var dist = hammingDistanceCodePoints( 'algorithms', 'altruistic' ); +* // returns 7 +*/ +declare function hammingDistanceCodePoints( str1: string, str2: string ): number; + + +// EXPORTS // + +export = hammingDistanceCodePoints; diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/types/test.ts b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/types/test.ts new file mode 100644 index 000000000000..5af55069dbd8 --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/docs/types/test.ts @@ -0,0 +1,60 @@ +/* +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +import hammingDistanceCodePoints = require( './index' ); + + +// TESTS // + +// The function returns a number... +{ + hammingDistanceCodePoints( '', '' ); // $ExpectType number + hammingDistanceCodePoints( 'fly', 'ant' ); // $ExpectType number + hammingDistanceCodePoints( 'πŸ‘‹', '🌍' ); // $ExpectType number +} + +// The compiler throws an error if the function is provided a first argument which is not a string... 
+{ + hammingDistanceCodePoints( true, '' ); // $ExpectError + hammingDistanceCodePoints( false, '' ); // $ExpectError + hammingDistanceCodePoints( null, '' ); // $ExpectError + hammingDistanceCodePoints( undefined, '' ); // $ExpectError + hammingDistanceCodePoints( 5, '' ); // $ExpectError + hammingDistanceCodePoints( [], '' ); // $ExpectError + hammingDistanceCodePoints( {}, '' ); // $ExpectError + hammingDistanceCodePoints( ( x: number ): number => x, '' ); // $ExpectError +} + +// The compiler throws an error if the function is provided a second argument which is not a string... +{ + hammingDistanceCodePoints( '', true ); // $ExpectError + hammingDistanceCodePoints( '', false ); // $ExpectError + hammingDistanceCodePoints( '', null ); // $ExpectError + hammingDistanceCodePoints( '', undefined ); // $ExpectError + hammingDistanceCodePoints( '', 5 ); // $ExpectError + hammingDistanceCodePoints( '', [] ); // $ExpectError + hammingDistanceCodePoints( '', {} ); // $ExpectError + hammingDistanceCodePoints( '', ( x: number ): number => x ); // $ExpectError +} + +// The compiler throws an error if the function is provided an unsupported number of arguments... +{ + hammingDistanceCodePoints(); // $ExpectError + hammingDistanceCodePoints( '' ); // $ExpectError + hammingDistanceCodePoints( '', '', 3 ); // $ExpectError +} diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/examples/index.js b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/examples/index.js new file mode 100644 index 000000000000..93768e90297e --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/examples/index.js @@ -0,0 +1,42 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +var hammingDistanceCodePoints = require( './../lib' ); + +console.log( hammingDistanceCodePoints( 'algorithms', 'altruistic' ) ); +// => 7 + +console.log( hammingDistanceCodePoints( 'elephant', 'hippopod' ) ); +// => 7 + +console.log( hammingDistanceCodePoints( 'javascript', 'typescript' ) ); +// => 4 + +// All emoji strings: +console.log( hammingDistanceCodePoints( 'πŸ‘‹πŸŒπŸŽ‰', 'πŸŒŸπŸ’«βœ¨' ) ); +// => 3 + +// Mixed ASCII and emoji strings: +console.log( hammingDistanceCodePoints( 'aπŸ‘‹b', 'c🌍d' ) ); +// => 3 + +// Unequal code-point lengths return -1: +console.log( hammingDistanceCodePoints( 'a', 'abcissa' ) ); +// => -1 diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/lib/index.js b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/lib/index.js new file mode 100644 index 000000000000..543d84b71f5c --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/lib/index.js @@ -0,0 +1,47 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +'use strict'; + +/** +* Calculate the Hamming distance between two equal-length strings by comparing Unicode code points. +* +* @module @stdlib/string/base/distances/hamming-code-points +* +* @example +* var hammingDistanceCodePoints = require( '@stdlib/string/base/distances/hamming-code-points' ); +* +* var dist = hammingDistanceCodePoints( 'fly', 'ant' ); +* // returns 3 +* +* dist = hammingDistanceCodePoints( 'frog', 'blog' ); +* // returns 2 +* +* dist = hammingDistanceCodePoints( 'πŸ‘‹', '🌍' ); +* // returns 1 +*/ + + +// MODULES // + +var main = require( './main.js' ); + + +// EXPORTS // + +module.exports = main; diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/lib/main.js b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/lib/main.js new file mode 100644 index 000000000000..c806d8d4e4de --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/lib/main.js @@ -0,0 +1,127 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
// VARIABLES //

var RE_UTF16_LOW_SURROGATE = /[\uDC00-\uDFFF]/; // TODO: replace with stdlib pkg
var RE_UTF16_HIGH_SURROGATE = /[\uD800-\uDBFF]/; // TODO: replace with stdlib pkg


// FUNCTIONS //

/**
* Returns the number of UTF-16 code units occupied by the Unicode code point which starts at a specified index.
*
* ## Notes
*
* - A high surrogate immediately followed by a low surrogate is a surrogate pair and occupies two code units.
* - Any other code unit, including a lone (unpaired) surrogate, occupies a single code unit.
*
* @private
* @param {string} str - input string
* @param {NonNegativeInteger} idx - index of the first code unit of the code point (must be less than `len`)
* @param {NonNegativeInteger} len - length of the input string in UTF-16 code units
* @returns {PositiveInteger} number of code units (either `1` or `2`)
*/
function codePointWidth( str, idx, len ) {
	if (
		idx < len-1 &&
		RE_UTF16_HIGH_SURROGATE.test( str[ idx ] ) &&
		RE_UTF16_LOW_SURROGATE.test( str[ idx+1 ] )
	) {
		return 2;
	}
	return 1;
}


// MAIN //

/**
* Calculates the Hamming distance between two equal-length strings by comparing Unicode code points.
*
* ## Notes
*
* - The function returns a sentinel value of `-1` if the two input strings differ in the number of Unicode code points.
* - UTF-16 surrogate pairs are compared as a single unit. Lone surrogates are compared as individual code units.
*
* @param {string} s1 - first input string
* @param {string} s2 - second input string
* @throws {TypeError} first argument must be a string
* @throws {TypeError} second argument must be a string
* @returns {integer} Hamming distance
*
* @example
* var dist = hammingDistanceCodePoints( 'fly', 'ant' );
* // returns 3
*
* @example
* var dist = hammingDistanceCodePoints( '👋', '🌍' );
* // returns 1
*
* @example
* var dist = hammingDistanceCodePoints( 'a👋', 'b🌍' );
* // returns 2
*/
function hammingDistanceCodePoints( s1, s2 ) {
	var out;
	var n1;
	var n2;
	var i1;
	var i2;
	var w1;
	var w2;

	if ( !isString( s1 ) ) {
		throw new TypeError( format( 'invalid argument. First argument must be a string. Value: `%s`.', s1 ) );
	}
	if ( !isString( s2 ) ) {
		throw new TypeError( format( 'invalid argument. Second argument must be a string. Value: `%s`.', s2 ) );
	}
	n1 = s1.length;
	n2 = s2.length;

	out = 0;
	i1 = 0;
	i2 = 0;

	// Walk both strings in lockstep, advancing each cursor by one code point (one or two code units) per iteration...
	while ( i1 < n1 && i2 < n2 ) {
		w1 = codePointWidth( s1, i1, n1 );
		w2 = codePointWidth( s2, i2, n2 );

		// Code points of different widths yield substrings of different lengths, so a single comparison covers both cases:
		if ( s1.substring( i1, i1+w1 ) !== s2.substring( i2, i2+w2 ) ) {
			out += 1;
		}
		i1 += w1;
		i2 += w2;
	}
	// The cursors only reach the ends together when both strings contain the same number of code points:
	if ( i1 < n1 || i2 < n2 ) {
		return -1;
	}
	return out;
}
"https://github.com/stdlib-js/stdlib/graphs/contributors" + } + ], + "main": "./lib", + "directories": { + "benchmark": "./benchmark", + "doc": "./docs", + "example": "./examples", + "lib": "./lib", + "test": "./test" + }, + "types": "./docs/types", + "scripts": {}, + "homepage": "https://github.com/stdlib-js/stdlib", + "repository": { + "type": "git", + "url": "git://github.com/stdlib-js/stdlib.git" + }, + "bugs": { + "url": "https://github.com/stdlib-js/stdlib/issues" + }, + "dependencies": {}, + "devDependencies": {}, + "engines": { + "node": ">=0.10.0", + "npm": ">2.7.0" + }, + "os": [ + "aix", + "darwin", + "freebsd", + "linux", + "macos", + "openbsd", + "sunos", + "win32", + "windows" + ], + "keywords": [ + "stdlib", + "stdstring", + "utilities", + "utility", + "utils", + "util", + "base", + "string", + "str", + "distances", + "distance", + "hamming", + "edit", + "unicode", + "code", + "point", + "codepoint", + "emoji", + "surrogate" + ], + "__stdlib__": {} +} diff --git a/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/test/test.js b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/test/test.js new file mode 100644 index 000000000000..91c1981a2150 --- /dev/null +++ b/lib/node_modules/@stdlib/string/base/distances/hamming-code-points/test/test.js @@ -0,0 +1,159 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2026 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +'use strict'; + +// MODULES // + +var tape = require( 'tape' ); +var hammingDistanceCodePoints = require( './../lib' ); + + +// TESTS // + +tape( 'main export is a function', function test( t ) { + t.ok( true, __filename ); + t.strictEqual( typeof hammingDistanceCodePoints, 'function', 'main export is a function' ); + t.end(); +}); + +tape( 'the function throws an error if not provided a string as its first argument', function test( t ) { + var values; + var i; + + values = [ + 5, + NaN, + true, + false, + null, + void 0, + [], + {}, + function noop() {} + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[i] ), TypeError, 'throws an error when provided '+values[i] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + hammingDistanceCodePoints( value, 'foo' ); + }; + } +}); + +tape( 'the function throws an error if not provided a string as its second argument', function test( t ) { + var values; + var i; + + values = [ + 5, + NaN, + true, + false, + null, + void 0, + [], + {}, + function noop() {} + ]; + + for ( i = 0; i < values.length; i++ ) { + t.throws( badValue( values[i] ), TypeError, 'throws an error when provided '+values[i] ); + } + t.end(); + + function badValue( value ) { + return function badValue() { + hammingDistanceCodePoints( 'foo', value ); + }; + } +}); + +tape( 'the function returns -1 as a sentinel value if provided strings with an unequal number of Unicode code points', function test( t ) { + t.strictEqual( hammingDistanceCodePoints( 'length', 'differs' ), -1, 'returns expected value' ); + t.strictEqual( hammingDistanceCodePoints( 'a', 'ab' ), -1, 'returns expected value' ); + t.strictEqual( hammingDistanceCodePoints( 'πŸ‘‹', 'ab' ), -1, 'returns expected value' ); + t.strictEqual( hammingDistanceCodePoints( 'abc', 'πŸ‘‹' ), -1, 'returns expected value' ); + t.end(); +}); + +tape( 'the function calculates the Hamming distance between two equal-length strings (ASCII)', function test( 
t ) { + var expected; + var values; + var i; + + values = [ + [ '1638452297', '4444884442' ], // 10 + [ '', '' ], // 0 + [ 'a', 'a' ], // 0 + [ 'a', 'b' ], // 1 + [ 'xy', 'xy' ], // 0 + [ 'xx', 'xy' ], // 1 + [ 'frog', 'blog' ], // 2 + [ 'fly', 'ant' ], // 3 + [ 'hello', 'hallo' ], // 1 + [ 'congratulations', 'conmgeautlatins' ] // 9 + ]; + + expected = [ 10, 0, 0, 1, 0, 1, 2, 3, 1, 9 ]; + + for ( i = 0; i < values.length; i++ ) { + t.strictEqual( hammingDistanceCodePoints( values[i][0], values[i][1] ), expected[i], 'returns expected value' ); + } + t.end(); +}); + +tape( 'the function calculates the Hamming distance between two equal-length strings (emoji / surrogate pairs)', function test( t ) { + var expected; + var values; + var i; + + // Each emoji is a single Unicode code point encoded as a UTF-16 surrogate pair. + values = [ + [ 'πŸ‘‹', 'πŸ‘‹' ], // same emoji: 0 + [ 'πŸ‘‹', '🌍' ], // different emoji: 1 + [ 'πŸ‘‹πŸŒ', 'πŸŒπŸ‘‹' ], // swapped pair: 2 + [ 'πŸ‘‹πŸŒπŸŽ‰', 'πŸŒŸπŸ’«βœ¨' ], // all different: 3 + [ 'aπŸ‘‹', 'a🌍' ], // ASCII + emoji, first matches: 1 + [ 'aπŸ‘‹b', 'c🌍d' ], // mixed: all three differ: 3 + [ 'helloπŸ‘‹', 'hallo🌍' ] // ASCII substring differs: 2 + ]; + + expected = [ 0, 1, 2, 3, 1, 3, 2 ]; + + for ( i = 0; i < values.length; i++ ) { + t.strictEqual( hammingDistanceCodePoints( values[i][0], values[i][1] ), expected[i], 'returns expected value for pair: ' + JSON.stringify(values[i]) ); + } + t.end(); +}); + +tape( 'the function treats a surrogate pair as a single code point when comparing lengths', function test( t ) { + // 'πŸ‘‹' has UTF-16 length 2, but is 1 Unicode code point. + // 'ab' has UTF-16 length 2, and is 2 Unicode code points. + // Therefore their code point counts differ and the result should be -1. 
+ t.strictEqual( hammingDistanceCodePoints( 'πŸ‘‹', 'ab' ), -1, 'returns expected value' ); + t.strictEqual( hammingDistanceCodePoints( 'ab', 'πŸ‘‹' ), -1, 'returns expected value' ); + + // 'aπŸ‘‹' has 2 code points; 'bc' has 2 code points β€” equal length, so compare: + t.strictEqual( hammingDistanceCodePoints( 'aπŸ‘‹', 'bc' ), 2, 'returns expected value' ); + t.end(); +});