Chromium Code Reviews| Index: lib/src/comparators.dart |
| diff --git a/lib/src/comparators.dart b/lib/src/comparators.dart |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..b94b7f9860deafcaed72b138fdc91c276b3e3831 |
| --- /dev/null |
| +++ b/lib/src/comparators.dart |
| @@ -0,0 +1,269 @@ |
| +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| +// for details. All rights reserved. Use of this source code is governed by a |
| +// BSD-style license that can be found in the LICENSE file. |
| + |
| +library dart.pkg.collection.comparators; |
| + |
| +/// Checks if strings [a] and [b] differ only on the case of ASCII letters. |
| +/// |
| +/// Strings are equal if they have the same length, and the characters at |
| +/// each index are the same, or they are ASCII letters where one is upper-case |
| +/// and the other is the lower-case version of the same letter. |
| +/// |
| +/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense |
| +/// for situations where the strings are known to be ASCII. Examples could |
| +/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar |
| +/// strings with a known structure. |
|
kevmoo
2015/11/10 19:30:35
Describe how non-ASCII characters are handled.
|
| +bool equalsIgnoreAsciiCase(String a, String b) { |
| + if (a.length != b.length) return false; |
| + for (int i = 0; i < a.length; i++) { |
| + var aChar = a.codeUnitAt(i); |
| + var bChar = b.codeUnitAt(i); |
| + if (aChar == bChar) continue; |
| + const int upperCaseA = 0x41; |
| + const int upperCaseZ = 0x5d; |
| + if (aChar ^ bChar != 0x20) return false; |
| + if (upperCaseA <= aChar && aChar <= upperCaseZ) continue; |
| + if (upperCaseA <= bChar && bChar <= upperCaseZ) continue; |
| + return false; |
| + } |
| + return true; |
| +} |
| + |
| +/// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. |
|
kevmoo
2015/11/10 19:30:35
Describe how non-ASCII characters are handled.
|
| +int hashIgnoreAsciiCase(String string) { |
| + int hash = 0; |
| + for (int i = 0; i < string.length; i++) { |
| + int char = string.codeUnitAt(i); |
| + const int lowerCaseA = 0x61; |
| + const int lowerCaseZ = 0x7D; |
| + if (lowerCaseA <= char && char <= lowerCaseZ) char -= 0x20; |
| + hash = 0x1fffffff & (hash + char); |
| + hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); |
| + hash >>= 6; |
| + } |
| + hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); |
| + hash >>= 11; |
| + return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); |
| +} |
| + |
| +/// Compares [a] and [b] lexically, converting ASCII letters to upper case. |
| +/// |
| +/// Comparison treats all lower-case ASCII letters as their upper-case letter, |
| +/// but does no case conversion for non-ASCII letters. |
| +/// |
| +/// If two strings differ only on the case of ASCII letters, the one with the |
| +/// capital letter at the first difference will compare as less than the other |
| +/// string. This tie-breaking ensures that the comparison is a total ordering |
| +/// on strings. |
| +/// |
| +/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense |
| +/// for situations where the strings are known to be ASCII. Examples could |
| +/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar |
| +/// strings with a known structure. |
| +int compareAsciiUpperCase(String a, String b) { |
| + int defaultResult = 0; // Returned if no difference found. |
| + for (int i = 0; i < a.length; i++) { |
| + if (i == b.length) return 1; |
| + var aChar = a.codeUnitAt(i); |
| + var bChar = b.codeUnitAt(i); |
| + if (aChar == bChar) continue; |
| + const int lowerCaseA = 0x61; |
| + const int lowerCaseZ = 0x7d; |
| + // Lower case if letters. |
| + int aUpperCase = aChar; |
| + int bUpperCase = bChar; |
| + if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20; |
| + if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20; |
| + if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; |
| + if (defaultResult == 0) defaultResult = (aChar - bChar); |
| + } |
| + if (b.length > a.length) return -1; |
| + return defaultResult.sign; |
| +} |
| + |
| +/// Compares [a] and [b] lexically, converting ASCII letters to lower case. |
| +/// |
| +/// Comparison treats all upper-case ASCII letters as lower-case letter, |
| +/// but does no case conversion for non-ASCII letters. |
| +/// |
| +/// If two strings differ only on the case of ASCII letters, the one with the |
| +/// capital letter at the first difference will compare as less than the other |
| +/// string. This tie-breaking ensures that the comparison is a total ordering |
| +/// on strings. |
| +/// |
| +/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense |
| +/// for situations where the strings are known to be ASCII. Examples could |
| +/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar |
| +/// strings with a known structure. |
| +int compareAsciiLowerCase(String a, String b) { |
| + int defaultResult = 0; |
| + for (int i = 0; i < a.length; i++) { |
| + if (i == b.length) return 1; |
| + var aChar = a.codeUnitAt(i); |
| + var bChar = b.codeUnitAt(i); |
| + if (aChar == bChar) continue; |
| + const int upperCaseA = 0x41; |
| + const int upperCaseZ = 0x5d; |
| + int aLowerCase = aChar; |
| + int bLowerCase = bChar; |
| + // Upper case if ASCII letters. |
| + if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20; |
| + if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20; |
| + if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; |
| + if (defaultResult == 0) defaultResult = aChar - bChar; |
| + } |
| + if (b.length > a.length) return -1; |
| + return defaultResult.sign; |
| +} |
| + |
| +/// Compares strings [a] and [b] lexically, with embedded numbers recognized. |
|
sra1
2015/11/10 04:41:40
'with embedded sequences of digits compared as a u
Lasse Reichstein Nielsen
2015/11/10 08:21:33
What I intend is:
Leixcally: Dictionary sorting (
|
| +/// |
| +/// If both strings contain an embedded number at the same locations, the |
| +/// numbers are compared lexicographically. This means that a longer number |
| +/// is considered greater than a shorter number, and numbers of the same length |
| +/// are ordered numerically (or alhabetically, it's the same thing). |
| +/// If numbers do not have leading zeros, lexicographical ordering is also |
| +/// the same as numerical ordering. |
|
sra1
2015/11/10 04:41:40
Add examples in the doc to tease out when this is
Lasse Reichstein Nielsen
2015/11/10 08:21:33
Good idea.
|
| +int compareWithNumbers(String a, String b) { |
|
sra1
2015/11/10 04:41:40
'With' makes it sound like one of the arguments is
Lasse Reichstein Nielsen
2015/11/10 08:21:33
Yes, the name sucks. Suggestions will be appreciat
|
| + for (int i = 0; i < a.length; i++) { |
| + if (i == b.length) return 1; |
| + var aChar = a.codeUnitAt(i); |
| + var bChar = b.codeUnitAt(i); |
| + if (aChar != bChar) { |
| + return _compareDigitsLexicographically(a, b, i, aChar, bChar); |
| + } |
| + } |
| + if (b.length > a.length) return -1; |
| + return 0; |
| +} |
| + |
| +/// Compares strings [a] and [b] lexically, with embedded numbers recognized. |
| +/// |
| +/// ASCII letters are converted to lower case before being compared, like |
| +/// for [compareAsciiLowerCase]. |
| +/// |
| +/// If two strings differ only on the case of ASCII letters, the one with the |
| +/// capital letter at the first difference will compare as less than the other |
| +/// string. This tie-breaking ensures that the comparison is a total ordering |
| +/// on strings. |
| +/// |
| +/// If both strings contain an embedded number at the same locations, the |
| +/// numbers are compared lexicographically. This means that a longer number |
| +/// is considered greater than a shorter number, and numbers of the same length |
| +/// are ordered numerically (or alhabetically, it's the same thing). |
| +/// If numbers do not have leading zeros, lexicographical ordering is also |
| +/// the same as numerical ordering. |
| +int compareAsciiLowerCaseWithNumbers(String a, String b) { |
| + int defaultResult = 0; // Returned if no difference found. |
| + for (int i = 0; i < a.length; i++) { |
| + if (i == b.length) return 1; |
| + var aChar = a.codeUnitAt(i); |
| + var bChar = b.codeUnitAt(i); |
| + if (aChar == bChar) continue; |
| + const int upperCaseA = 0x41; |
| + const int upperCaseZ = 0x5d; |
| + int aLowerCase = aChar; |
| + int bLowerCase = bChar; |
| + if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20; |
| + if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20; |
| + if (aLowerCase != bLowerCase) { |
| + return _compareDigitsLexicographically(a, b, i, aLowerCase, bLowerCase); |
| + } |
| + if (defaultResult == 0) defaultResult = aChar - bChar; |
| + } |
| + if (b.length > a.length) return -1; |
| + return defaultResult.sign; |
| +} |
| + |
| +/// Compares strings [a] and [b] lexically, with embedded numbers recognized. |
| +/// |
| +/// ASCII letters are converted to upper case before being compared, like |
| +/// for [compareAsciiUpperCase]. |
| +/// |
| +/// If two strings differ only on the case of ASCII letters, the one with the |
| +/// capital letter at the first difference will compare as less than the other |
| +/// string. This tie-breaking ensures that the comparison is a total ordering |
| +/// on strings. |
| +/// |
| +/// If both strings contain an embedded number at the same locations, the |
| +/// numbers are compared lexicographically. This means that a longer number |
| +/// is considered greater than a shorter number, and numbers of the same length |
| +/// are ordered numerically (or alhabetically, it's the same thing). |
| +/// If numbers do not have leading zeros, lexicographical ordering is also |
| +/// the same as numerical ordering. |
| +int compareAsciiUpperCaseWithNumbers(String a, String b) { |
| + int defaultResult = 0; |
| + for (int i = 0; i < a.length; i++) { |
| + if (i == b.length) return 1; |
| + var aChar = a.codeUnitAt(i); |
| + var bChar = b.codeUnitAt(i); |
| + if (aChar == bChar) continue; |
| + const int lowerCaseA = 0x61; |
| + const int lowerCaseZ = 0x7d; |
| + int aUpperCase = aChar; |
| + int bUpperCase = bChar; |
| + if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20; |
| + if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20; |
| + if (aUpperCase != bUpperCase) { |
| + return _compareDigitsLexicographically(a, b, i, aUpperCase, bUpperCase); |
| + } |
| + if (defaultResult == 0) defaultResult = aChar - bChar; |
| + } |
| + if (b.length > a.length) return -1; |
| + return defaultResult.sign; |
| +} |
| + |
| +/// Check for numbers overlapping the current mismatched characters. |
| +/// |
| +/// If either [aChar] or [bChar] or both are digits we check if one is part of |
| +/// a longer digits sequence than the other, |
| +/// and let the longer sequence compare as greater. |
| +/// |
| +/// Otherwise just returns the difference between [aChar] and [bChar]. |
| +int _compareDigitsLexicographically( |
| + String a, String b, int index, int aChar, int bChar) { |
| + assert(aChar != bChar); |
| + var aIsDigit = _isDigit(aChar); |
| + var bIsDigit = _isDigit(bChar); |
| + if (aIsDigit) { |
| + if (bIsDigit) { |
| + int lengthCompare = _compareDigitCount(a, b, index); |
| + if (lengthCompare != 0) return lengthCompare; |
| + } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { |
| + // aChar is the continuation of a longer number. |
| + return 1; |
| + } |
| + } else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) { |
| + // bChar is the continuation of a longer number. |
| + return -1; |
| + } |
| + // Characters are not both part , or part |
|
sra1
2015/11/10 04:41:40
Fix comment.
Lasse Reichstein Nielsen
2015/11/10 08:21:33
I wonder what I meant to say :)
|
| + return (aChar - bChar).sign; |
| +} |
| + |
| +/// Checks which of [a] and [b] has the longest sequence of digits. |
| +/// |
| +/// Starts counting from `i + 1` (assumes that `a[i]` and `b[i]` are both |
| +/// already known to be digits). |
| +int _compareDigitCount(String a, String b, int i) { |
| + while (++i < a.length) { |
| + bool aIsDigit = _isDigit(a.codeUnitAt(i)); |
| + if (i == b.length) return aIsDigit ? 1 : 0; |
| + bool bIsDigit = _isDigit(b.codeUnitAt(i)); |
| + if (aIsDigit) { |
| + if (bIsDigit) continue; |
| + return 1; |
| + } else if (bIsDigit) { |
| + return -1; |
| + } else { |
| + return 0; |
| + } |
| + } |
| + if (i < b.length && _isDigit(b.codeUnitAt(i))) { |
| + return -1; |
| + } |
| + return 0; |
| +} |
| + |
| +bool _isDigit(int charCode) => (charCode ^ 0x30) <= 9; |