Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1386)

Unified Diff: lib/src/comparators.dart

Issue 1412293008: Add string comparators that can compare ASCII strings ignoring case. (Closed) Base URL: https://github.com/dart-lang/collection.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/collection.dart ('k') | test/comparators_test.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/src/comparators.dart
diff --git a/lib/src/comparators.dart b/lib/src/comparators.dart
new file mode 100644
index 0000000000000000000000000000000000000000..b94b7f9860deafcaed72b138fdc91c276b3e3831
--- /dev/null
+++ b/lib/src/comparators.dart
@@ -0,0 +1,269 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library dart.pkg.collection.comparators;
+
+/// Checks if strings [a] and [b] differ only on the case of ASCII letters.
+///
+/// Strings are equal if they have the same length, and the characters at
+/// each index are the same, or they are ASCII letters where one is upper-case
+/// and the other is the lower-case version of the same letter.
+///
+/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense
+/// for situations where the strings are known to be ASCII. Examples could
+/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar
+/// strings with a known structure.
kevmoo 2015/11/10 19:30:35 Describe how non-ASCII characters are handled.
+bool equalsIgnoreAsciiCase(String a, String b) {
+ if (a.length != b.length) return false;
+ for (int i = 0; i < a.length; i++) {
+ var aChar = a.codeUnitAt(i);
+ var bChar = b.codeUnitAt(i);
+ if (aChar == bChar) continue;
+ const int upperCaseA = 0x41;
+ const int upperCaseZ = 0x5d;
+ if (aChar ^ bChar != 0x20) return false;
+ if (upperCaseA <= aChar && aChar <= upperCaseZ) continue;
+ if (upperCaseA <= bChar && bChar <= upperCaseZ) continue;
+ return false;
+ }
+ return true;
+}
+
+/// Hash code for a string which is compatible with [equalsIgnoreAsciiCase].
kevmoo 2015/11/10 19:30:35 Describe how non-ASCII characters are handled.
+int hashIgnoreAsciiCase(String string) {
+ int hash = 0;
+ for (int i = 0; i < string.length; i++) {
+ int char = string.codeUnitAt(i);
+ const int lowerCaseA = 0x61;
+ const int lowerCaseZ = 0x7D;
+ if (lowerCaseA <= char && char <= lowerCaseZ) char -= 0x20;
+ hash = 0x1fffffff & (hash + char);
+ hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10));
+ hash >>= 6;
+ }
+ hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3));
+ hash >>= 11;
+ return 0x1fffffff & (hash + ((0x00003fff & hash) << 15));
+}
+
+/// Compares [a] and [b] lexically, converting ASCII letters to upper case.
+///
+/// Comparison treats all lower-case ASCII letters as their upper-case letter,
+/// but does no case conversion for non-ASCII letters.
+///
+/// If two strings differ only on the case of ASCII letters, the one with the
+/// capital letter at the first difference will compare as less than the other
+/// string. This tie-breaking ensures that the comparison is a total ordering
+/// on strings.
+///
+/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense
+/// for situations where the strings are known to be ASCII. Examples could
+/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar
+/// strings with a known structure.
+int compareAsciiUpperCase(String a, String b) {
+ int defaultResult = 0; // Returned if no difference found.
+ for (int i = 0; i < a.length; i++) {
+ if (i == b.length) return 1;
+ var aChar = a.codeUnitAt(i);
+ var bChar = b.codeUnitAt(i);
+ if (aChar == bChar) continue;
+ const int lowerCaseA = 0x61;
+ const int lowerCaseZ = 0x7d;
+ // Lower case if letters.
+ int aUpperCase = aChar;
+ int bUpperCase = bChar;
+ if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20;
+ if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20;
+ if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign;
+ if (defaultResult == 0) defaultResult = (aChar - bChar);
+ }
+ if (b.length > a.length) return -1;
+ return defaultResult.sign;
+}
+
+/// Compares [a] and [b] lexically, converting ASCII letters to lower case.
+///
+/// Comparison treats all upper-case ASCII letters as lower-case letter,
+/// but does no case conversion for non-ASCII letters.
+///
+/// If two strings differ only on the case of ASCII letters, the one with the
+/// capital letter at the first difference will compare as less than the other
+/// string. This tie-breaking ensures that the comparison is a total ordering
+/// on strings.
+///
+/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense
+/// for situations where the strings are known to be ASCII. Examples could
+/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar
+/// strings with a known structure.
+int compareAsciiLowerCase(String a, String b) {
+ int defaultResult = 0;
+ for (int i = 0; i < a.length; i++) {
+ if (i == b.length) return 1;
+ var aChar = a.codeUnitAt(i);
+ var bChar = b.codeUnitAt(i);
+ if (aChar == bChar) continue;
+ const int upperCaseA = 0x41;
+ const int upperCaseZ = 0x5d;
+ int aLowerCase = aChar;
+ int bLowerCase = bChar;
+ // Upper case if ASCII letters.
+ if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20;
+ if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20;
+ if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign;
+ if (defaultResult == 0) defaultResult = aChar - bChar;
+ }
+ if (b.length > a.length) return -1;
+ return defaultResult.sign;
+}
+
+/// Compares strings [a] and [b] lexically, with embedded numbers recognized.
sra1 2015/11/10 04:41:40 'with embedded sequences of digits compared as a u
Lasse Reichstein Nielsen 2015/11/10 08:21:33 What I intend is: Leixcally: Dictionary sorting (
+///
+/// If both strings contain an embedded number at the same locations, the
+/// numbers are compared lexicographically. This means that a longer number
+/// is considered greater than a shorter number, and numbers of the same length
+/// are ordered numerically (or alhabetically, it's the same thing).
+/// If numbers do not have leading zeros, lexicographical ordering is also
+/// the same as numerical ordering.
sra1 2015/11/10 04:41:40 Add examples in the doc to tease out when this is
Lasse Reichstein Nielsen 2015/11/10 08:21:33 Good idea.
+int compareWithNumbers(String a, String b) {
sra1 2015/11/10 04:41:40 'With' makes it sound like one of the arguments is
Lasse Reichstein Nielsen 2015/11/10 08:21:33 Yes, the name sucks. Suggestions will be appreciat
+ for (int i = 0; i < a.length; i++) {
+ if (i == b.length) return 1;
+ var aChar = a.codeUnitAt(i);
+ var bChar = b.codeUnitAt(i);
+ if (aChar != bChar) {
+ return _compareDigitsLexicographically(a, b, i, aChar, bChar);
+ }
+ }
+ if (b.length > a.length) return -1;
+ return 0;
+}
+
+/// Compares strings [a] and [b] lexically, with embedded numbers recognized.
+///
+/// ASCII letters are converted to lower case before being compared, like
+/// for [compareAsciiLowerCase].
+///
+/// If two strings differ only on the case of ASCII letters, the one with the
+/// capital letter at the first difference will compare as less than the other
+/// string. This tie-breaking ensures that the comparison is a total ordering
+/// on strings.
+///
+/// If both strings contain an embedded number at the same locations, the
+/// numbers are compared lexicographically. This means that a longer number
+/// is considered greater than a shorter number, and numbers of the same length
+/// are ordered numerically (or alhabetically, it's the same thing).
+/// If numbers do not have leading zeros, lexicographical ordering is also
+/// the same as numerical ordering.
+int compareAsciiLowerCaseWithNumbers(String a, String b) {
+ int defaultResult = 0; // Returned if no difference found.
+ for (int i = 0; i < a.length; i++) {
+ if (i == b.length) return 1;
+ var aChar = a.codeUnitAt(i);
+ var bChar = b.codeUnitAt(i);
+ if (aChar == bChar) continue;
+ const int upperCaseA = 0x41;
+ const int upperCaseZ = 0x5d;
+ int aLowerCase = aChar;
+ int bLowerCase = bChar;
+ if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20;
+ if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20;
+ if (aLowerCase != bLowerCase) {
+ return _compareDigitsLexicographically(a, b, i, aLowerCase, bLowerCase);
+ }
+ if (defaultResult == 0) defaultResult = aChar - bChar;
+ }
+ if (b.length > a.length) return -1;
+ return defaultResult.sign;
+}
+
+/// Compares strings [a] and [b] lexically, with embedded numbers recognized.
+///
+/// ASCII letters are converted to upper case before being compared, like
+/// for [compareAsciiUpperCase].
+///
+/// If two strings differ only on the case of ASCII letters, the one with the
+/// capital letter at the first difference will compare as less than the other
+/// string. This tie-breaking ensures that the comparison is a total ordering
+/// on strings.
+///
+/// If both strings contain an embedded number at the same locations, the
+/// numbers are compared lexicographically. This means that a longer number
+/// is considered greater than a shorter number, and numbers of the same length
+/// are ordered numerically (or alhabetically, it's the same thing).
+/// If numbers do not have leading zeros, lexicographical ordering is also
+/// the same as numerical ordering.
+int compareAsciiUpperCaseWithNumbers(String a, String b) {
+ int defaultResult = 0;
+ for (int i = 0; i < a.length; i++) {
+ if (i == b.length) return 1;
+ var aChar = a.codeUnitAt(i);
+ var bChar = b.codeUnitAt(i);
+ if (aChar == bChar) continue;
+ const int lowerCaseA = 0x61;
+ const int lowerCaseZ = 0x7d;
+ int aUpperCase = aChar;
+ int bUpperCase = bChar;
+ if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20;
+ if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20;
+ if (aUpperCase != bUpperCase) {
+ return _compareDigitsLexicographically(a, b, i, aUpperCase, bUpperCase);
+ }
+ if (defaultResult == 0) defaultResult = aChar - bChar;
+ }
+ if (b.length > a.length) return -1;
+ return defaultResult.sign;
+}
+
+/// Check for numbers overlapping the current mismatched characters.
+///
+/// If either [aChar] or [bChar] or both are digits we check if one is part of
+/// a longer digits sequence than the other,
+/// and let the longer sequence compare as greater.
+///
+/// Otherwise just returns the difference between [aChar] and [bChar].
+int _compareDigitsLexicographically(
+ String a, String b, int index, int aChar, int bChar) {
+ assert(aChar != bChar);
+ var aIsDigit = _isDigit(aChar);
+ var bIsDigit = _isDigit(bChar);
+ if (aIsDigit) {
+ if (bIsDigit) {
+ int lengthCompare = _compareDigitCount(a, b, index);
+ if (lengthCompare != 0) return lengthCompare;
+ } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) {
+ // aChar is the continuation of a longer number.
+ return 1;
+ }
+ } else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) {
+ // bChar is the continuation of a longer number.
+ return -1;
+ }
+ // Characters are not both part , or part
sra1 2015/11/10 04:41:40 Fix comment.
Lasse Reichstein Nielsen 2015/11/10 08:21:33 I wonder what I meant to say :)
+ return (aChar - bChar).sign;
+}
+
+/// Checks which of [a] and [b] has the longest sequence of digits.
+///
+/// Starts counting from `i + 1` (assumes that `a[i]` and `b[i]` are both
+/// already known to be digits).
+int _compareDigitCount(String a, String b, int i) {
+ while (++i < a.length) {
+ bool aIsDigit = _isDigit(a.codeUnitAt(i));
+ if (i == b.length) return aIsDigit ? 1 : 0;
+ bool bIsDigit = _isDigit(b.codeUnitAt(i));
+ if (aIsDigit) {
+ if (bIsDigit) continue;
+ return 1;
+ } else if (bIsDigit) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+ if (i < b.length && _isDigit(b.codeUnitAt(i))) {
+ return -1;
+ }
+ return 0;
+}
+
+bool _isDigit(int charCode) => (charCode ^ 0x30) <= 9;
« no previous file with comments | « lib/collection.dart ('k') | test/comparators_test.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698