| OLD | NEW |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library dart.pkg.collection.comparators; | |
| 6 | |
| 7 // Character constants. | 5 // Character constants. |
| 8 const int _zero = 0x30; | 6 const int _zero = 0x30; |
| 9 const int _upperCaseA = 0x41; | 7 const int _upperCaseA = 0x41; |
| 10 const int _upperCaseZ = 0x5a; | 8 const int _upperCaseZ = 0x5a; |
| 11 const int _lowerCaseA = 0x61; | 9 const int _lowerCaseA = 0x61; |
| 12 const int _lowerCaseZ = 0x7a; | 10 const int _lowerCaseZ = 0x7a; |
| 13 const int _asciiCaseBit = 0x20; | 11 const int _asciiCaseBit = 0x20; |
| 14 | 12 |
| 15 /// Checks if strings [a] and [b] differ only on the case of ASCII letters. | 13 /// Checks if strings [a] and [b] differ only on the case of ASCII letters. |
| 16 /// | 14 /// |
| 17 /// Strings are equal if they have the same length, and the characters at | 15 /// Strings are equal if they have the same length, and the characters at |
| 18 /// each index are the same, or they are ASCII letters where one is upper-case | 16 /// each index are the same, or they are ASCII letters where one is upper-case |
| 19 /// and the other is the lower-case version of the same letter. | 17 /// and the other is the lower-case version of the same letter. |
| 20 /// | 18 /// |
| 21 /// The comparison does not ignore the case of non-ASCII letters, so | 19 /// The comparison does not ignore the case of non-ASCII letters, so |
| 22 /// an upper-case ae-ligature (Æ) is different from | 20 /// an upper-case ae-ligature (Æ) is different from |
| 23 /// a lower case ae-ligature (æ). | 21 /// a lower case ae-ligature (æ). |
| 24 /// | 22 /// |
| 25 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | 23 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense |
| 26 /// for situations where the strings are known to be ASCII. Examples could | 24 /// for situations where the strings are known to be ASCII. Examples could |
| 27 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | 25 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar |
| 28 /// strings with a known structure. | 26 /// strings with a known structure. |
| 29 bool equalsIgnoreAsciiCase(String a, String b) { | 27 bool equalsIgnoreAsciiCase(String a, String b) { |
| 30 if (a.length != b.length) return false; | 28 if (a.length != b.length) return false; |
| 31 for (int i = 0; i < a.length; i++) { | 29 for (int i = 0; i < a.length; i++) { |
| 32 var aChar = a.codeUnitAt(i); | 30 var aChar = a.codeUnitAt(i); |
| 33 var bChar = b.codeUnitAt(i); | 31 var bChar = b.codeUnitAt(i); |
| 34 if (aChar == bChar) continue; | 32 if (aChar == bChar) continue; |
| 35 // Quick-check for whether this may be different cases of the same letter. | 33 // Quick-check for whether this may be different cases of the same letter. |
| 36 if (aChar ^ bChar != _asciiCaseBit) return false; | 34 if (aChar ^ bChar != _asciiCaseBit) return false; |
| 37 // If it's possible, then check if either character is actually an ASCII | 35 // If it's possible, then check if either character is actually an ASCII |
| 38 // letter. | 36 // letter. |
| 39 int aCharUpperCase = aChar | _asciiCaseBit; | 37 int aCharLowerCase = aChar | _asciiCaseBit; |
| 40 if (_upperCaseA <= aCharUpperCase && aCharUpperCase <= _upperCaseZ) { | 38 if (_lowerCaseA <= aCharLowerCase && aCharLowerCase <= _lowerCaseZ) { |
| 41 continue; | 39 continue; |
| 42 } | 40 } |
| 43 return false; | 41 return false; |
| 44 } | 42 } |
| 45 return true; | 43 return true; |
| 46 } | 44 } |
| 47 | 45 |
| 48 | |
| 49 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. | 46 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. |
| 50 /// | 47 /// |
| 51 /// The hash code is unaffected by changing the case of ASCII letters, but | 48 /// The hash code is unaffected by changing the case of ASCII letters, but |
| 52 /// the case of non-ASCII letters do affect the result. | 49 /// the case of non-ASCII letters do affect the result. |
| 53 int hashIgnoreAsciiCase(String string) { | 50 int hashIgnoreAsciiCase(String string) { |
| 54 // Jenkins hash code ( http://en.wikipedia.org/wiki/Jenkins_hash_function). | 51 // Jenkins hash code ( http://en.wikipedia.org/wiki/Jenkins_hash_function). |
| 55 // adapted to smi values. | 52 // adapted to smi values. |
| 56 // Same hash used by dart2js for strings, modified to ignore ASCII letter | 53 // Same hash used by dart2js for strings, modified to ignore ASCII letter |
| 57 // case. | 54 // case. |
| 58 int hash = 0; | 55 int hash = 0; |
| 59 for (int i = 0; i < string.length; i++) { | 56 for (int i = 0; i < string.length; i++) { |
| 60 int char = string.codeUnitAt(i); | 57 int char = string.codeUnitAt(i); |
| 61 // Convert lower-case ASCII letters to upper case.upper | 58 // Convert lower-case ASCII letters to upper case.upper |
| 62 // This ensures that strings that differ only in case will have the | 59 // This ensures that strings that differ only in case will have the |
| 63 // same hash code. | 60 // same hash code. |
| 64 if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit; | 61 if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit; |
| 65 hash = 0x1fffffff & (hash + char); | 62 hash = 0x1fffffff & (hash + char); |
| 66 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); | 63 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); |
| 67 hash >>= 6; | 64 hash >>= 6; |
| 68 } | 65 } |
| 69 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); | 66 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); |
| 70 hash >>= 11; | 67 hash >>= 11; |
| 71 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); | 68 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); |
| 72 } | 69 } |
| 73 | 70 |
| 74 | |
| 75 /// Compares [a] and [b] lexically, converting ASCII letters to upper case. | 71 /// Compares [a] and [b] lexically, converting ASCII letters to upper case. |
| 76 /// | 72 /// |
| 77 /// Comparison treats all lower-case ASCII letters as upper-case letters, | 73 /// Comparison treats all lower-case ASCII letters as upper-case letters, |
| 78 /// but does no case conversion for non-ASCII letters. | 74 /// but does no case conversion for non-ASCII letters. |
| 79 /// | 75 /// |
| 80 /// If two strings differ only on the case of ASCII letters, the one with the | 76 /// If two strings differ only on the case of ASCII letters, the one with the |
| 81 /// capital letter at the first difference will compare as less than the other | 77 /// capital letter at the first difference will compare as less than the other |
| 82 /// string. This tie-breaking ensures that the comparison is a total ordering | 78 /// string. This tie-breaking ensures that the comparison is a total ordering |
| 83 /// on strings and is compatible with equality. | 79 /// on strings and is compatible with equality. |
| 84 /// | 80 /// |
| (...skipping 17 matching lines...) Expand all Loading... |
| 102 if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { | 98 if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { |
| 103 bUpperCase -= _asciiCaseBit; | 99 bUpperCase -= _asciiCaseBit; |
| 104 } | 100 } |
| 105 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; | 101 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; |
| 106 if (defaultResult == 0) defaultResult = (aChar - bChar); | 102 if (defaultResult == 0) defaultResult = (aChar - bChar); |
| 107 } | 103 } |
| 108 if (b.length > a.length) return -1; | 104 if (b.length > a.length) return -1; |
| 109 return defaultResult.sign; | 105 return defaultResult.sign; |
| 110 } | 106 } |
| 111 | 107 |
| 112 | |
| 113 /// Compares [a] and [b] lexically, converting ASCII letters to lower case. | 108 /// Compares [a] and [b] lexically, converting ASCII letters to lower case. |
| 114 /// | 109 /// |
| 115 /// Comparison treats all upper-case ASCII letters as lower-case letters, | 110 /// Comparison treats all upper-case ASCII letters as lower-case letters, |
| 116 /// but does no case conversion for non-ASCII letters. | 111 /// but does no case conversion for non-ASCII letters. |
| 117 /// | 112 /// |
| 118 /// If two strings differ only on the case of ASCII letters, the one with the | 113 /// If two strings differ only on the case of ASCII letters, the one with the |
| 119 /// capital letter at the first difference will compare as less than the other | 114 /// capital letter at the first difference will compare as less than the other |
| 120 /// string. This tie-breaking ensures that the comparison is a total ordering | 115 /// string. This tie-breaking ensures that the comparison is a total ordering |
| 121 /// on strings. | 116 /// on strings. |
| 122 /// | 117 /// |
| (...skipping 17 matching lines...) Expand all Loading... |
| 140 if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { | 135 if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { |
| 141 aLowerCase += _asciiCaseBit; | 136 aLowerCase += _asciiCaseBit; |
| 142 } | 137 } |
| 143 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; | 138 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; |
| 144 if (defaultResult == 0) defaultResult = aChar - bChar; | 139 if (defaultResult == 0) defaultResult = aChar - bChar; |
| 145 } | 140 } |
| 146 if (b.length > a.length) return -1; | 141 if (b.length > a.length) return -1; |
| 147 return defaultResult.sign; | 142 return defaultResult.sign; |
| 148 } | 143 } |
| 149 | 144 |
| 150 /// Compares strings [a] and [b] according to [natural sort ordering]. | 145 /// Compares strings [a] and [b] according to [natural sort ordering][]. |
| 151 /// | 146 /// |
| 152 /// A natural sort ordering is a lexical ordering where embedded | 147 /// A natural sort ordering is a lexical ordering where embedded |
| 153 /// numerals (digit sequences) are treated as a single unit and ordered by | 148 /// numerals (digit sequences) are treated as a single unit and ordered by |
| 154 /// numerical value. | 149 /// numerical value. |
| 155 /// This means that `"a10b"` will be ordered after `"a7b"` in natural | 150 /// This means that `"a10b"` will be ordered after `"a7b"` in natural |
| 156 /// ordering, where lexical ordering would put the `1` before the `7`, ignoring | 151 /// ordering, where lexical ordering would put the `1` before the `7`, ignoring |
| 157 /// that the `1` is part of a larger number. | 152 /// that the `1` is part of a larger number. |
| 158 /// | 153 /// |
| 159 /// Example: | 154 /// Example: |
| 160 /// The following strings are in the order they would be sorted by using this | 155 /// The following strings are in the order they would be sorted by using this |
| 161 /// comparison function: | 156 /// comparison function: |
| 162 /// | 157 /// |
| 163 /// "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa" | 158 /// "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa" |
| 164 /// | 159 /// |
| 165 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order | 160 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order |
| 166 int compareNatural(String a, String b) { | 161 int compareNatural(String a, String b) { |
| 167 for (int i = 0; i < a.length; i++) { | 162 for (int i = 0; i < a.length; i++) { |
| 168 if (i >= b.length) return 1; | 163 if (i >= b.length) return 1; |
| 169 var aChar = a.codeUnitAt(i); | 164 var aChar = a.codeUnitAt(i); |
| 170 var bChar = b.codeUnitAt(i); | 165 var bChar = b.codeUnitAt(i); |
| 171 if (aChar != bChar) { | 166 if (aChar != bChar) { |
| 172 return _compareNaturally(a, b, i, aChar, bChar); | 167 return _compareNaturally(a, b, i, aChar, bChar); |
| 173 } | 168 } |
| 174 } | 169 } |
| 175 if (b.length > a.length) return -1; | 170 if (b.length > a.length) return -1; |
| 176 return 0; | 171 return 0; |
| 177 } | 172 } |
| 178 | 173 |
| 179 /// Compares strings [a] and [b] according to lower-case | 174 /// Compares strings [a] and [b] according to lower-case |
| 180 /// [natural sort ordering]. | 175 /// [natural sort ordering][]. |
| 181 /// | 176 /// |
| 182 /// ASCII letters are converted to lower case before being compared, like | 177 /// ASCII letters are converted to lower case before being compared, like |
| 183 /// for [compareAsciiLowerCase], then the result is compared like for | 178 /// for [compareAsciiLowerCase], then the result is compared like for |
| 184 /// [compareNatural]. | 179 /// [compareNatural]. |
| 185 /// | 180 /// |
| 186 /// If two strings differ only on the case of ASCII letters, the one with the | 181 /// If two strings differ only on the case of ASCII letters, the one with the |
| 187 /// capital letter at the first difference will compare as less than the other | 182 /// capital letter at the first difference will compare as less than the other |
| 188 /// string. This tie-breaking ensures that the comparison is a total ordering | 183 /// string. This tie-breaking ensures that the comparison is a total ordering |
| 189 /// on strings. | 184 /// on strings. |
| 190 /// | 185 /// |
| (...skipping 16 matching lines...) Expand all Loading... |
| 207 if (aLowerCase != bLowerCase) { | 202 if (aLowerCase != bLowerCase) { |
| 208 return _compareNaturally(a, b, i, aLowerCase, bLowerCase); | 203 return _compareNaturally(a, b, i, aLowerCase, bLowerCase); |
| 209 } | 204 } |
| 210 if (defaultResult == 0) defaultResult = aChar - bChar; | 205 if (defaultResult == 0) defaultResult = aChar - bChar; |
| 211 } | 206 } |
| 212 if (b.length > a.length) return -1; | 207 if (b.length > a.length) return -1; |
| 213 return defaultResult.sign; | 208 return defaultResult.sign; |
| 214 } | 209 } |
| 215 | 210 |
| 216 /// Compares strings [a] and [b] according to upper-case | 211 /// Compares strings [a] and [b] according to upper-case |
| 217 /// [natural sort ordering]. | 212 /// [natural sort ordering][]. |
| 218 /// | 213 /// |
| 219 /// ASCII letters are converted to upper case before being compared, like | 214 /// ASCII letters are converted to upper case before being compared, like |
| 220 /// for [compareAsciiUpperCase], then the result is compared like for | 215 /// for [compareAsciiUpperCase], then the result is compared like for |
| 221 /// [compareNatural]. | 216 /// [compareNatural]. |
| 222 /// | 217 /// |
| 223 /// If two strings differ only on the case of ASCII letters, the one with the | 218 /// If two strings differ only on the case of ASCII letters, the one with the |
| 224 /// capital letter at the first difference will compare as less than the other | 219 /// capital letter at the first difference will compare as less than the other |
| 225 /// string. This tie-breaking ensures that the comparison is a total ordering | 220 /// string. This tie-breaking ensures that the comparison is a total ordering |
| 226 /// on strings | 221 /// on strings |
| 227 /// | 222 /// |
| (...skipping 25 matching lines...) Expand all Loading... |
| 253 /// Check for numbers overlapping the current mismatched characters. | 248 /// Check for numbers overlapping the current mismatched characters. |
| 254 /// | 249 /// |
| 255 /// If both [aChar] and [bChar] are digits, use numerical comparison. | 250 /// If both [aChar] and [bChar] are digits, use numerical comparison. |
| 256 /// Check if the previous characters is a non-zero number, and if not, | 251 /// Check if the previous characters is a non-zero number, and if not, |
| 257 /// skip - but count - leading zeros before comparing numbers. | 252 /// skip - but count - leading zeros before comparing numbers. |
| 258 /// | 253 /// |
| 259 /// If one is a digit and the other isn't, check if the previous character | 254 /// If one is a digit and the other isn't, check if the previous character |
| 260 /// is a digit, and if so, the the one with the digit is the greater number. | 255 /// is a digit, and if so, the the one with the digit is the greater number. |
| 261 /// | 256 /// |
| 262 /// Otherwise just returns the difference between [aChar] and [bChar]. | 257 /// Otherwise just returns the difference between [aChar] and [bChar]. |
| 263 int _compareNaturally( | 258 int _compareNaturally(String a, String b, int index, int aChar, int bChar) { |
| 264 String a, String b, int index, int aChar, int bChar) { | |
| 265 assert(aChar != bChar); | 259 assert(aChar != bChar); |
| 266 var aIsDigit = _isDigit(aChar); | 260 var aIsDigit = _isDigit(aChar); |
| 267 var bIsDigit = _isDigit(bChar); | 261 var bIsDigit = _isDigit(bChar); |
| 268 if (aIsDigit) { | 262 if (aIsDigit) { |
| 269 if (bIsDigit) { | 263 if (bIsDigit) { |
| 270 return _compareNumerically(a, b, aChar, bChar, index); | 264 return _compareNumerically(a, b, aChar, bChar, index); |
| 271 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { | 265 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { |
| 272 // aChar is the continuation of a longer number. | 266 // aChar is the continuation of a longer number. |
| 273 return 1; | 267 return 1; |
| 274 } | 268 } |
| (...skipping 22 matching lines...) Expand all Loading... |
| 297 // digit. | 291 // digit. |
| 298 return (aChar - bChar).sign; | 292 return (aChar - bChar).sign; |
| 299 } | 293 } |
| 300 // Not part of larger (non-zero) number, so skip leading zeros before | 294 // Not part of larger (non-zero) number, so skip leading zeros before |
| 301 // comparing numbers. | 295 // comparing numbers. |
| 302 int aIndex = index; | 296 int aIndex = index; |
| 303 int bIndex = index; | 297 int bIndex = index; |
| 304 if (aChar == _zero) { | 298 if (aChar == _zero) { |
| 305 do { | 299 do { |
| 306 aIndex++; | 300 aIndex++; |
| 307 if (aIndex == a.length) return -1; // number in a is zero, b is not. | 301 if (aIndex == a.length) return -1; // number in a is zero, b is not. |
| 308 aChar = a.codeUnitAt(aIndex); | 302 aChar = a.codeUnitAt(aIndex); |
| 309 } while (aChar == _zero); | 303 } while (aChar == _zero); |
| 310 if (!_isDigit(aChar)) return -1; | 304 if (!_isDigit(aChar)) return -1; |
| 311 } else if (bChar == _zero) { | 305 } else if (bChar == _zero) { |
| 312 do { | 306 do { |
| 313 bIndex++; | 307 bIndex++; |
| 314 if (bIndex == b.length) return 1; // number in b is zero, a is not. | 308 if (bIndex == b.length) return 1; // number in b is zero, a is not. |
| 315 bChar = b.codeUnitAt(bIndex); | 309 bChar = b.codeUnitAt(bIndex); |
| 316 } while (bChar == _zero); | 310 } while (bChar == _zero); |
| 317 if (!_isDigit(bChar)) return 1; | 311 if (!_isDigit(bChar)) return 1; |
| 318 } | 312 } |
| 319 if (aChar != bChar) { | 313 if (aChar != bChar) { |
| 320 int result = _compareDigitCount(a, b, aIndex, bIndex); | 314 int result = _compareDigitCount(a, b, aIndex, bIndex); |
| 321 if (result != 0) return result; | 315 if (result != 0) return result; |
| 322 return (aChar - bChar).sign; | 316 return (aChar - bChar).sign; |
| 323 } | 317 } |
| 324 // Same leading digit, one had more leading zeros. | 318 // Same leading digit, one had more leading zeros. |
| (...skipping 13 matching lines...) Expand all Loading... |
| 338 } | 332 } |
| 339 if (aIsDigit) { | 333 if (aIsDigit) { |
| 340 if (bIsDigit) { | 334 if (bIsDigit) { |
| 341 if (aChar == bChar) continue; | 335 if (aChar == bChar) continue; |
| 342 // First different digit found. | 336 // First different digit found. |
| 343 break; | 337 break; |
| 344 } | 338 } |
| 345 // bChar is non-digit, so a has longer number. | 339 // bChar is non-digit, so a has longer number. |
| 346 return 1; | 340 return 1; |
| 347 } else if (bIsDigit) { | 341 } else if (bIsDigit) { |
| 348 return -1; // b has longer number. | 342 return -1; // b has longer number. |
| 349 } else { | 343 } else { |
| 350 // Neither is digit, so numbers had same numerical value. | 344 // Neither is digit, so numbers had same numerical value. |
| 351 // Fall back on number of leading zeros | 345 // Fall back on number of leading zeros |
| 352 // (reflected by difference in indices). | 346 // (reflected by difference in indices). |
| 353 return (aIndex - bIndex).sign; | 347 return (aIndex - bIndex).sign; |
| 354 } | 348 } |
| 355 } | 349 } |
| 356 // At first differing digits. | 350 // At first differing digits. |
| 357 int result = _compareDigitCount(a, b, aIndex, bIndex); | 351 int result = _compareDigitCount(a, b, aIndex, bIndex); |
| 358 if (result != 0) return result; | 352 if (result != 0) return result; |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 390 /// If there is no non-zero digits before, then leading zeros at [index] | 384 /// If there is no non-zero digits before, then leading zeros at [index] |
| 391 /// are also ignored when comparing numerically. If there is a non-zero digit | 385 /// are also ignored when comparing numerically. If there is a non-zero digit |
| 392 /// before, then zeros at [index] are significant. | 386 /// before, then zeros at [index] are significant. |
| 393 bool _isNonZeroNumberSuffix(String string, int index) { | 387 bool _isNonZeroNumberSuffix(String string, int index) { |
| 394 while (--index >= 0) { | 388 while (--index >= 0) { |
| 395 int char = string.codeUnitAt(index); | 389 int char = string.codeUnitAt(index); |
| 396 if (char != _zero) return _isDigit(char); | 390 if (char != _zero) return _isDigit(char); |
| 397 } | 391 } |
| 398 return false; | 392 return false; |
| 399 } | 393 } |
| OLD | NEW |