Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 library dart.pkg.collection.comparators; | |
| 6 | |
| 7 // Character constants. | |
| 8 const int _zero = 0x30; | |
| 9 const int _upperCaseA = 0x41; | |
| 10 const int _upperCaseZ = 0x5d; | |
|
Lasse Reichstein Nielsen
2015/11/19 07:15:37
Whoops, that should be 0x5a, and 0x7a below. Will
| |
| 11 const int _lowerCaseA = 0x61; | |
| 12 const int _lowerCaseZ = 0x7D; | |
| 13 const int _asciiCaseBit = 0x20; | |
| 14 | |
| 15 /// Checks if strings [a] and [b] differ only on the case of ASCII letters. | |
| 16 /// | |
| 17 /// Strings are equal if they have the same length, and the characters at | |
| 18 /// each index are the same, or they are ASCII letters where one is upper-case | |
| 19 /// and the other is the lower-case version of the same letter. | |
| 20 /// | |
| 21 /// The comparison does not ignore the case of non-ASCII letters, so | |
| 22 /// a capital ae-ligature (Æ) is different from a lower case ae-ligature (æ). | |
| 23 /// | |
| 24 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 25 /// for situations where the strings are known to be ASCII. Examples could | |
| 26 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 27 /// strings with a known structure. | |
| 28 bool equalsIgnoreAsciiCase(String a, String b) { | |
| 29 if (a.length != b.length) return false; | |
| 30 for (int i = 0; i < a.length; i++) { | |
| 31 var aChar = a.codeUnitAt(i); | |
| 32 var bChar = b.codeUnitAt(i); | |
| 33 if (aChar == bChar) continue; | |
| 34 // Quick-check for whether this may be different cases of the same letter. | |
| 35 if (aChar ^ bChar != _asciiCaseBit) return false; | |
| 36 // If it's possible, then check if either character is actually an ASCII | |
| 37 // letter. | |
| 38 int aCharUpperCase = aChar | _asciiCaseBit; | |
| 39 if (_upperCaseA <= aCharUpperCase && aCharUpperCase <= _upperCaseZ) { | |
| 40 continue; | |
| 41 } | |
| 42 return false; | |
| 43 } | |
| 44 return true; | |
| 45 } | |
| 46 | |
| 47 | |
| 48 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. | |
| 49 /// | |
| 50 /// The hash code is unaffected by changing the case of ASCII letters, but | |
| 51 /// the case of non-ASCII letters do affect the result. | |
| 52 int hashIgnoreAsciiCase(String string) { | |
| 53 // Jenkins hash code ( http://en.wikipedia.org/wiki/Jenkins_hash_function). | |
|
sra1
2015/11/18 23:40:16
Jenkins hash code modified to operate on Smi value
Lasse Reichstein Nielsen
2015/11/19 07:15:37
Done.
| |
| 54 // Same hash used by dart2js for strings, modified to ignore ASCII letter | |
| 55 // case. | |
| 56 int hash = 0; | |
| 57 for (int i = 0; i < string.length; i++) { | |
| 58 int char = string.codeUnitAt(i); | |
| 59 // Convert lower-case ASCII letters to upper case.upper | |
| 60 // This ensures that strings that differ only in case will have the | |
| 61 // same hash code. | |
| 62 if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit; | |
| 63 hash = 0x1fffffff & (hash + char); | |
| 64 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); | |
| 65 hash >>= 6; | |
| 66 } | |
| 67 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); | |
| 68 hash >>= 11; | |
| 69 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); | |
| 70 } | |
| 71 | |
| 72 | |
| 73 /// Compares [a] and [b] lexically, converting ASCII letters to upper case. | |
| 74 /// | |
| 75 /// Comparison treats all lower-case ASCII letters as upper-case letters, | |
| 76 /// but does no case conversion for non-ASCII letters. | |
| 77 /// | |
| 78 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 79 /// capital letter at the first difference will compare as less than the other | |
| 80 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 81 /// on strings and is compatible with equality. | |
| 82 /// | |
| 83 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 84 /// for situations where the strings are known to be ASCII. Examples could | |
| 85 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 86 /// strings with a known structure. | |
| 87 int compareAsciiUpperCase(String a, String b) { | |
| 88 int defaultResult = 0; // Returned if no difference found. | |
| 89 for (int i = 0; i < a.length; i++) { | |
| 90 if (i == b.length) return 1; | |
| 91 var aChar = a.codeUnitAt(i); | |
| 92 var bChar = b.codeUnitAt(i); | |
| 93 if (aChar == bChar) continue; | |
| 94 // Upper-case if letters. | |
| 95 int aUpperCase = aChar; | |
| 96 int bUpperCase = bChar; | |
| 97 if (_lowerCaseA <= aChar && aChar <= _lowerCaseZ) { | |
| 98 aUpperCase -= _asciiCaseBit; | |
| 99 } | |
| 100 if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { | |
| 101 bUpperCase -= _asciiCaseBit; | |
| 102 } | |
| 103 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; | |
| 104 if (defaultResult == 0) defaultResult = (aChar - bChar); | |
| 105 } | |
| 106 if (b.length > a.length) return -1; | |
| 107 return defaultResult.sign; | |
| 108 } | |
| 109 | |
| 110 | |
| 111 /// Compares [a] and [b] lexically, converting ASCII letters to lower case. | |
| 112 /// | |
| 113 /// Comparison treats all upper-case ASCII letters as lower-case letters, | |
| 114 /// but does no case conversion for non-ASCII letters. | |
| 115 /// | |
| 116 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 117 /// capital letter at the first difference will compare as less than the other | |
| 118 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 119 /// on strings. | |
| 120 /// | |
| 121 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 122 /// for situations where the strings are known to be ASCII. Examples could | |
| 123 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 124 /// strings with a known structure. | |
| 125 int compareAsciiLowerCase(String a, String b) { | |
|
floitsch
2015/11/18 22:50:11
How is this one different from compareAsciiUpperCa
Lasse Reichstein Nielsen
2015/11/19 07:15:37
Only in how it orders, e.g., "a" vs. "_". Since "
| |
| 126 int defaultResult = 0; | |
| 127 for (int i = 0; i < a.length; i++) { | |
| 128 if (i == b.length) return 1; | |
| 129 var aChar = a.codeUnitAt(i); | |
| 130 var bChar = b.codeUnitAt(i); | |
| 131 if (aChar == bChar) continue; | |
| 132 int aLowerCase = aChar; | |
| 133 int bLowerCase = bChar; | |
| 134 // Upper case if ASCII letters. | |
| 135 if (_upperCaseA <= bChar && bChar <= _upperCaseZ) { | |
| 136 bLowerCase += _asciiCaseBit; | |
| 137 } | |
| 138 if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { | |
| 139 aLowerCase += _asciiCaseBit; | |
| 140 } | |
| 141 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; | |
| 142 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 143 } | |
| 144 if (b.length > a.length) return -1; | |
| 145 return defaultResult.sign; | |
| 146 } | |
| 147 | |
| 148 /// Compares strings [a] and [b] according to [natural sort ordering]. | |
| 149 /// | |
| 150 /// A natural sort ordering is a lexical ordering where embedded | |
| 151 /// numerals (digit sequeunces) are treated as a single unit and ordered by | |
| 152 /// numerical value. | |
| 153 /// This means that `"a10b"` will be ordered after `"a7b"` in natural | |
| 154 /// ordering, where leixcal ordering would put the `1` before the `7`, ignoring | |
|
floitsch
2015/11/18 22:50:11
lexical
| |
| 155 /// that the `1` is part of a larger number. | |
| 156 /// | |
| 157 /// Example: | |
| 158 /// The following strings are in the order they would be sorted by using this | |
| 159 /// comparison function: | |
| 160 /// | |
| 161 /// "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa" | |
| 162 /// | |
| 163 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order | |
| 164 int compareNatural(String a, String b) { | |
| 165 for (int i = 0; i < a.length; i++) { | |
| 166 if (i == b.length) return 1; | |
|
sra1
2015/11/18 23:40:16
Use
if (i >= b.length)
to improve constraint-b
Lasse Reichstein Nielsen
2015/11/19 07:15:37
Done.
| |
| 167 var aChar = a.codeUnitAt(i); | |
| 168 var bChar = b.codeUnitAt(i); | |
| 169 if (aChar != bChar) { | |
| 170 return _compareNaturally(a, b, i, aChar, bChar); | |
| 171 } | |
| 172 } | |
| 173 if (b.length > a.length) return -1; | |
| 174 return 0; | |
| 175 } | |
| 176 | |
| 177 /// Compares strings [a] and [b] according to lower-case | |
| 178 /// [natural sort ordering]. | |
| 179 /// | |
| 180 /// ASCII letters are converted to lower case before being compared, like | |
| 181 /// for [compareAsciiLowerCase], then the result is compared like for | |
| 182 /// [compareNatural]. | |
| 183 /// | |
| 184 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 185 /// capital letter at the first difference will compare as less than the other | |
| 186 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 187 /// on strings. | |
| 188 /// | |
| 189 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order | |
| 190 int compareAsciiLowerCaseNatural(String a, String b) { | |
| 191 int defaultResult = 0; // Returned if no difference found. | |
| 192 for (int i = 0; i < a.length; i++) { | |
| 193 if (i == b.length) return 1; | |
| 194 var aChar = a.codeUnitAt(i); | |
| 195 var bChar = b.codeUnitAt(i); | |
| 196 if (aChar == bChar) continue; | |
| 197 int aLowerCase = aChar; | |
| 198 int bLowerCase = bChar; | |
| 199 if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { | |
| 200 aLowerCase += _asciiCaseBit; | |
| 201 } | |
| 202 if (_upperCaseA <= bChar && bChar <= _upperCaseZ) { | |
| 203 bLowerCase += _asciiCaseBit; | |
| 204 } | |
| 205 if (aLowerCase != bLowerCase) { | |
| 206 return _compareNaturally(a, b, i, aLowerCase, bLowerCase); | |
| 207 } | |
| 208 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 209 } | |
| 210 if (b.length > a.length) return -1; | |
| 211 return defaultResult.sign; | |
| 212 } | |
| 213 | |
| 214 /// Compares strings [a] and [b] according to upper-case | |
| 215 /// [natural sort ordering]. | |
| 216 /// | |
| 217 /// ASCII letters are converted to upper case before being compared, like | |
| 218 /// for [compareAsciiUpperCase], then the result is compared like for | |
| 219 /// [compareNatural]. | |
| 220 /// | |
| 221 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 222 /// capital letter at the first difference will compare as less than the other | |
| 223 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 224 /// on strings | |
| 225 /// | |
| 226 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order | |
| 227 int compareAsciiUpperCaseNatural(String a, String b) { | |
|
floitsch
2015/11/18 22:50:11
How is this one different from compareAsciiLowerCa
Lasse Reichstein Nielsen
2015/11/19 07:15:37
Same as above, differs on six characters that lie
| |
| 228 int defaultResult = 0; | |
| 229 for (int i = 0; i < a.length; i++) { | |
| 230 if (i == b.length) return 1; | |
| 231 var aChar = a.codeUnitAt(i); | |
| 232 var bChar = b.codeUnitAt(i); | |
| 233 if (aChar == bChar) continue; | |
| 234 int aUpperCase = aChar; | |
| 235 int bUpperCase = bChar; | |
| 236 if (_lowerCaseA <= aChar && aChar <= _lowerCaseZ) { | |
| 237 aUpperCase -= _asciiCaseBit; | |
| 238 } | |
| 239 if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { | |
| 240 bUpperCase -= _asciiCaseBit; | |
| 241 } | |
| 242 if (aUpperCase != bUpperCase) { | |
| 243 return _compareNaturally(a, b, i, aUpperCase, bUpperCase); | |
| 244 } | |
| 245 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 246 } | |
| 247 if (b.length > a.length) return -1; | |
| 248 return defaultResult.sign; | |
| 249 } | |
| 250 | |
| 251 /// Check for numbers overlapping the current mismatched characters. | |
| 252 /// | |
| 253 /// If both [aChar] and [bChar] are digits, use numerical comparison. | |
| 254 /// Check if the previous characters is a non-zero number, and if not, | |
| 255 /// skip - but count - leading zeros before comparing numbers. | |
| 256 /// | |
| 257 /// If one is a digit and the other isn't, check if the previous character | |
| 258 /// is a digit, and if so, the the one with the digit is the greater number. | |
| 259 /// | |
| 260 /// Otherwise just returns the difference between [aChar] and [bChar]. | |
| 261 int _compareNaturally( | |
| 262 String a, String b, int index, int aChar, int bChar) { | |
| 263 assert(aChar != bChar); | |
| 264 var aIsDigit = _isDigit(aChar); | |
| 265 var bIsDigit = _isDigit(bChar); | |
| 266 if (aIsDigit) { | |
| 267 if (bIsDigit) { | |
| 268 return _compareNumerically(a, b, aChar, bChar, index); | |
| 269 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { | |
| 270 // aChar is the continuation of a longer number. | |
| 271 return 1; | |
| 272 } | |
| 273 } else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) { | |
| 274 // bChar is the continuation of a longer number. | |
| 275 return -1; | |
| 276 } | |
| 277 // Characters are both non-digits, or not continuation of earlier number. | |
| 278 return (aChar - bChar).sign; | |
| 279 } | |
| 280 | |
| 281 /// Compare numbers overlapping [aChar] and [bChar] numerically. | |
| 282 /// | |
| 283 /// If the numbers have the same numerical value, but one has more leading | |
| 284 /// zeros, the longer number is considered greater than the shorter one. | |
| 285 /// | |
| 286 /// This ensures a total ordering on strings compatible with equality. | |
| 287 int _compareNumerically(String a, String b, int aChar, int bChar, int index) { | |
| 288 // Both are digits. Find the first significant different digit, then find | |
| 289 // the length of the numbers. | |
| 290 if (_isNumberSuffix(a, index)) { | |
| 291 // Part of a longer number, differs at this index, just count the length. | |
| 292 int result = _compareDigitCount(a, b, index, index); | |
| 293 if (result != 0) return result; | |
| 294 // If same length, the current character is the most significant differing | |
| 295 // digit. | |
| 296 return (aChar - bChar).sign; | |
| 297 } | |
| 298 // Not part of larger (non-zero) number, so skip leading zeros before | |
| 299 // comparing numbers. | |
| 300 int aIndex = index; | |
| 301 int bIndex = index; | |
| 302 if (aChar == _zero) { | |
| 303 do { | |
| 304 aIndex++; | |
| 305 if (aIndex == a.length) return -1; // number in a is zero, b is not. | |
| 306 aChar = a.codeUnitAt(aIndex); | |
| 307 } while (aChar == _zero); | |
| 308 if (!_isDigit(aChar)) return -1; | |
| 309 } else if (bChar == _zero) { | |
| 310 do { | |
| 311 bIndex++; | |
| 312 if (bIndex == b.length) return 1; // number in b is zero, a is not. | |
| 313 bChar = b.codeUnitAt(bIndex); | |
| 314 } while (bChar == _zero); | |
| 315 if (!_isDigit(bChar)) return 1; | |
| 316 } | |
| 317 if (aChar != bChar) { | |
| 318 int result = _compareDigitCount(a, b, aIndex, bIndex); | |
| 319 if (result != 0) return result; | |
| 320 return (aChar - bChar).sign; | |
| 321 } | |
| 322 // Same leading digit, one had more leading zeros. | |
| 323 // Compare digits until reaching a difference. | |
| 324 while (true) { | |
| 325 var aIsDigit = false; | |
| 326 var bIsDigit = false; | |
| 327 aChar = 0; | |
| 328 bChar = 0; | |
| 329 if (++aIndex < a.length) { | |
| 330 aChar = a.codeUnitAt(aIndex); | |
| 331 aIsDigit = _isDigit(aChar); | |
| 332 } | |
| 333 if (++bIndex < b.length) { | |
| 334 bChar = b.codeUnitAt(bIndex); | |
| 335 bIsDigit = _isDigit(bChar); | |
| 336 } | |
| 337 if (aIsDigit) { | |
| 338 if (bIsDigit) { | |
| 339 if (aChar == bChar) continue; | |
| 340 // First different digit found. | |
| 341 break; | |
| 342 } | |
| 343 // bChar is non-digit, so a has longer number. | |
| 344 return 1; | |
| 345 } else if (bIsDigit) { | |
| 346 return -1; // b has longer number. | |
| 347 } else { | |
| 348 // Neither is digit, so numbers had same numerical value. | |
| 349 // Fall back on number of leading zeros | |
| 350 // (reflected by difference in indices). | |
| 351 return (aIndex - bIndex).sign; | |
| 352 } | |
| 353 } | |
| 354 // At first differing digits. | |
| 355 int result = _compareDigitCount(a, b, aIndex, bIndex); | |
| 356 if (result != 0) return result; | |
| 357 return (aChar - bChar).sign; | |
| 358 } | |
| 359 | |
| 360 /// Checks which of [a] and [b] has the longest sequence of digits. | |
| 361 /// | |
| 362 /// Starts counting from `i + 1` and `j + 1` (assumes that `a[i]` and `b[j]` are | |
| 363 /// both already known to be digits). | |
| 364 int _compareDigitCount(String a, String b, int i, int j) { | |
| 365 while (++i < a.length) { | |
| 366 bool aIsDigit = _isDigit(a.codeUnitAt(i)); | |
| 367 if (++j == b.length) return aIsDigit ? 1 : 0; | |
| 368 bool bIsDigit = _isDigit(b.codeUnitAt(j)); | |
| 369 if (aIsDigit) { | |
| 370 if (bIsDigit) continue; | |
| 371 return 1; | |
| 372 } else if (bIsDigit) { | |
| 373 return -1; | |
| 374 } else { | |
| 375 return 0; | |
| 376 } | |
| 377 } | |
| 378 if (++j < b.length && _isDigit(b.codeUnitAt(j))) { | |
| 379 return -1; | |
| 380 } | |
| 381 return 0; | |
| 382 } | |
| 383 | |
| 384 bool _isDigit(int charCode) => (charCode ^ _zero) <= 9; | |
| 385 | |
| 386 /// Check if the digit at [index] is continuing a non-zero number. | |
| 387 /// | |
| 388 /// If there is no non-zero digits before, then leading zeros at [index] | |
| 389 /// are also ignored when comparing numerically. If there is a non-zero digit | |
| 390 /// before, then zeros at [index] are significant. | |
| 391 bool _isNumberSuffix(String string, int index) { | |
|
sra1
2015/11/18 23:40:16
maybe call it _isNonZeroNumberSuffix
Lasse Reichstein Nielsen
2015/11/19 07:15:37
Done.
| |
| 392 while (--index >= 0) { | |
| 393 int char = string.codeUnitAt(index); | |
| 394 if (char != _zero) return _isDigit(char); | |
| 395 } | |
| 396 return false; | |
| 397 } | |
| OLD | NEW |