OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library dart.pkg.collection.comparators; |
| 6 |
| 7 // Character constants. |
| 8 const int _zero = 0x30; |
| 9 const int _upperCaseA = 0x41; |
| 10 const int _upperCaseZ = 0x5a; |
| 11 const int _lowerCaseA = 0x61; |
| 12 const int _lowerCaseZ = 0x7a; |
| 13 const int _asciiCaseBit = 0x20; |
| 14 |
| 15 /// Checks if strings [a] and [b] differ only on the case of ASCII letters. |
| 16 /// |
| 17 /// Strings are equal if they have the same length, and the characters at |
| 18 /// each index are the same, or they are ASCII letters where one is upper-case |
| 19 /// and the other is the lower-case version of the same letter. |
| 20 /// |
| 21 /// The comparison does not ignore the case of non-ASCII letters, so |
| 22 /// an upper-case ae-ligature (Æ) is different from |
| 23 /// a lower case ae-ligature (æ). |
| 24 /// |
| 25 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense |
| 26 /// for situations where the strings are known to be ASCII. Examples could |
| 27 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar |
| 28 /// strings with a known structure. |
| 29 bool equalsIgnoreAsciiCase(String a, String b) { |
| 30 if (a.length != b.length) return false; |
| 31 for (int i = 0; i < a.length; i++) { |
| 32 var aChar = a.codeUnitAt(i); |
| 33 var bChar = b.codeUnitAt(i); |
| 34 if (aChar == bChar) continue; |
| 35 // Quick-check for whether this may be different cases of the same letter. |
| 36 if (aChar ^ bChar != _asciiCaseBit) return false; |
| 37 // If it's possible, then check if either character is actually an ASCII |
| 38 // letter. |
| 39 int aCharUpperCase = aChar | _asciiCaseBit; |
| 40 if (_upperCaseA <= aCharUpperCase && aCharUpperCase <= _upperCaseZ) { |
| 41 continue; |
| 42 } |
| 43 return false; |
| 44 } |
| 45 return true; |
| 46 } |
| 47 |
| 48 |
| 49 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. |
| 50 /// |
| 51 /// The hash code is unaffected by changing the case of ASCII letters, but |
| 52 /// the case of non-ASCII letters do affect the result. |
| 53 int hashIgnoreAsciiCase(String string) { |
| 54 // Jenkins hash code ( http://en.wikipedia.org/wiki/Jenkins_hash_function). |
| 55 // adapted to smi values. |
| 56 // Same hash used by dart2js for strings, modified to ignore ASCII letter |
| 57 // case. |
| 58 int hash = 0; |
| 59 for (int i = 0; i < string.length; i++) { |
| 60 int char = string.codeUnitAt(i); |
| 61 // Convert lower-case ASCII letters to upper case.upper |
| 62 // This ensures that strings that differ only in case will have the |
| 63 // same hash code. |
| 64 if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit; |
| 65 hash = 0x1fffffff & (hash + char); |
| 66 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); |
| 67 hash >>= 6; |
| 68 } |
| 69 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); |
| 70 hash >>= 11; |
| 71 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); |
| 72 } |
| 73 |
| 74 |
| 75 /// Compares [a] and [b] lexically, converting ASCII letters to upper case. |
| 76 /// |
| 77 /// Comparison treats all lower-case ASCII letters as upper-case letters, |
| 78 /// but does no case conversion for non-ASCII letters. |
| 79 /// |
| 80 /// If two strings differ only on the case of ASCII letters, the one with the |
| 81 /// capital letter at the first difference will compare as less than the other |
| 82 /// string. This tie-breaking ensures that the comparison is a total ordering |
| 83 /// on strings and is compatible with equality. |
| 84 /// |
| 85 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense |
| 86 /// for situations where the strings are known to be ASCII. Examples could |
| 87 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar |
| 88 /// strings with a known structure. |
| 89 int compareAsciiUpperCase(String a, String b) { |
| 90 int defaultResult = 0; // Returned if no difference found. |
| 91 for (int i = 0; i < a.length; i++) { |
| 92 if (i >= b.length) return 1; |
| 93 var aChar = a.codeUnitAt(i); |
| 94 var bChar = b.codeUnitAt(i); |
| 95 if (aChar == bChar) continue; |
| 96 // Upper-case if letters. |
| 97 int aUpperCase = aChar; |
| 98 int bUpperCase = bChar; |
| 99 if (_lowerCaseA <= aChar && aChar <= _lowerCaseZ) { |
| 100 aUpperCase -= _asciiCaseBit; |
| 101 } |
| 102 if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { |
| 103 bUpperCase -= _asciiCaseBit; |
| 104 } |
| 105 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; |
| 106 if (defaultResult == 0) defaultResult = (aChar - bChar); |
| 107 } |
| 108 if (b.length > a.length) return -1; |
| 109 return defaultResult.sign; |
| 110 } |
| 111 |
| 112 |
| 113 /// Compares [a] and [b] lexically, converting ASCII letters to lower case. |
| 114 /// |
| 115 /// Comparison treats all upper-case ASCII letters as lower-case letters, |
| 116 /// but does no case conversion for non-ASCII letters. |
| 117 /// |
| 118 /// If two strings differ only on the case of ASCII letters, the one with the |
| 119 /// capital letter at the first difference will compare as less than the other |
| 120 /// string. This tie-breaking ensures that the comparison is a total ordering |
| 121 /// on strings. |
| 122 /// |
| 123 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense |
| 124 /// for situations where the strings are known to be ASCII. Examples could |
| 125 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar |
| 126 /// strings with a known structure. |
| 127 int compareAsciiLowerCase(String a, String b) { |
| 128 int defaultResult = 0; |
| 129 for (int i = 0; i < a.length; i++) { |
| 130 if (i >= b.length) return 1; |
| 131 var aChar = a.codeUnitAt(i); |
| 132 var bChar = b.codeUnitAt(i); |
| 133 if (aChar == bChar) continue; |
| 134 int aLowerCase = aChar; |
| 135 int bLowerCase = bChar; |
| 136 // Upper case if ASCII letters. |
| 137 if (_upperCaseA <= bChar && bChar <= _upperCaseZ) { |
| 138 bLowerCase += _asciiCaseBit; |
| 139 } |
| 140 if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { |
| 141 aLowerCase += _asciiCaseBit; |
| 142 } |
| 143 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; |
| 144 if (defaultResult == 0) defaultResult = aChar - bChar; |
| 145 } |
| 146 if (b.length > a.length) return -1; |
| 147 return defaultResult.sign; |
| 148 } |
| 149 |
| 150 /// Compares strings [a] and [b] according to [natural sort ordering]. |
| 151 /// |
| 152 /// A natural sort ordering is a lexical ordering where embedded |
| 153 /// numerals (digit sequences) are treated as a single unit and ordered by |
| 154 /// numerical value. |
| 155 /// This means that `"a10b"` will be ordered after `"a7b"` in natural |
| 156 /// ordering, where lexical ordering would put the `1` before the `7`, ignoring |
| 157 /// that the `1` is part of a larger number. |
| 158 /// |
| 159 /// Example: |
| 160 /// The following strings are in the order they would be sorted by using this |
| 161 /// comparison function: |
| 162 /// |
| 163 /// "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa" |
| 164 /// |
| 165 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order |
| 166 int compareNatural(String a, String b) { |
| 167 for (int i = 0; i < a.length; i++) { |
| 168 if (i >= b.length) return 1; |
| 169 var aChar = a.codeUnitAt(i); |
| 170 var bChar = b.codeUnitAt(i); |
| 171 if (aChar != bChar) { |
| 172 return _compareNaturally(a, b, i, aChar, bChar); |
| 173 } |
| 174 } |
| 175 if (b.length > a.length) return -1; |
| 176 return 0; |
| 177 } |
| 178 |
| 179 /// Compares strings [a] and [b] according to lower-case |
| 180 /// [natural sort ordering]. |
| 181 /// |
| 182 /// ASCII letters are converted to lower case before being compared, like |
| 183 /// for [compareAsciiLowerCase], then the result is compared like for |
| 184 /// [compareNatural]. |
| 185 /// |
| 186 /// If two strings differ only on the case of ASCII letters, the one with the |
| 187 /// capital letter at the first difference will compare as less than the other |
| 188 /// string. This tie-breaking ensures that the comparison is a total ordering |
| 189 /// on strings. |
| 190 /// |
| 191 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order |
| 192 int compareAsciiLowerCaseNatural(String a, String b) { |
| 193 int defaultResult = 0; // Returned if no difference found. |
| 194 for (int i = 0; i < a.length; i++) { |
| 195 if (i >= b.length) return 1; |
| 196 var aChar = a.codeUnitAt(i); |
| 197 var bChar = b.codeUnitAt(i); |
| 198 if (aChar == bChar) continue; |
| 199 int aLowerCase = aChar; |
| 200 int bLowerCase = bChar; |
| 201 if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { |
| 202 aLowerCase += _asciiCaseBit; |
| 203 } |
| 204 if (_upperCaseA <= bChar && bChar <= _upperCaseZ) { |
| 205 bLowerCase += _asciiCaseBit; |
| 206 } |
| 207 if (aLowerCase != bLowerCase) { |
| 208 return _compareNaturally(a, b, i, aLowerCase, bLowerCase); |
| 209 } |
| 210 if (defaultResult == 0) defaultResult = aChar - bChar; |
| 211 } |
| 212 if (b.length > a.length) return -1; |
| 213 return defaultResult.sign; |
| 214 } |
| 215 |
| 216 /// Compares strings [a] and [b] according to upper-case |
| 217 /// [natural sort ordering]. |
| 218 /// |
| 219 /// ASCII letters are converted to upper case before being compared, like |
| 220 /// for [compareAsciiUpperCase], then the result is compared like for |
| 221 /// [compareNatural]. |
| 222 /// |
| 223 /// If two strings differ only on the case of ASCII letters, the one with the |
| 224 /// capital letter at the first difference will compare as less than the other |
| 225 /// string. This tie-breaking ensures that the comparison is a total ordering |
| 226 /// on strings |
| 227 /// |
| 228 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order |
| 229 int compareAsciiUpperCaseNatural(String a, String b) { |
| 230 int defaultResult = 0; |
| 231 for (int i = 0; i < a.length; i++) { |
| 232 if (i >= b.length) return 1; |
| 233 var aChar = a.codeUnitAt(i); |
| 234 var bChar = b.codeUnitAt(i); |
| 235 if (aChar == bChar) continue; |
| 236 int aUpperCase = aChar; |
| 237 int bUpperCase = bChar; |
| 238 if (_lowerCaseA <= aChar && aChar <= _lowerCaseZ) { |
| 239 aUpperCase -= _asciiCaseBit; |
| 240 } |
| 241 if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { |
| 242 bUpperCase -= _asciiCaseBit; |
| 243 } |
| 244 if (aUpperCase != bUpperCase) { |
| 245 return _compareNaturally(a, b, i, aUpperCase, bUpperCase); |
| 246 } |
| 247 if (defaultResult == 0) defaultResult = aChar - bChar; |
| 248 } |
| 249 if (b.length > a.length) return -1; |
| 250 return defaultResult.sign; |
| 251 } |
| 252 |
| 253 /// Check for numbers overlapping the current mismatched characters. |
| 254 /// |
| 255 /// If both [aChar] and [bChar] are digits, use numerical comparison. |
| 256 /// Check if the previous characters is a non-zero number, and if not, |
| 257 /// skip - but count - leading zeros before comparing numbers. |
| 258 /// |
| 259 /// If one is a digit and the other isn't, check if the previous character |
| 260 /// is a digit, and if so, the the one with the digit is the greater number. |
| 261 /// |
| 262 /// Otherwise just returns the difference between [aChar] and [bChar]. |
| 263 int _compareNaturally( |
| 264 String a, String b, int index, int aChar, int bChar) { |
| 265 assert(aChar != bChar); |
| 266 var aIsDigit = _isDigit(aChar); |
| 267 var bIsDigit = _isDigit(bChar); |
| 268 if (aIsDigit) { |
| 269 if (bIsDigit) { |
| 270 return _compareNumerically(a, b, aChar, bChar, index); |
| 271 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { |
| 272 // aChar is the continuation of a longer number. |
| 273 return 1; |
| 274 } |
| 275 } else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) { |
| 276 // bChar is the continuation of a longer number. |
| 277 return -1; |
| 278 } |
| 279 // Characters are both non-digits, or not continuation of earlier number. |
| 280 return (aChar - bChar).sign; |
| 281 } |
| 282 |
| 283 /// Compare numbers overlapping [aChar] and [bChar] numerically. |
| 284 /// |
| 285 /// If the numbers have the same numerical value, but one has more leading |
| 286 /// zeros, the longer number is considered greater than the shorter one. |
| 287 /// |
| 288 /// This ensures a total ordering on strings compatible with equality. |
| 289 int _compareNumerically(String a, String b, int aChar, int bChar, int index) { |
| 290 // Both are digits. Find the first significant different digit, then find |
| 291 // the length of the numbers. |
| 292 if (_isNonZeroNumberSuffix(a, index)) { |
| 293 // Part of a longer number, differs at this index, just count the length. |
| 294 int result = _compareDigitCount(a, b, index, index); |
| 295 if (result != 0) return result; |
| 296 // If same length, the current character is the most significant differing |
| 297 // digit. |
| 298 return (aChar - bChar).sign; |
| 299 } |
| 300 // Not part of larger (non-zero) number, so skip leading zeros before |
| 301 // comparing numbers. |
| 302 int aIndex = index; |
| 303 int bIndex = index; |
| 304 if (aChar == _zero) { |
| 305 do { |
| 306 aIndex++; |
| 307 if (aIndex == a.length) return -1; // number in a is zero, b is not. |
| 308 aChar = a.codeUnitAt(aIndex); |
| 309 } while (aChar == _zero); |
| 310 if (!_isDigit(aChar)) return -1; |
| 311 } else if (bChar == _zero) { |
| 312 do { |
| 313 bIndex++; |
| 314 if (bIndex == b.length) return 1; // number in b is zero, a is not. |
| 315 bChar = b.codeUnitAt(bIndex); |
| 316 } while (bChar == _zero); |
| 317 if (!_isDigit(bChar)) return 1; |
| 318 } |
| 319 if (aChar != bChar) { |
| 320 int result = _compareDigitCount(a, b, aIndex, bIndex); |
| 321 if (result != 0) return result; |
| 322 return (aChar - bChar).sign; |
| 323 } |
| 324 // Same leading digit, one had more leading zeros. |
| 325 // Compare digits until reaching a difference. |
| 326 while (true) { |
| 327 var aIsDigit = false; |
| 328 var bIsDigit = false; |
| 329 aChar = 0; |
| 330 bChar = 0; |
| 331 if (++aIndex < a.length) { |
| 332 aChar = a.codeUnitAt(aIndex); |
| 333 aIsDigit = _isDigit(aChar); |
| 334 } |
| 335 if (++bIndex < b.length) { |
| 336 bChar = b.codeUnitAt(bIndex); |
| 337 bIsDigit = _isDigit(bChar); |
| 338 } |
| 339 if (aIsDigit) { |
| 340 if (bIsDigit) { |
| 341 if (aChar == bChar) continue; |
| 342 // First different digit found. |
| 343 break; |
| 344 } |
| 345 // bChar is non-digit, so a has longer number. |
| 346 return 1; |
| 347 } else if (bIsDigit) { |
| 348 return -1; // b has longer number. |
| 349 } else { |
| 350 // Neither is digit, so numbers had same numerical value. |
| 351 // Fall back on number of leading zeros |
| 352 // (reflected by difference in indices). |
| 353 return (aIndex - bIndex).sign; |
| 354 } |
| 355 } |
| 356 // At first differing digits. |
| 357 int result = _compareDigitCount(a, b, aIndex, bIndex); |
| 358 if (result != 0) return result; |
| 359 return (aChar - bChar).sign; |
| 360 } |
| 361 |
| 362 /// Checks which of [a] and [b] has the longest sequence of digits. |
| 363 /// |
| 364 /// Starts counting from `i + 1` and `j + 1` (assumes that `a[i]` and `b[j]` are |
| 365 /// both already known to be digits). |
| 366 int _compareDigitCount(String a, String b, int i, int j) { |
| 367 while (++i < a.length) { |
| 368 bool aIsDigit = _isDigit(a.codeUnitAt(i)); |
| 369 if (++j == b.length) return aIsDigit ? 1 : 0; |
| 370 bool bIsDigit = _isDigit(b.codeUnitAt(j)); |
| 371 if (aIsDigit) { |
| 372 if (bIsDigit) continue; |
| 373 return 1; |
| 374 } else if (bIsDigit) { |
| 375 return -1; |
| 376 } else { |
| 377 return 0; |
| 378 } |
| 379 } |
| 380 if (++j < b.length && _isDigit(b.codeUnitAt(j))) { |
| 381 return -1; |
| 382 } |
| 383 return 0; |
| 384 } |
| 385 |
| 386 bool _isDigit(int charCode) => (charCode ^ _zero) <= 9; |
| 387 |
| 388 /// Check if the digit at [index] is continuing a non-zero number. |
| 389 /// |
| 390 /// If there is no non-zero digits before, then leading zeros at [index] |
| 391 /// are also ignored when comparing numerically. If there is a non-zero digit |
| 392 /// before, then zeros at [index] are significant. |
| 393 bool _isNonZeroNumberSuffix(String string, int index) { |
| 394 while (--index >= 0) { |
| 395 int char = string.codeUnitAt(index); |
| 396 if (char != _zero) return _isDigit(char); |
| 397 } |
| 398 return false; |
| 399 } |
OLD | NEW |