Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 library dart.pkg.collection.comparators; | |
| 6 | |
| 7 /// Checks if strings [a] and [b] differ only on the case of ASCII letters. | |
| 8 /// | |
| 9 /// Strings are equal if they have the same length, and the characters at | |
| 10 /// each index are the same, or they are ASCII letters where one is upper-case | |
| 11 /// and the other is the lower-case version of the same letter. | |
| 12 /// | |
| 13 /// The comparison does not ignore the case of non-ASCII letters, so | |
| 14 /// a capital ae-ligature (Æ) is different from a lower case ae-ligature (æ). | |
| 15 /// | |
| 16 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 17 /// for situations where the strings are known to be ASCII. Examples could | |
| 18 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 19 /// strings with a known structure. | |
| 20 bool equalsIgnoreAsciiCase(String a, String b) { | |
| 21 if (a.length != b.length) return false; | |
| 22 for (int i = 0; i < a.length; i++) { | |
| 23 var aChar = a.codeUnitAt(i); | |
| 24 var bChar = b.codeUnitAt(i); | |
| 25 if (aChar == bChar) continue; | |
| 26 const int upperCaseA = 0x41; | |
|
sra1
2015/11/13 18:25:40
Hoist these common constants to top level library
Lasse Reichstein Nielsen
2015/11/18 12:12:47
Done.
| |
| 27 const int upperCaseZ = 0x5d; | |
| 28 if (aChar ^ bChar != 0x20) return false; | |
|
Bob Nystrom
2015/11/13 16:55:36
Document this. This is because upper/lowercase ASC
Lasse Reichstein Nielsen
2015/11/18 12:12:47
Yes, this is a quick bailout. If the characters di
| |
| 29 if (upperCaseA <= aChar && aChar <= upperCaseZ) continue; | |
| 30 if (upperCaseA <= bChar && bChar <= upperCaseZ) continue; | |
| 31 return false; | |
| 32 } | |
| 33 return true; | |
| 34 } | |
| 35 | |
| 36 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. | |
| 37 /// | |
| 38 /// The hash code is unaffected by changing the case of ASCII letters, but | |
| 39 /// the case of non-ASCII letters do affect the result. | |
| 40 int hashIgnoreAsciiCase(String string) { | |
| 41 int hash = 0; | |
| 42 for (int i = 0; i < string.length; i++) { | |
| 43 int char = string.codeUnitAt(i); | |
| 44 const int lowerCaseA = 0x61; | |
| 45 const int lowerCaseZ = 0x7D; | |
| 46 if (lowerCaseA <= char && char <= lowerCaseZ) char -= 0x20; | |
|
Bob Nystrom
2015/11/13 16:55:36
Document this.
Lasse Reichstein Nielsen
2015/11/18 12:12:47
Done.
| |
| 47 hash = 0x1fffffff & (hash + char); | |
| 48 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); | |
| 49 hash >>= 6; | |
| 50 } | |
| 51 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); | |
| 52 hash >>= 11; | |
| 53 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); | |
|
Bob Nystrom
2015/11/13 16:55:36
It would be good to document a reference for where
Lasse Reichstein Nielsen
2015/11/18 12:12:47
It's Jenkins, stole the code from the actual strin
| |
| 54 } | |
| 55 | |
| 56 /// Compares [a] and [b] lexically, converting ASCII letters to upper case. | |
| 57 /// | |
| 58 /// Comparison treats all lower-case ASCII letters as their upper-case letter, | |
| 59 /// but does no case conversion for non-ASCII letters. | |
| 60 /// | |
| 61 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 62 /// capital letter at the first difference will compare as less than the other | |
| 63 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 64 /// on strings. | |
| 65 /// | |
| 66 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 67 /// for situations where the strings are known to be ASCII. Examples could | |
| 68 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 69 /// strings with a known structure. | |
| 70 int compareAsciiUpperCase(String a, String b) { | |
| 71 int defaultResult = 0; // Returned if no difference found. | |
| 72 for (int i = 0; i < a.length; i++) { | |
| 73 if (i == b.length) return 1; | |
| 74 var aChar = a.codeUnitAt(i); | |
| 75 var bChar = b.codeUnitAt(i); | |
| 76 if (aChar == bChar) continue; | |
| 77 const int lowerCaseA = 0x61; | |
| 78 const int lowerCaseZ = 0x7d; | |
| 79 // Lower case if letters. | |
| 80 int aUpperCase = aChar; | |
| 81 int bUpperCase = bChar; | |
| 82 if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20; | |
| 83 if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20; | |
|
Bob Nystrom
2015/11/13 16:55:36
You do this in a few places. I wonder if it's wort
Lasse Reichstein Nielsen
2015/11/18 12:12:47
I could, but only if I'm sure it will get inlined.
| |
| 84 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; | |
| 85 if (defaultResult == 0) defaultResult = (aChar - bChar); | |
| 86 } | |
| 87 if (b.length > a.length) return -1; | |
| 88 return defaultResult.sign; | |
| 89 } | |
| 90 | |
| 91 /// Compares [a] and [b] lexically, converting ASCII letters to lower case. | |
| 92 /// | |
| 93 /// Comparison treats all upper-case ASCII letters as lower-case letter, | |
|
Bob Nystrom
2015/11/13 16:55:36
"letter" -> "letters"
Lasse Reichstein Nielsen
2015/11/18 12:12:47
Done.
| |
| 94 /// but does no case conversion for non-ASCII letters. | |
| 95 /// | |
| 96 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 97 /// capital letter at the first difference will compare as less than the other | |
| 98 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 99 /// on strings. | |
| 100 /// | |
| 101 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 102 /// for situations where the strings are known to be ASCII. Examples could | |
| 103 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 104 /// strings with a known structure. | |
| 105 int compareAsciiLowerCase(String a, String b) { | |
| 106 int defaultResult = 0; | |
| 107 for (int i = 0; i < a.length; i++) { | |
| 108 if (i == b.length) return 1; | |
| 109 var aChar = a.codeUnitAt(i); | |
| 110 var bChar = b.codeUnitAt(i); | |
| 111 if (aChar == bChar) continue; | |
| 112 const int upperCaseA = 0x41; | |
| 113 const int upperCaseZ = 0x5d; | |
| 114 int aLowerCase = aChar; | |
| 115 int bLowerCase = bChar; | |
| 116 // Upper case if ASCII letters. | |
| 117 if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20; | |
| 118 if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20; | |
| 119 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; | |
| 120 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 121 } | |
| 122 if (b.length > a.length) return -1; | |
| 123 return defaultResult.sign; | |
| 124 } | |
| 125 | |
| 126 /// Compares strings [a] and [b] according to natural sort ordering. | |
| 127 /// | |
| 128 /// A traditional natural sort ordering is a lexical ordering where embedded | |
| 129 /// numerals (digit sequeunces) are treated as a single unit and ordered by | |
| 130 /// numerical value. | |
| 131 /// This means that `"a10b"` will be ordered after `"a7b"` in natural | |
| 132 /// ordering, where leixcal ordering would put the `1` before the `7`, ignoring | |
| 133 /// that the `1` is part of a larger number. | |
| 134 /// (See: [Natural sort order](https://en.wikipedia.org/wiki/Natural_sort_order) ) | |
|
Bob Nystrom
2015/11/13 16:55:36
Long line. I'd probably do:
/// Compares strings
Lasse Reichstein Nielsen
2015/11/18 12:12:47
Done.
| |
| 135 /// | |
| 136 /// The ordering of this function is not pure numerical ordering on embedded | |
| 137 /// numerals, which would ignore leading zeros. Instead it orders the numerals | |
| 138 /// by length first and then by numerical value if they have the same length. | |
| 139 /// This is indistinguishable from natural sort ordering if either all numerals | |
| 140 /// are zero padded, or if none of them are. | |
| 141 /// | |
| 142 /// Example: | |
| 143 /// The following strings are in the order they would be sorted by using this | |
| 144 /// comparison function: | |
| 145 /// | |
| 146 /// "a", "a0", "a0b", "a1", "a9", "a01", "a10", "a100", "a100b", "aa" | |
|
sra1
2015/11/13 18:25:40
I find it odd that a01 is after a9.
This is neithe
Lasse Reichstein Nielsen
2015/11/18 12:12:47
Good point. It's simpler to explain "numerical ord
| |
| 147 int compareNatural(String a, String b) { | |
| 148 for (int i = 0; i < a.length; i++) { | |
| 149 if (i == b.length) return 1; | |
| 150 var aChar = a.codeUnitAt(i); | |
| 151 var bChar = b.codeUnitAt(i); | |
| 152 if (aChar != bChar) { | |
| 153 return _compareDigitsLexicographically(a, b, i, aChar, bChar); | |
| 154 } | |
| 155 } | |
| 156 if (b.length > a.length) return -1; | |
| 157 return 0; | |
| 158 } | |
| 159 | |
| 160 /// Compares strings [a] and [b] according to lower-case natural sort ordering. | |
| 161 /// | |
| 162 /// ASCII letters are converted to lower case before being compared, like | |
| 163 /// for [compareAsciiLowerCase], then the result is compared like for | |
| 164 /// [compareNatural]. | |
| 165 /// | |
| 166 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 167 /// capital letter at the first difference will compare as less than the other | |
| 168 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 169 /// on strings. | |
| 170 int compareAsciiLowerCaseNatural(String a, String b) { | |
| 171 int defaultResult = 0; // Returned if no difference found. | |
| 172 for (int i = 0; i < a.length; i++) { | |
| 173 if (i == b.length) return 1; | |
| 174 var aChar = a.codeUnitAt(i); | |
| 175 var bChar = b.codeUnitAt(i); | |
| 176 if (aChar == bChar) continue; | |
| 177 const int upperCaseA = 0x41; | |
| 178 const int upperCaseZ = 0x5d; | |
| 179 int aLowerCase = aChar; | |
| 180 int bLowerCase = bChar; | |
| 181 if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20; | |
| 182 if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20; | |
| 183 if (aLowerCase != bLowerCase) { | |
| 184 return _compareDigitsLexicographically(a, b, i, aLowerCase, bLowerCase); | |
| 185 } | |
| 186 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 187 } | |
| 188 if (b.length > a.length) return -1; | |
| 189 return defaultResult.sign; | |
| 190 } | |
| 191 | |
| 192 /// Compares strings [a] and [b] according to upper-case natural sort ordering. | |
| 193 /// | |
| 194 /// ASCII letters are converted to upper case before being compared, like | |
| 195 /// for [compareAsciiUpperCase], then the result is compared like for | |
| 196 /// [compareNatural]. | |
| 197 /// | |
| 198 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 199 /// capital letter at the first difference will compare as less than the other | |
| 200 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 201 /// on strings | |
| 202 int compareAsciiUpperCaseNatural(String a, String b) { | |
| 203 int defaultResult = 0; | |
| 204 for (int i = 0; i < a.length; i++) { | |
| 205 if (i == b.length) return 1; | |
| 206 var aChar = a.codeUnitAt(i); | |
| 207 var bChar = b.codeUnitAt(i); | |
| 208 if (aChar == bChar) continue; | |
| 209 const int lowerCaseA = 0x61; | |
| 210 const int lowerCaseZ = 0x7d; | |
| 211 int aUpperCase = aChar; | |
| 212 int bUpperCase = bChar; | |
| 213 if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20; | |
| 214 if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20; | |
| 215 if (aUpperCase != bUpperCase) { | |
| 216 return _compareDigitsLexicographically(a, b, i, aUpperCase, bUpperCase); | |
| 217 } | |
| 218 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 219 } | |
| 220 if (b.length > a.length) return -1; | |
| 221 return defaultResult.sign; | |
| 222 } | |
| 223 | |
| 224 /// Check for numbers overlapping the current mismatched characters. | |
| 225 /// | |
| 226 /// If either [aChar] or [bChar] or both are digits we check if one is part of | |
| 227 /// a longer digits sequence than the other, | |
| 228 /// and let the longer sequence compare as greater. | |
| 229 /// | |
| 230 /// Otherwise just returns the difference between [aChar] and [bChar]. | |
| 231 int _compareDigitsLexicographically( | |
| 232 String a, String b, int index, int aChar, int bChar) { | |
| 233 assert(aChar != bChar); | |
| 234 var aIsDigit = _isDigit(aChar); | |
| 235 var bIsDigit = _isDigit(bChar); | |
| 236 if (aIsDigit) { | |
| 237 if (bIsDigit) { | |
| 238 int lengthCompare = _compareDigitCount(a, b, index); | |
| 239 if (lengthCompare != 0) return lengthCompare; | |
| 240 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { | |
| 241 // aChar is the continuation of a longer number. | |
| 242 return 1; | |
| 243 } | |
| 244 } else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) { | |
| 245 // bChar is the continuation of a longer number. | |
| 246 return -1; | |
| 247 } | |
| 248 // Characters are both non-digits. | |
| 249 return (aChar - bChar).sign; | |
| 250 } | |
| 251 | |
| 252 /// Checks which of [a] and [b] has the longest sequence of digits. | |
| 253 /// | |
| 254 /// Starts counting from `i + 1` (assumes that `a[i]` and `b[i]` are both | |
| 255 /// already known to be digits). | |
| 256 int _compareDigitCount(String a, String b, int i) { | |
| 257 while (++i < a.length) { | |
| 258 bool aIsDigit = _isDigit(a.codeUnitAt(i)); | |
| 259 if (i == b.length) return aIsDigit ? 1 : 0; | |
| 260 bool bIsDigit = _isDigit(b.codeUnitAt(i)); | |
| 261 if (aIsDigit) { | |
| 262 if (bIsDigit) continue; | |
| 263 return 1; | |
| 264 } else if (bIsDigit) { | |
| 265 return -1; | |
| 266 } else { | |
| 267 return 0; | |
| 268 } | |
| 269 } | |
| 270 if (i < b.length && _isDigit(b.codeUnitAt(i))) { | |
| 271 return -1; | |
| 272 } | |
| 273 return 0; | |
| 274 } | |
| 275 | |
| 276 bool _isDigit(int charCode) => (charCode ^ 0x30) <= 9; | |
| OLD | NEW |