OLD | NEW |
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 library dart.pkg.collection.comparators; | |
6 | |
7 // Character constants. | 5 // Character constants. |
8 const int _zero = 0x30; | 6 const int _zero = 0x30; |
9 const int _upperCaseA = 0x41; | 7 const int _upperCaseA = 0x41; |
10 const int _upperCaseZ = 0x5a; | 8 const int _upperCaseZ = 0x5a; |
11 const int _lowerCaseA = 0x61; | 9 const int _lowerCaseA = 0x61; |
12 const int _lowerCaseZ = 0x7a; | 10 const int _lowerCaseZ = 0x7a; |
13 const int _asciiCaseBit = 0x20; | 11 const int _asciiCaseBit = 0x20; |
14 | 12 |
15 /// Checks if strings [a] and [b] differ only on the case of ASCII letters. | 13 /// Checks if strings [a] and [b] differ only on the case of ASCII letters. |
16 /// | 14 /// |
17 /// Strings are equal if they have the same length, and the characters at | 15 /// Strings are equal if they have the same length, and the characters at |
18 /// each index are the same, or they are ASCII letters where one is upper-case | 16 /// each index are the same, or they are ASCII letters where one is upper-case |
19 /// and the other is the lower-case version of the same letter. | 17 /// and the other is the lower-case version of the same letter. |
20 /// | 18 /// |
21 /// The comparison does not ignore the case of non-ASCII letters, so | 19 /// The comparison does not ignore the case of non-ASCII letters, so |
22 /// an upper-case ae-ligature (Æ) is different from | 20 /// an upper-case ae-ligature (Æ) is different from |
23 /// a lower case ae-ligature (æ). | 21 /// a lower case ae-ligature (æ). |
24 /// | 22 /// |
25 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | 23 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense |
26 /// for situations where the strings are known to be ASCII. Examples could | 24 /// for situations where the strings are known to be ASCII. Examples could |
27 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | 25 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar |
28 /// strings with a known structure. | 26 /// strings with a known structure. |
29 bool equalsIgnoreAsciiCase(String a, String b) { | 27 bool equalsIgnoreAsciiCase(String a, String b) { |
30 if (a.length != b.length) return false; | 28 if (a.length != b.length) return false; |
31 for (int i = 0; i < a.length; i++) { | 29 for (int i = 0; i < a.length; i++) { |
32 var aChar = a.codeUnitAt(i); | 30 var aChar = a.codeUnitAt(i); |
33 var bChar = b.codeUnitAt(i); | 31 var bChar = b.codeUnitAt(i); |
34 if (aChar == bChar) continue; | 32 if (aChar == bChar) continue; |
35 // Quick-check for whether this may be different cases of the same letter. | 33 // Quick-check for whether this may be different cases of the same letter. |
36 if (aChar ^ bChar != _asciiCaseBit) return false; | 34 if (aChar ^ bChar != _asciiCaseBit) return false; |
37 // If it's possible, then check if either character is actually an ASCII | 35 // If it's possible, then check if either character is actually an ASCII |
38 // letter. | 36 // letter. |
39 int aCharUpperCase = aChar | _asciiCaseBit; | 37 int aCharLowerCase = aChar | _asciiCaseBit; |
40 if (_upperCaseA <= aCharUpperCase && aCharUpperCase <= _upperCaseZ) { | 38 if (_lowerCaseA <= aCharLowerCase && aCharLowerCase <= _lowerCaseZ) { |
41 continue; | 39 continue; |
42 } | 40 } |
43 return false; | 41 return false; |
44 } | 42 } |
45 return true; | 43 return true; |
46 } | 44 } |
47 | 45 |
48 | |
49 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. | 46 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. |
50 /// | 47 /// |
51 /// The hash code is unaffected by changing the case of ASCII letters, but | 48 /// The hash code is unaffected by changing the case of ASCII letters, but |
52 /// the case of non-ASCII letters do affect the result. | 49 /// the case of non-ASCII letters do affect the result. |
53 int hashIgnoreAsciiCase(String string) { | 50 int hashIgnoreAsciiCase(String string) { |
54 // Jenkins hash code ( http://en.wikipedia.org/wiki/Jenkins_hash_function). | 51 // Jenkins hash code ( http://en.wikipedia.org/wiki/Jenkins_hash_function). |
55 // adapted to smi values. | 52 // adapted to smi values. |
56 // Same hash used by dart2js for strings, modified to ignore ASCII letter | 53 // Same hash used by dart2js for strings, modified to ignore ASCII letter |
57 // case. | 54 // case. |
58 int hash = 0; | 55 int hash = 0; |
59 for (int i = 0; i < string.length; i++) { | 56 for (int i = 0; i < string.length; i++) { |
60 int char = string.codeUnitAt(i); | 57 int char = string.codeUnitAt(i); |
61 // Convert lower-case ASCII letters to upper case.upper | 58 // Convert lower-case ASCII letters to upper case.upper |
62 // This ensures that strings that differ only in case will have the | 59 // This ensures that strings that differ only in case will have the |
63 // same hash code. | 60 // same hash code. |
64 if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit; | 61 if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit; |
65 hash = 0x1fffffff & (hash + char); | 62 hash = 0x1fffffff & (hash + char); |
66 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); | 63 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); |
67 hash >>= 6; | 64 hash >>= 6; |
68 } | 65 } |
69 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); | 66 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); |
70 hash >>= 11; | 67 hash >>= 11; |
71 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); | 68 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); |
72 } | 69 } |
73 | 70 |
74 | |
75 /// Compares [a] and [b] lexically, converting ASCII letters to upper case. | 71 /// Compares [a] and [b] lexically, converting ASCII letters to upper case. |
76 /// | 72 /// |
77 /// Comparison treats all lower-case ASCII letters as upper-case letters, | 73 /// Comparison treats all lower-case ASCII letters as upper-case letters, |
78 /// but does no case conversion for non-ASCII letters. | 74 /// but does no case conversion for non-ASCII letters. |
79 /// | 75 /// |
80 /// If two strings differ only on the case of ASCII letters, the one with the | 76 /// If two strings differ only on the case of ASCII letters, the one with the |
81 /// capital letter at the first difference will compare as less than the other | 77 /// capital letter at the first difference will compare as less than the other |
82 /// string. This tie-breaking ensures that the comparison is a total ordering | 78 /// string. This tie-breaking ensures that the comparison is a total ordering |
83 /// on strings and is compatible with equality. | 79 /// on strings and is compatible with equality. |
84 /// | 80 /// |
(...skipping 17 matching lines...) Expand all Loading... |
102 if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { | 98 if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { |
103 bUpperCase -= _asciiCaseBit; | 99 bUpperCase -= _asciiCaseBit; |
104 } | 100 } |
105 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; | 101 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; |
106 if (defaultResult == 0) defaultResult = (aChar - bChar); | 102 if (defaultResult == 0) defaultResult = (aChar - bChar); |
107 } | 103 } |
108 if (b.length > a.length) return -1; | 104 if (b.length > a.length) return -1; |
109 return defaultResult.sign; | 105 return defaultResult.sign; |
110 } | 106 } |
111 | 107 |
112 | |
113 /// Compares [a] and [b] lexically, converting ASCII letters to lower case. | 108 /// Compares [a] and [b] lexically, converting ASCII letters to lower case. |
114 /// | 109 /// |
115 /// Comparison treats all upper-case ASCII letters as lower-case letters, | 110 /// Comparison treats all upper-case ASCII letters as lower-case letters, |
116 /// but does no case conversion for non-ASCII letters. | 111 /// but does no case conversion for non-ASCII letters. |
117 /// | 112 /// |
118 /// If two strings differ only on the case of ASCII letters, the one with the | 113 /// If two strings differ only on the case of ASCII letters, the one with the |
119 /// capital letter at the first difference will compare as less than the other | 114 /// capital letter at the first difference will compare as less than the other |
120 /// string. This tie-breaking ensures that the comparison is a total ordering | 115 /// string. This tie-breaking ensures that the comparison is a total ordering |
121 /// on strings. | 116 /// on strings. |
122 /// | 117 /// |
(...skipping 17 matching lines...) Expand all Loading... |
140 if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { | 135 if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { |
141 aLowerCase += _asciiCaseBit; | 136 aLowerCase += _asciiCaseBit; |
142 } | 137 } |
143 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; | 138 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; |
144 if (defaultResult == 0) defaultResult = aChar - bChar; | 139 if (defaultResult == 0) defaultResult = aChar - bChar; |
145 } | 140 } |
146 if (b.length > a.length) return -1; | 141 if (b.length > a.length) return -1; |
147 return defaultResult.sign; | 142 return defaultResult.sign; |
148 } | 143 } |
149 | 144 |
150 /// Compares strings [a] and [b] according to [natural sort ordering]. | 145 /// Compares strings [a] and [b] according to [natural sort ordering][]. |
151 /// | 146 /// |
152 /// A natural sort ordering is a lexical ordering where embedded | 147 /// A natural sort ordering is a lexical ordering where embedded |
153 /// numerals (digit sequences) are treated as a single unit and ordered by | 148 /// numerals (digit sequences) are treated as a single unit and ordered by |
154 /// numerical value. | 149 /// numerical value. |
155 /// This means that `"a10b"` will be ordered after `"a7b"` in natural | 150 /// This means that `"a10b"` will be ordered after `"a7b"` in natural |
156 /// ordering, where lexical ordering would put the `1` before the `7`, ignoring | 151 /// ordering, where lexical ordering would put the `1` before the `7`, ignoring |
157 /// that the `1` is part of a larger number. | 152 /// that the `1` is part of a larger number. |
158 /// | 153 /// |
159 /// Example: | 154 /// Example: |
160 /// The following strings are in the order they would be sorted by using this | 155 /// The following strings are in the order they would be sorted by using this |
161 /// comparison function: | 156 /// comparison function: |
162 /// | 157 /// |
163 /// "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa" | 158 /// "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa" |
164 /// | 159 /// |
165 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order | 160 /// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order |
166 int compareNatural(String a, String b) { | 161 int compareNatural(String a, String b) { |
167 for (int i = 0; i < a.length; i++) { | 162 for (int i = 0; i < a.length; i++) { |
168 if (i >= b.length) return 1; | 163 if (i >= b.length) return 1; |
169 var aChar = a.codeUnitAt(i); | 164 var aChar = a.codeUnitAt(i); |
170 var bChar = b.codeUnitAt(i); | 165 var bChar = b.codeUnitAt(i); |
171 if (aChar != bChar) { | 166 if (aChar != bChar) { |
172 return _compareNaturally(a, b, i, aChar, bChar); | 167 return _compareNaturally(a, b, i, aChar, bChar); |
173 } | 168 } |
174 } | 169 } |
175 if (b.length > a.length) return -1; | 170 if (b.length > a.length) return -1; |
176 return 0; | 171 return 0; |
177 } | 172 } |
178 | 173 |
179 /// Compares strings [a] and [b] according to lower-case | 174 /// Compares strings [a] and [b] according to lower-case |
180 /// [natural sort ordering]. | 175 /// [natural sort ordering][]. |
181 /// | 176 /// |
182 /// ASCII letters are converted to lower case before being compared, like | 177 /// ASCII letters are converted to lower case before being compared, like |
183 /// for [compareAsciiLowerCase], then the result is compared like for | 178 /// for [compareAsciiLowerCase], then the result is compared like for |
184 /// [compareNatural]. | 179 /// [compareNatural]. |
185 /// | 180 /// |
186 /// If two strings differ only on the case of ASCII letters, the one with the | 181 /// If two strings differ only on the case of ASCII letters, the one with the |
187 /// capital letter at the first difference will compare as less than the other | 182 /// capital letter at the first difference will compare as less than the other |
188 /// string. This tie-breaking ensures that the comparison is a total ordering | 183 /// string. This tie-breaking ensures that the comparison is a total ordering |
189 /// on strings. | 184 /// on strings. |
190 /// | 185 /// |
(...skipping 16 matching lines...) Expand all Loading... |
207 if (aLowerCase != bLowerCase) { | 202 if (aLowerCase != bLowerCase) { |
208 return _compareNaturally(a, b, i, aLowerCase, bLowerCase); | 203 return _compareNaturally(a, b, i, aLowerCase, bLowerCase); |
209 } | 204 } |
210 if (defaultResult == 0) defaultResult = aChar - bChar; | 205 if (defaultResult == 0) defaultResult = aChar - bChar; |
211 } | 206 } |
212 if (b.length > a.length) return -1; | 207 if (b.length > a.length) return -1; |
213 return defaultResult.sign; | 208 return defaultResult.sign; |
214 } | 209 } |
215 | 210 |
216 /// Compares strings [a] and [b] according to upper-case | 211 /// Compares strings [a] and [b] according to upper-case |
217 /// [natural sort ordering]. | 212 /// [natural sort ordering][]. |
218 /// | 213 /// |
219 /// ASCII letters are converted to upper case before being compared, like | 214 /// ASCII letters are converted to upper case before being compared, like |
220 /// for [compareAsciiUpperCase], then the result is compared like for | 215 /// for [compareAsciiUpperCase], then the result is compared like for |
221 /// [compareNatural]. | 216 /// [compareNatural]. |
222 /// | 217 /// |
223 /// If two strings differ only on the case of ASCII letters, the one with the | 218 /// If two strings differ only on the case of ASCII letters, the one with the |
224 /// capital letter at the first difference will compare as less than the other | 219 /// capital letter at the first difference will compare as less than the other |
225 /// string. This tie-breaking ensures that the comparison is a total ordering | 220 /// string. This tie-breaking ensures that the comparison is a total ordering |
226 /// on strings | 221 /// on strings |
227 /// | 222 /// |
(...skipping 25 matching lines...) Expand all Loading... |
253 /// Check for numbers overlapping the current mismatched characters. | 248 /// Check for numbers overlapping the current mismatched characters. |
254 /// | 249 /// |
255 /// If both [aChar] and [bChar] are digits, use numerical comparison. | 250 /// If both [aChar] and [bChar] are digits, use numerical comparison. |
256 /// Check if the previous characters is a non-zero number, and if not, | 251 /// Check if the previous characters is a non-zero number, and if not, |
257 /// skip - but count - leading zeros before comparing numbers. | 252 /// skip - but count - leading zeros before comparing numbers. |
258 /// | 253 /// |
259 /// If one is a digit and the other isn't, check if the previous character | 254 /// If one is a digit and the other isn't, check if the previous character |
260 /// is a digit, and if so, the the one with the digit is the greater number. | 255 /// is a digit, and if so, the the one with the digit is the greater number. |
261 /// | 256 /// |
262 /// Otherwise just returns the difference between [aChar] and [bChar]. | 257 /// Otherwise just returns the difference between [aChar] and [bChar]. |
263 int _compareNaturally( | 258 int _compareNaturally(String a, String b, int index, int aChar, int bChar) { |
264 String a, String b, int index, int aChar, int bChar) { | |
265 assert(aChar != bChar); | 259 assert(aChar != bChar); |
266 var aIsDigit = _isDigit(aChar); | 260 var aIsDigit = _isDigit(aChar); |
267 var bIsDigit = _isDigit(bChar); | 261 var bIsDigit = _isDigit(bChar); |
268 if (aIsDigit) { | 262 if (aIsDigit) { |
269 if (bIsDigit) { | 263 if (bIsDigit) { |
270 return _compareNumerically(a, b, aChar, bChar, index); | 264 return _compareNumerically(a, b, aChar, bChar, index); |
271 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { | 265 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { |
272 // aChar is the continuation of a longer number. | 266 // aChar is the continuation of a longer number. |
273 return 1; | 267 return 1; |
274 } | 268 } |
(...skipping 22 matching lines...) Expand all Loading... |
297 // digit. | 291 // digit. |
298 return (aChar - bChar).sign; | 292 return (aChar - bChar).sign; |
299 } | 293 } |
300 // Not part of larger (non-zero) number, so skip leading zeros before | 294 // Not part of larger (non-zero) number, so skip leading zeros before |
301 // comparing numbers. | 295 // comparing numbers. |
302 int aIndex = index; | 296 int aIndex = index; |
303 int bIndex = index; | 297 int bIndex = index; |
304 if (aChar == _zero) { | 298 if (aChar == _zero) { |
305 do { | 299 do { |
306 aIndex++; | 300 aIndex++; |
307 if (aIndex == a.length) return -1; // number in a is zero, b is not. | 301 if (aIndex == a.length) return -1; // number in a is zero, b is not. |
308 aChar = a.codeUnitAt(aIndex); | 302 aChar = a.codeUnitAt(aIndex); |
309 } while (aChar == _zero); | 303 } while (aChar == _zero); |
310 if (!_isDigit(aChar)) return -1; | 304 if (!_isDigit(aChar)) return -1; |
311 } else if (bChar == _zero) { | 305 } else if (bChar == _zero) { |
312 do { | 306 do { |
313 bIndex++; | 307 bIndex++; |
314 if (bIndex == b.length) return 1; // number in b is zero, a is not. | 308 if (bIndex == b.length) return 1; // number in b is zero, a is not. |
315 bChar = b.codeUnitAt(bIndex); | 309 bChar = b.codeUnitAt(bIndex); |
316 } while (bChar == _zero); | 310 } while (bChar == _zero); |
317 if (!_isDigit(bChar)) return 1; | 311 if (!_isDigit(bChar)) return 1; |
318 } | 312 } |
319 if (aChar != bChar) { | 313 if (aChar != bChar) { |
320 int result = _compareDigitCount(a, b, aIndex, bIndex); | 314 int result = _compareDigitCount(a, b, aIndex, bIndex); |
321 if (result != 0) return result; | 315 if (result != 0) return result; |
322 return (aChar - bChar).sign; | 316 return (aChar - bChar).sign; |
323 } | 317 } |
324 // Same leading digit, one had more leading zeros. | 318 // Same leading digit, one had more leading zeros. |
(...skipping 13 matching lines...) Expand all Loading... |
338 } | 332 } |
339 if (aIsDigit) { | 333 if (aIsDigit) { |
340 if (bIsDigit) { | 334 if (bIsDigit) { |
341 if (aChar == bChar) continue; | 335 if (aChar == bChar) continue; |
342 // First different digit found. | 336 // First different digit found. |
343 break; | 337 break; |
344 } | 338 } |
345 // bChar is non-digit, so a has longer number. | 339 // bChar is non-digit, so a has longer number. |
346 return 1; | 340 return 1; |
347 } else if (bIsDigit) { | 341 } else if (bIsDigit) { |
348 return -1; // b has longer number. | 342 return -1; // b has longer number. |
349 } else { | 343 } else { |
350 // Neither is digit, so numbers had same numerical value. | 344 // Neither is digit, so numbers had same numerical value. |
351 // Fall back on number of leading zeros | 345 // Fall back on number of leading zeros |
352 // (reflected by difference in indices). | 346 // (reflected by difference in indices). |
353 return (aIndex - bIndex).sign; | 347 return (aIndex - bIndex).sign; |
354 } | 348 } |
355 } | 349 } |
356 // At first differing digits. | 350 // At first differing digits. |
357 int result = _compareDigitCount(a, b, aIndex, bIndex); | 351 int result = _compareDigitCount(a, b, aIndex, bIndex); |
358 if (result != 0) return result; | 352 if (result != 0) return result; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
390 /// If there is no non-zero digits before, then leading zeros at [index] | 384 /// If there is no non-zero digits before, then leading zeros at [index] |
391 /// are also ignored when comparing numerically. If there is a non-zero digit | 385 /// are also ignored when comparing numerically. If there is a non-zero digit |
392 /// before, then zeros at [index] are significant. | 386 /// before, then zeros at [index] are significant. |
393 bool _isNonZeroNumberSuffix(String string, int index) { | 387 bool _isNonZeroNumberSuffix(String string, int index) { |
394 while (--index >= 0) { | 388 while (--index >= 0) { |
395 int char = string.codeUnitAt(index); | 389 int char = string.codeUnitAt(index); |
396 if (char != _zero) return _isDigit(char); | 390 if (char != _zero) return _isDigit(char); |
397 } | 391 } |
398 return false; | 392 return false; |
399 } | 393 } |
OLD | NEW |