Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 library dart.pkg.collection.comparators; | |
| 6 | |
| 7 /// Checks if strings [a] and [b] differ only on the case of ASCII letters. | |
| 8 /// | |
| 9 /// Strings are equal if they have the same length, and the characters at | |
| 10 /// each index are the same, or they are ASCII letters where one is upper-case | |
| 11 /// and the other is the lower-case version of the same letter. | |
| 12 /// | |
| 13 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 14 /// for situations where the strings are known to be ASCII. Examples could | |
| 15 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 16 /// strings with a known structure. | |
|
kevmoo
2015/11/10 19:30:35
Describe how non-ASCII characters are handled.
| |
| 17 bool equalsIgnoreAsciiCase(String a, String b) { | |
| 18 if (a.length != b.length) return false; | |
| 19 for (int i = 0; i < a.length; i++) { | |
| 20 var aChar = a.codeUnitAt(i); | |
| 21 var bChar = b.codeUnitAt(i); | |
| 22 if (aChar == bChar) continue; | |
| 23 const int upperCaseA = 0x41; | |
| 24 const int upperCaseZ = 0x5d; | |
| 25 if (aChar ^ bChar != 0x20) return false; | |
| 26 if (upperCaseA <= aChar && aChar <= upperCaseZ) continue; | |
| 27 if (upperCaseA <= bChar && bChar <= upperCaseZ) continue; | |
| 28 return false; | |
| 29 } | |
| 30 return true; | |
| 31 } | |
| 32 | |
| 33 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. | |
|
kevmoo
2015/11/10 19:30:35
Describe how non-ASCII characters are handled.
| |
| 34 int hashIgnoreAsciiCase(String string) { | |
| 35 int hash = 0; | |
| 36 for (int i = 0; i < string.length; i++) { | |
| 37 int char = string.codeUnitAt(i); | |
| 38 const int lowerCaseA = 0x61; | |
| 39 const int lowerCaseZ = 0x7D; | |
| 40 if (lowerCaseA <= char && char <= lowerCaseZ) char -= 0x20; | |
| 41 hash = 0x1fffffff & (hash + char); | |
| 42 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); | |
| 43 hash >>= 6; | |
| 44 } | |
| 45 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); | |
| 46 hash >>= 11; | |
| 47 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); | |
| 48 } | |
| 49 | |
| 50 /// Compares [a] and [b] lexically, converting ASCII letters to upper case. | |
| 51 /// | |
| 52 /// Comparison treats all lower-case ASCII letters as their upper-case letter, | |
| 53 /// but does no case conversion for non-ASCII letters. | |
| 54 /// | |
| 55 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 56 /// capital letter at the first difference will compare as less than the other | |
| 57 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 58 /// on strings. | |
| 59 /// | |
| 60 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 61 /// for situations where the strings are known to be ASCII. Examples could | |
| 62 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 63 /// strings with a known structure. | |
| 64 int compareAsciiUpperCase(String a, String b) { | |
| 65 int defaultResult = 0; // Returned if no difference found. | |
| 66 for (int i = 0; i < a.length; i++) { | |
| 67 if (i == b.length) return 1; | |
| 68 var aChar = a.codeUnitAt(i); | |
| 69 var bChar = b.codeUnitAt(i); | |
| 70 if (aChar == bChar) continue; | |
| 71 const int lowerCaseA = 0x61; | |
| 72 const int lowerCaseZ = 0x7d; | |
| 73 // Lower case if letters. | |
| 74 int aUpperCase = aChar; | |
| 75 int bUpperCase = bChar; | |
| 76 if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20; | |
| 77 if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20; | |
| 78 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; | |
| 79 if (defaultResult == 0) defaultResult = (aChar - bChar); | |
| 80 } | |
| 81 if (b.length > a.length) return -1; | |
| 82 return defaultResult.sign; | |
| 83 } | |
| 84 | |
| 85 /// Compares [a] and [b] lexically, converting ASCII letters to lower case. | |
| 86 /// | |
| 87 /// Comparison treats all upper-case ASCII letters as lower-case letter, | |
| 88 /// but does no case conversion for non-ASCII letters. | |
| 89 /// | |
| 90 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 91 /// capital letter at the first difference will compare as less than the other | |
| 92 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 93 /// on strings. | |
| 94 /// | |
| 95 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
| 96 /// for situations where the strings are known to be ASCII. Examples could | |
| 97 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
| 98 /// strings with a known structure. | |
| 99 int compareAsciiLowerCase(String a, String b) { | |
| 100 int defaultResult = 0; | |
| 101 for (int i = 0; i < a.length; i++) { | |
| 102 if (i == b.length) return 1; | |
| 103 var aChar = a.codeUnitAt(i); | |
| 104 var bChar = b.codeUnitAt(i); | |
| 105 if (aChar == bChar) continue; | |
| 106 const int upperCaseA = 0x41; | |
| 107 const int upperCaseZ = 0x5d; | |
| 108 int aLowerCase = aChar; | |
| 109 int bLowerCase = bChar; | |
| 110 // Upper case if ASCII letters. | |
| 111 if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20; | |
| 112 if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20; | |
| 113 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; | |
| 114 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 115 } | |
| 116 if (b.length > a.length) return -1; | |
| 117 return defaultResult.sign; | |
| 118 } | |
| 119 | |
| 120 /// Compares strings [a] and [b] lexically, with embedded numbers recognized. | |
|
sra1
2015/11/10 04:41:40
'with embedded sequences of digits compared as a u
Lasse Reichstein Nielsen
2015/11/10 08:21:33
What I intend is:
Leixcally: Dictionary sorting (
| |
| 121 /// | |
| 122 /// If both strings contain an embedded number at the same locations, the | |
| 123 /// numbers are compared lexicographically. This means that a longer number | |
| 124 /// is considered greater than a shorter number, and numbers of the same length | |
| 125 /// are ordered numerically (or alhabetically, it's the same thing). | |
| 126 /// If numbers do not have leading zeros, lexicographical ordering is also | |
| 127 /// the same as numerical ordering. | |
|
sra1
2015/11/10 04:41:40
Add examples in the doc to tease out when this is
Lasse Reichstein Nielsen
2015/11/10 08:21:33
Good idea.
| |
| 128 int compareWithNumbers(String a, String b) { | |
|
sra1
2015/11/10 04:41:40
'With' makes it sound like one of the arguments is
Lasse Reichstein Nielsen
2015/11/10 08:21:33
Yes, the name sucks. Suggestions will be appreciat
| |
| 129 for (int i = 0; i < a.length; i++) { | |
| 130 if (i == b.length) return 1; | |
| 131 var aChar = a.codeUnitAt(i); | |
| 132 var bChar = b.codeUnitAt(i); | |
| 133 if (aChar != bChar) { | |
| 134 return _compareDigitsLexicographically(a, b, i, aChar, bChar); | |
| 135 } | |
| 136 } | |
| 137 if (b.length > a.length) return -1; | |
| 138 return 0; | |
| 139 } | |
| 140 | |
| 141 /// Compares strings [a] and [b] lexically, with embedded numbers recognized. | |
| 142 /// | |
| 143 /// ASCII letters are converted to lower case before being compared, like | |
| 144 /// for [compareAsciiLowerCase]. | |
| 145 /// | |
| 146 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 147 /// capital letter at the first difference will compare as less than the other | |
| 148 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 149 /// on strings. | |
| 150 /// | |
| 151 /// If both strings contain an embedded number at the same locations, the | |
| 152 /// numbers are compared lexicographically. This means that a longer number | |
| 153 /// is considered greater than a shorter number, and numbers of the same length | |
| 154 /// are ordered numerically (or alhabetically, it's the same thing). | |
| 155 /// If numbers do not have leading zeros, lexicographical ordering is also | |
| 156 /// the same as numerical ordering. | |
| 157 int compareAsciiLowerCaseWithNumbers(String a, String b) { | |
| 158 int defaultResult = 0; // Returned if no difference found. | |
| 159 for (int i = 0; i < a.length; i++) { | |
| 160 if (i == b.length) return 1; | |
| 161 var aChar = a.codeUnitAt(i); | |
| 162 var bChar = b.codeUnitAt(i); | |
| 163 if (aChar == bChar) continue; | |
| 164 const int upperCaseA = 0x41; | |
| 165 const int upperCaseZ = 0x5d; | |
| 166 int aLowerCase = aChar; | |
| 167 int bLowerCase = bChar; | |
| 168 if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20; | |
| 169 if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20; | |
| 170 if (aLowerCase != bLowerCase) { | |
| 171 return _compareDigitsLexicographically(a, b, i, aLowerCase, bLowerCase); | |
| 172 } | |
| 173 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 174 } | |
| 175 if (b.length > a.length) return -1; | |
| 176 return defaultResult.sign; | |
| 177 } | |
| 178 | |
| 179 /// Compares strings [a] and [b] lexically, with embedded numbers recognized. | |
| 180 /// | |
| 181 /// ASCII letters are converted to upper case before being compared, like | |
| 182 /// for [compareAsciiUpperCase]. | |
| 183 /// | |
| 184 /// If two strings differ only on the case of ASCII letters, the one with the | |
| 185 /// capital letter at the first difference will compare as less than the other | |
| 186 /// string. This tie-breaking ensures that the comparison is a total ordering | |
| 187 /// on strings. | |
| 188 /// | |
| 189 /// If both strings contain an embedded number at the same locations, the | |
| 190 /// numbers are compared lexicographically. This means that a longer number | |
| 191 /// is considered greater than a shorter number, and numbers of the same length | |
| 192 /// are ordered numerically (or alhabetically, it's the same thing). | |
| 193 /// If numbers do not have leading zeros, lexicographical ordering is also | |
| 194 /// the same as numerical ordering. | |
| 195 int compareAsciiUpperCaseWithNumbers(String a, String b) { | |
| 196 int defaultResult = 0; | |
| 197 for (int i = 0; i < a.length; i++) { | |
| 198 if (i == b.length) return 1; | |
| 199 var aChar = a.codeUnitAt(i); | |
| 200 var bChar = b.codeUnitAt(i); | |
| 201 if (aChar == bChar) continue; | |
| 202 const int lowerCaseA = 0x61; | |
| 203 const int lowerCaseZ = 0x7d; | |
| 204 int aUpperCase = aChar; | |
| 205 int bUpperCase = bChar; | |
| 206 if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20; | |
| 207 if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20; | |
| 208 if (aUpperCase != bUpperCase) { | |
| 209 return _compareDigitsLexicographically(a, b, i, aUpperCase, bUpperCase); | |
| 210 } | |
| 211 if (defaultResult == 0) defaultResult = aChar - bChar; | |
| 212 } | |
| 213 if (b.length > a.length) return -1; | |
| 214 return defaultResult.sign; | |
| 215 } | |
| 216 | |
| 217 /// Check for numbers overlapping the current mismatched characters. | |
| 218 /// | |
| 219 /// If either [aChar] or [bChar] or both are digits we check if one is part of | |
| 220 /// a longer digits sequence than the other, | |
| 221 /// and let the longer sequence compare as greater. | |
| 222 /// | |
| 223 /// Otherwise just returns the difference between [aChar] and [bChar]. | |
| 224 int _compareDigitsLexicographically( | |
| 225 String a, String b, int index, int aChar, int bChar) { | |
| 226 assert(aChar != bChar); | |
| 227 var aIsDigit = _isDigit(aChar); | |
| 228 var bIsDigit = _isDigit(bChar); | |
| 229 if (aIsDigit) { | |
| 230 if (bIsDigit) { | |
| 231 int lengthCompare = _compareDigitCount(a, b, index); | |
| 232 if (lengthCompare != 0) return lengthCompare; | |
| 233 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { | |
| 234 // aChar is the continuation of a longer number. | |
| 235 return 1; | |
| 236 } | |
| 237 } else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) { | |
| 238 // bChar is the continuation of a longer number. | |
| 239 return -1; | |
| 240 } | |
| 241 // Characters are not both part , or part | |
|
sra1
2015/11/10 04:41:40
Fix comment.
Lasse Reichstein Nielsen
2015/11/10 08:21:33
I wonder what I meant to say :)
| |
| 242 return (aChar - bChar).sign; | |
| 243 } | |
| 244 | |
| 245 /// Checks which of [a] and [b] has the longest sequence of digits. | |
| 246 /// | |
| 247 /// Starts counting from `i + 1` (assumes that `a[i]` and `b[i]` are both | |
| 248 /// already known to be digits). | |
| 249 int _compareDigitCount(String a, String b, int i) { | |
| 250 while (++i < a.length) { | |
| 251 bool aIsDigit = _isDigit(a.codeUnitAt(i)); | |
| 252 if (i == b.length) return aIsDigit ? 1 : 0; | |
| 253 bool bIsDigit = _isDigit(b.codeUnitAt(i)); | |
| 254 if (aIsDigit) { | |
| 255 if (bIsDigit) continue; | |
| 256 return 1; | |
| 257 } else if (bIsDigit) { | |
| 258 return -1; | |
| 259 } else { | |
| 260 return 0; | |
| 261 } | |
| 262 } | |
| 263 if (i < b.length && _isDigit(b.codeUnitAt(i))) { | |
| 264 return -1; | |
| 265 } | |
| 266 return 0; | |
| 267 } | |
| 268 | |
| 269 bool _isDigit(int charCode) => (charCode ^ 0x30) <= 9; | |
| OLD | NEW |