Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(424)

Side by Side Diff: lib/src/comparators.dart

Issue 1412293008: Add string comparators that can compare ASCII strings ignoring case. (Closed) Base URL: https://github.com/dart-lang/collection.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/collection.dart ('k') | test/comparators_test.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 library dart.pkg.collection.comparators;
6
7 /// Checks if strings [a] and [b] differ only on the case of ASCII letters.
8 ///
9 /// Strings are equal if they have the same length, and the characters at
10 /// each index are the same, or they are ASCII letters where one is upper-case
11 /// and the other is the lower-case version of the same letter.
12 ///
13 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense
14 /// for situations where the strings are known to be ASCII. Examples could
15 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar
16 /// strings with a known structure.
kevmoo 2015/11/10 19:30:35 Describe how non-ASCII characters are handled.
17 bool equalsIgnoreAsciiCase(String a, String b) {
18 if (a.length != b.length) return false;
19 for (int i = 0; i < a.length; i++) {
20 var aChar = a.codeUnitAt(i);
21 var bChar = b.codeUnitAt(i);
22 if (aChar == bChar) continue;
23 const int upperCaseA = 0x41;
24 const int upperCaseZ = 0x5d;
25 if (aChar ^ bChar != 0x20) return false;
26 if (upperCaseA <= aChar && aChar <= upperCaseZ) continue;
27 if (upperCaseA <= bChar && bChar <= upperCaseZ) continue;
28 return false;
29 }
30 return true;
31 }
32
33 /// Hash code for a string which is compatible with [equalsIgnoreAsciiCase].
kevmoo 2015/11/10 19:30:35 Describe how non-ASCII characters are handled.
34 int hashIgnoreAsciiCase(String string) {
35 int hash = 0;
36 for (int i = 0; i < string.length; i++) {
37 int char = string.codeUnitAt(i);
38 const int lowerCaseA = 0x61;
39 const int lowerCaseZ = 0x7D;
40 if (lowerCaseA <= char && char <= lowerCaseZ) char -= 0x20;
41 hash = 0x1fffffff & (hash + char);
42 hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10));
43 hash >>= 6;
44 }
45 hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3));
46 hash >>= 11;
47 return 0x1fffffff & (hash + ((0x00003fff & hash) << 15));
48 }
49
50 /// Compares [a] and [b] lexically, converting ASCII letters to upper case.
51 ///
52 /// Comparison treats all lower-case ASCII letters as their upper-case letter,
53 /// but does no case conversion for non-ASCII letters.
54 ///
55 /// If two strings differ only on the case of ASCII letters, the one with the
56 /// capital letter at the first difference will compare as less than the other
57 /// string. This tie-breaking ensures that the comparison is a total ordering
58 /// on strings.
59 ///
60 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense
61 /// for situations where the strings are known to be ASCII. Examples could
62 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar
63 /// strings with a known structure.
64 int compareAsciiUpperCase(String a, String b) {
65 int defaultResult = 0; // Returned if no difference found.
66 for (int i = 0; i < a.length; i++) {
67 if (i == b.length) return 1;
68 var aChar = a.codeUnitAt(i);
69 var bChar = b.codeUnitAt(i);
70 if (aChar == bChar) continue;
71 const int lowerCaseA = 0x61;
72 const int lowerCaseZ = 0x7d;
73 // Lower case if letters.
74 int aUpperCase = aChar;
75 int bUpperCase = bChar;
76 if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20;
77 if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20;
78 if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign;
79 if (defaultResult == 0) defaultResult = (aChar - bChar);
80 }
81 if (b.length > a.length) return -1;
82 return defaultResult.sign;
83 }
84
85 /// Compares [a] and [b] lexically, converting ASCII letters to lower case.
86 ///
87 /// Comparison treats all upper-case ASCII letters as lower-case letter,
88 /// but does no case conversion for non-ASCII letters.
89 ///
90 /// If two strings differ only on the case of ASCII letters, the one with the
91 /// capital letter at the first difference will compare as less than the other
92 /// string. This tie-breaking ensures that the comparison is a total ordering
93 /// on strings.
94 ///
95 /// Ignoring non-ASCII letters is not generally a good idea, but it makes sense
96 /// for situations where the strings are known to be ASCII. Examples could
97 /// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar
98 /// strings with a known structure.
99 int compareAsciiLowerCase(String a, String b) {
100 int defaultResult = 0;
101 for (int i = 0; i < a.length; i++) {
102 if (i == b.length) return 1;
103 var aChar = a.codeUnitAt(i);
104 var bChar = b.codeUnitAt(i);
105 if (aChar == bChar) continue;
106 const int upperCaseA = 0x41;
107 const int upperCaseZ = 0x5d;
108 int aLowerCase = aChar;
109 int bLowerCase = bChar;
110 // Upper case if ASCII letters.
111 if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20;
112 if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20;
113 if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign;
114 if (defaultResult == 0) defaultResult = aChar - bChar;
115 }
116 if (b.length > a.length) return -1;
117 return defaultResult.sign;
118 }
119
120 /// Compares strings [a] and [b] lexically, with embedded numbers recognized.
sra1 2015/11/10 04:41:40 'with embedded sequences of digits compared as a u
Lasse Reichstein Nielsen 2015/11/10 08:21:33 What I intend is: Leixcally: Dictionary sorting (
121 ///
122 /// If both strings contain an embedded number at the same locations, the
123 /// numbers are compared lexicographically. This means that a longer number
124 /// is considered greater than a shorter number, and numbers of the same length
125 /// are ordered numerically (or alhabetically, it's the same thing).
126 /// If numbers do not have leading zeros, lexicographical ordering is also
127 /// the same as numerical ordering.
sra1 2015/11/10 04:41:40 Add examples in the doc to tease out when this is
Lasse Reichstein Nielsen 2015/11/10 08:21:33 Good idea.
128 int compareWithNumbers(String a, String b) {
sra1 2015/11/10 04:41:40 'With' makes it sound like one of the arguments is
Lasse Reichstein Nielsen 2015/11/10 08:21:33 Yes, the name sucks. Suggestions will be appreciat
129 for (int i = 0; i < a.length; i++) {
130 if (i == b.length) return 1;
131 var aChar = a.codeUnitAt(i);
132 var bChar = b.codeUnitAt(i);
133 if (aChar != bChar) {
134 return _compareDigitsLexicographically(a, b, i, aChar, bChar);
135 }
136 }
137 if (b.length > a.length) return -1;
138 return 0;
139 }
140
141 /// Compares strings [a] and [b] lexically, with embedded numbers recognized.
142 ///
143 /// ASCII letters are converted to lower case before being compared, like
144 /// for [compareAsciiLowerCase].
145 ///
146 /// If two strings differ only on the case of ASCII letters, the one with the
147 /// capital letter at the first difference will compare as less than the other
148 /// string. This tie-breaking ensures that the comparison is a total ordering
149 /// on strings.
150 ///
151 /// If both strings contain an embedded number at the same locations, the
152 /// numbers are compared lexicographically. This means that a longer number
153 /// is considered greater than a shorter number, and numbers of the same length
154 /// are ordered numerically (or alhabetically, it's the same thing).
155 /// If numbers do not have leading zeros, lexicographical ordering is also
156 /// the same as numerical ordering.
157 int compareAsciiLowerCaseWithNumbers(String a, String b) {
158 int defaultResult = 0; // Returned if no difference found.
159 for (int i = 0; i < a.length; i++) {
160 if (i == b.length) return 1;
161 var aChar = a.codeUnitAt(i);
162 var bChar = b.codeUnitAt(i);
163 if (aChar == bChar) continue;
164 const int upperCaseA = 0x41;
165 const int upperCaseZ = 0x5d;
166 int aLowerCase = aChar;
167 int bLowerCase = bChar;
168 if (upperCaseA <= aChar && aChar <= upperCaseZ) aLowerCase += 0x20;
169 if (upperCaseA <= bChar && bChar <= upperCaseZ) bLowerCase += 0x20;
170 if (aLowerCase != bLowerCase) {
171 return _compareDigitsLexicographically(a, b, i, aLowerCase, bLowerCase);
172 }
173 if (defaultResult == 0) defaultResult = aChar - bChar;
174 }
175 if (b.length > a.length) return -1;
176 return defaultResult.sign;
177 }
178
179 /// Compares strings [a] and [b] lexically, with embedded numbers recognized.
180 ///
181 /// ASCII letters are converted to upper case before being compared, like
182 /// for [compareAsciiUpperCase].
183 ///
184 /// If two strings differ only on the case of ASCII letters, the one with the
185 /// capital letter at the first difference will compare as less than the other
186 /// string. This tie-breaking ensures that the comparison is a total ordering
187 /// on strings.
188 ///
189 /// If both strings contain an embedded number at the same locations, the
190 /// numbers are compared lexicographically. This means that a longer number
191 /// is considered greater than a shorter number, and numbers of the same length
192 /// are ordered numerically (or alhabetically, it's the same thing).
193 /// If numbers do not have leading zeros, lexicographical ordering is also
194 /// the same as numerical ordering.
195 int compareAsciiUpperCaseWithNumbers(String a, String b) {
196 int defaultResult = 0;
197 for (int i = 0; i < a.length; i++) {
198 if (i == b.length) return 1;
199 var aChar = a.codeUnitAt(i);
200 var bChar = b.codeUnitAt(i);
201 if (aChar == bChar) continue;
202 const int lowerCaseA = 0x61;
203 const int lowerCaseZ = 0x7d;
204 int aUpperCase = aChar;
205 int bUpperCase = bChar;
206 if (lowerCaseA <= aChar && aChar <= lowerCaseZ) aUpperCase -= 0x20;
207 if (lowerCaseA <= bChar && bChar <= lowerCaseZ) bUpperCase -= 0x20;
208 if (aUpperCase != bUpperCase) {
209 return _compareDigitsLexicographically(a, b, i, aUpperCase, bUpperCase);
210 }
211 if (defaultResult == 0) defaultResult = aChar - bChar;
212 }
213 if (b.length > a.length) return -1;
214 return defaultResult.sign;
215 }
216
217 /// Check for numbers overlapping the current mismatched characters.
218 ///
219 /// If either [aChar] or [bChar] or both are digits we check if one is part of
220 /// a longer digits sequence than the other,
221 /// and let the longer sequence compare as greater.
222 ///
223 /// Otherwise just returns the difference between [aChar] and [bChar].
224 int _compareDigitsLexicographically(
225 String a, String b, int index, int aChar, int bChar) {
226 assert(aChar != bChar);
227 var aIsDigit = _isDigit(aChar);
228 var bIsDigit = _isDigit(bChar);
229 if (aIsDigit) {
230 if (bIsDigit) {
231 int lengthCompare = _compareDigitCount(a, b, index);
232 if (lengthCompare != 0) return lengthCompare;
233 } else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) {
234 // aChar is the continuation of a longer number.
235 return 1;
236 }
237 } else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) {
238 // bChar is the continuation of a longer number.
239 return -1;
240 }
241 // Characters are not both part , or part
sra1 2015/11/10 04:41:40 Fix comment.
Lasse Reichstein Nielsen 2015/11/10 08:21:33 I wonder what I meant to say :)
242 return (aChar - bChar).sign;
243 }
244
245 /// Checks which of [a] and [b] has the longest sequence of digits.
246 ///
247 /// Starts counting from `i + 1` (assumes that `a[i]` and `b[i]` are both
248 /// already known to be digits).
249 int _compareDigitCount(String a, String b, int i) {
250 while (++i < a.length) {
251 bool aIsDigit = _isDigit(a.codeUnitAt(i));
252 if (i == b.length) return aIsDigit ? 1 : 0;
253 bool bIsDigit = _isDigit(b.codeUnitAt(i));
254 if (aIsDigit) {
255 if (bIsDigit) continue;
256 return 1;
257 } else if (bIsDigit) {
258 return -1;
259 } else {
260 return 0;
261 }
262 }
263 if (i < b.length && _isDigit(b.codeUnitAt(i))) {
264 return -1;
265 }
266 return 0;
267 }
268
269 bool _isDigit(int charCode) => (charCode ^ 0x30) <= 9;
OLDNEW
« no previous file with comments | « lib/collection.dart ('k') | test/comparators_test.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698