OLD | NEW |
| (Empty) |
1 part of petitparser; | |
2 | |
3 /** | |
4 * Parser class for individual character classes. | |
5 */ | |
6 class CharacterParser extends Parser { | |
7 final CharacterPredicate _predicate; | |
8 | |
9 final String _message; | |
10 | |
11 CharacterParser(this._predicate, this._message); | |
12 | |
13 @override | |
14 Result parseOn(Context context) { | |
15 var buffer = context.buffer; | |
16 var position = context.position; | |
17 if (position < buffer.length && | |
18 _predicate.test(buffer.codeUnitAt(position))) { | |
19 return context.success(buffer[position], position + 1); | |
20 } | |
21 return context.failure(_message); | |
22 } | |
23 | |
24 @override | |
25 String toString() => '${super.toString()}[$_message]'; | |
26 | |
27 @override | |
28 Parser copy() => new CharacterParser(_predicate, _message); | |
29 | |
30 @override | |
31 bool hasEqualProperties(Parser other) { | |
32 return other is CharacterParser | |
33 && super.hasEqualProperties(other) | |
34 && _predicate == other._predicate | |
35 && _message == other._message; | |
36 } | |
37 } | |
38 | |
/**
 * Abstract interface of all character predicates.
 */
abstract class CharacterPredicate {
  /**
   * Returns `true` if the character code [value] satisfies this predicate.
   */
  bool test(int value);
}
49 | |
/**
 * Predicate that accepts exactly the character codes rejected by the
 * wrapped [predicate].
 */
class _NotCharacterPredicate implements CharacterPredicate {
  final CharacterPredicate predicate;

  _NotCharacterPredicate(this.predicate);

  @override
  bool test(int value) {
    return !predicate.test(value);
  }
}
58 | |
/**
 * Returns a parser that accepts any single character contained in [string].
 *
 * The optional [message] replaces the default failure message.
 */
Parser anyOf(String string, [String message]) {
  var predicate = _optimizedString(string);
  if (message == null) {
    return new CharacterParser(predicate, 'any of "$string" expected');
  }
  return new CharacterParser(predicate, message);
}
66 | |
/**
 * Builds a predicate matching every code unit of [string]: each code unit
 * becomes a one-element range, and the ranges are then optimized together.
 */
CharacterPredicate _optimizedString(String string) {
  return _optimizedRanges(
      string.codeUnits.map((unit) => new _RangeCharPredicate(unit, unit)));
}
72 | |
/**
 * Combines the given [ranges] into a single, compact predicate.
 *
 * The ranges are sorted, adjacent or overlapping ranges are merged, and the
 * cheapest predicate for the result is returned: a single-character
 * predicate, a single range, or a multi-range predicate that is searched
 * by binary search.
 */
CharacterPredicate _optimizedRanges(Iterable<_RangeCharPredicate> ranges) {

  // 1. sort the ranges by start, breaking ties by stop
  var sortedRanges = new List.from(ranges, growable: false);
  sortedRanges.sort((first, second) {
    return first.start != second.start
        ? first.start - second.start
        : first.stop - second.stop;
  });

  // 2. merge adjacent or overlapping ranges
  var mergedRanges = new List();
  for (var thisRange in sortedRanges) {
    if (mergedRanges.isEmpty) {
      mergedRanges.add(thisRange);
    } else {
      var lastRange = mergedRanges.last;
      if (lastRange.stop + 1 >= thisRange.start) {
        // The current range may lie entirely inside the previous one (for
        // example 'a-z' followed by 'b-c'), so keep the larger stop instead
        // of blindly taking the current one, which would shrink the range.
        var mergedStop = lastRange.stop >= thisRange.stop
            ? lastRange.stop
            : thisRange.stop;
        mergedRanges[mergedRanges.length - 1] =
            new _RangeCharPredicate(lastRange.start, mergedStop);
      } else {
        mergedRanges.add(thisRange);
      }
    }
  }

  // 3. build the best resulting predicates
  if (mergedRanges.length == 1) {
    return mergedRanges[0].start == mergedRanges[0].stop
        ? new _SingleCharPredicate(mergedRanges[0].start)
        : mergedRanges[0];
  } else {
    return new _RangesCharPredicate(mergedRanges.length,
        mergedRanges.map((range) => range.start).toList(growable: false),
        mergedRanges.map((range) => range.stop).toList(growable: false));
  }
}
110 | |
/**
 * Returns a parser that accepts any single character not contained in
 * [string].
 *
 * The optional [message] replaces the default failure message.
 */
Parser noneOf(String string, [String message]) {
  var excluded = new _NotCharacterPredicate(_optimizedString(string));
  if (message == null) {
    return new CharacterParser(excluded, 'none of "$string" expected');
  }
  return new CharacterParser(excluded, message);
}
119 | |
/**
 * Returns a parser that accepts only the specific character [element].
 *
 * [element] is converted with `_toCharCode`, so it may be a number or any
 * object whose string representation is exactly one character long. The
 * optional [message] replaces the default failure message.
 */
Parser char(element, [String message]) {
  var predicate = new _SingleCharPredicate(_toCharCode(element));
  if (message == null) {
    return new CharacterParser(predicate, '"$element" expected');
  }
  return new CharacterParser(predicate, message);
}
127 | |
/**
 * Predicate that accepts exactly one character code.
 */
class _SingleCharPredicate implements CharacterPredicate {
  /** The only character code accepted by this predicate. */
  final int value;

  const _SingleCharPredicate(this.value);

  @override
  bool test(int value) {
    return identical(this.value, value);
  }
}
136 | |
/**
 * Returns a parser that accepts any digit character ('0' to '9').
 *
 * The optional [message] replaces the default failure message.
 */
Parser digit([String message]) {
  if (message == null) {
    return new CharacterParser(_digitCharPredicate, 'digit expected');
  }
  return new CharacterParser(_digitCharPredicate, message);
}
144 | |
/**
 * Predicate accepting the character codes 48 to 57 ('0' to '9').
 */
class _DigitCharPredicate implements CharacterPredicate {
  const _DigitCharPredicate();

  @override
  bool test(int value) {
    return value >= 48 && value <= 57;
  }
}

// Shared constant instance used by digit().
const _digitCharPredicate = const _DigitCharPredicate();
153 | |
/**
 * Returns a parser that accepts any letter character ('A' to 'Z' and
 * 'a' to 'z').
 *
 * The optional [message] replaces the default failure message.
 */
Parser letter([String message]) {
  if (message == null) {
    return new CharacterParser(_letterCharPredicate, 'letter expected');
  }
  return new CharacterParser(_letterCharPredicate, message);
}
161 | |
/**
 * Predicate accepting the character codes 65 to 90 ('A' to 'Z') and
 * 97 to 122 ('a' to 'z').
 */
class _LetterCharPredicate implements CharacterPredicate {
  const _LetterCharPredicate();

  @override
  bool test(int value) {
    if (value >= 65 && value <= 90) {
      return true;
    }
    return value >= 97 && value <= 122;
  }
}

// Shared constant instance used by letter().
const _letterCharPredicate = const _LetterCharPredicate();
171 | |
/**
 * Returns a parser that accepts any lowercase letter ('a' to 'z').
 *
 * The optional [message] replaces the default failure message.
 */
Parser lowercase([String message]) {
  if (message == null) {
    return new CharacterParser(
        _lowercaseCharPredicate, 'lowercase letter expected');
  }
  return new CharacterParser(_lowercaseCharPredicate, message);
}
179 | |
/**
 * Predicate accepting the character codes 97 to 122 ('a' to 'z').
 */
class _LowercaseCharPredicate implements CharacterPredicate {
  const _LowercaseCharPredicate();

  @override
  bool test(int value) {
    return value >= 97 && value <= 122;
  }
}

// Shared constant instance used by lowercase().
const _lowercaseCharPredicate = const _LowercaseCharPredicate();
188 | |
/**
 * Returns a parser that accepts the character class described by [element].
 *
 * The description is parsed by the internal pattern parser: it consists of
 * single characters and ranges written as `a-z`, and a leading `^` negates
 * the whole class. The optional [message] replaces the default failure
 * message.
 */
Parser pattern(String element, [String message]) {
  var predicate = _patternParser.parse(element).value;
  if (message == null) {
    return new CharacterParser(predicate, '[$element] expected');
  }
  return new CharacterParser(predicate, message);
}
196 | |
/**
 * Builds the parser that turns a character class description (as accepted
 * by pattern()) into a [CharacterPredicate].
 */
Parser _createPatternParser() {
  // A single character becomes a range covering only itself.
  var singleChar = any().map((each) {
    var code = _toCharCode(each);
    return new _RangeCharPredicate(code, code);
  });
  // Two characters separated by '-' become the range between them.
  var charRange = any().seq(char('-')).seq(any()).map((each) {
    return new _RangeCharPredicate(
        _toCharCode(each[0]), _toCharCode(each[2]));
  });
  // One or more ranges or single characters, optimized into one predicate.
  var positive =
      charRange.or(singleChar).plus().map((each) => _optimizedRanges(each));
  // An optional leading '^' negates the whole character class.
  var negation = char('^').optional();
  return negation.seq(positive).map((each) {
    if (each[0] == null) {
      return each[1];
    }
    return new _NotCharacterPredicate(each[1]);
  });
}

// Pattern parser shared by all calls to pattern().
final _patternParser = _createPatternParser();
209 | |
/**
 * Predicate accepting the character codes covered by any of several ranges.
 *
 * Range `i` spans `starts[i]` to `stops[i]` inclusive. The lookup is a
 * binary search over [starts], so the ranges are expected to be sorted by
 * their start value (as produced by `_optimizedRanges`).
 */
class _RangesCharPredicate implements CharacterPredicate {
  final int length;
  final List<int> starts;
  final List<int> stops;

  _RangesCharPredicate(this.length, this.starts, this.stops);

  @override
  bool test(int value) {
    var low = 0;
    var high = length;
    while (low < high) {
      var middle = low + ((high - low) >> 1);
      var start = starts[middle];
      if (start == value) {
        // The value is exactly the start of a range.
        return true;
      }
      if (start < value) {
        low = middle + 1;
      } else {
        high = middle;
      }
    }
    // 'low' is now the number of ranges starting before the value; the value
    // matches if the last of those ranges extends far enough to include it.
    return low > 0 && value <= stops[low - 1];
  }
}
235 | |
/**
 * Returns a parser that accepts any character in the inclusive range
 * between [start] and [stop].
 *
 * Both bounds are converted with `_toCharCode`. The optional [message]
 * replaces the default failure message.
 */
Parser range(start, stop, [String message]) {
  var predicate =
      new _RangeCharPredicate(_toCharCode(start), _toCharCode(stop));
  if (message == null) {
    return new CharacterParser(predicate, '$start..$stop expected');
  }
  return new CharacterParser(predicate, message);
}
245 | |
/**
 * Predicate accepting every character code from [start] to [stop],
 * both inclusive.
 */
class _RangeCharPredicate implements CharacterPredicate {
  final int start;
  final int stop;

  _RangeCharPredicate(this.start, this.stop);

  @override
  bool test(int value) {
    return value >= start && value <= stop;
  }
}
255 | |
/**
 * Returns a parser that accepts any uppercase letter ('A' to 'Z').
 *
 * The optional [message] replaces the default failure message.
 */
Parser uppercase([String message]) {
  if (message == null) {
    return new CharacterParser(
        _uppercaseCharPredicate, 'uppercase letter expected');
  }
  return new CharacterParser(_uppercaseCharPredicate, message);
}
263 | |
/**
 * Predicate accepting the character codes 65 to 90 ('A' to 'Z').
 */
class _UppercaseCharPredicate implements CharacterPredicate {
  const _UppercaseCharPredicate();

  @override
  bool test(int value) {
    return value >= 65 && value <= 90;
  }
}

// Shared constant instance used by uppercase().
const _uppercaseCharPredicate = const _UppercaseCharPredicate();
272 | |
/**
 * Returns a parser that accepts any whitespace character.
 *
 * The optional [message] replaces the default failure message.
 */
Parser whitespace([String message]) {
  if (message == null) {
    return new CharacterParser(
        _whitespaceCharPredicate, 'whitespace expected');
  }
  return new CharacterParser(_whitespaceCharPredicate, message);
}
280 | |
/**
 * Predicate accepting a fixed set of whitespace character codes.
 */
class _WhitespaceCharPredicate implements CharacterPredicate {
  const _WhitespaceCharPredicate();

  @override
  bool test(int value) {
    // Codes below 256 are checked against their own short list first.
    if (value < 256) {
      switch (value) {
        case 0x09:
        case 0x0A:
        case 0x0B:
        case 0x0C:
        case 0x0D:
        case 0x20:
        case 0x85:
        case 0xA0:
          return true;
        default:
          return false;
      }
    }
    switch (value) {
      case 0x1680:
      case 0x180E:
      case 0x2000:
      case 0x2001:
      case 0x2002:
      case 0x2003:
      case 0x2004:
      case 0x2005:
      case 0x2006:
      case 0x2007:
      case 0x2008:
      case 0x2009:
      case 0x200A:
      case 0x2028:
      case 0x2029:
      case 0x202F:
      case 0x205F:
      case 0x3000:
      case 0xFEFF:
        return true;
      default:
        return false;
    }
  }
}

// Shared constant instance used by whitespace().
const _whitespaceCharPredicate = const _WhitespaceCharPredicate();
320 | |
/**
 * Returns a parser that accepts any word character: a letter, a digit or
 * the underscore.
 *
 * The optional [message] replaces the default failure message.
 */
Parser word([String message]) {
  if (message == null) {
    return new CharacterParser(
        _wordCharPredicate, 'letter or digit expected');
  }
  return new CharacterParser(_wordCharPredicate, message);
}
328 | |
/**
 * Predicate accepting the character codes 65 to 90 ('A' to 'Z'), 97 to 122
 * ('a' to 'z'), 48 to 57 ('0' to '9') and 95 ('_').
 */
class _WordCharPredicate implements CharacterPredicate {
  const _WordCharPredicate();

  @override
  bool test(int value) {
    if (value >= 65 && value <= 90) {
      return true;
    }
    if (value >= 97 && value <= 122) {
      return true;
    }
    if (value >= 48 && value <= 57) {
      return true;
    }
    return value == 95;
  }
}

// Shared constant instance used by word().
const _wordCharPredicate = const _WordCharPredicate();
340 | |
// Internal converter for character codes: numbers are rounded to the nearest
// integer; anything else must have a string representation of exactly one
// character, whose code unit is returned. Throws an ArgumentError otherwise.
int _toCharCode(element) {
  if (element is! num) {
    var value = element.toString();
    if (value.length == 1) {
      return value.codeUnitAt(0);
    }
    throw new ArgumentError('$value is not a character');
  }
  return element.round();
}
OLD | NEW |