OLD | NEW |
| (Empty) |
1 part of petitparser; | |
2 | |
/// A parser that consumes a single character of the input, provided its
/// code unit satisfies a [CharacterPredicate].
class CharacterParser extends Parser {
  /// Decides whether the code unit at the current position is accepted.
  final CharacterPredicate _predicate;

  /// Message of the failure produced when nothing is accepted.
  final String _message;

  CharacterParser(this._predicate, this._message);

  @override
  Result parseOn(Context context) {
    final input = context.buffer;
    final index = context.position;
    // Fail both at the end of the input and when the predicate rejects the
    // code unit found at the current position.
    if (index >= input.length || !_predicate.test(input.codeUnitAt(index))) {
      return context.failure(_message);
    }
    // Succeed with the one-character string and advance by one position.
    return context.success(input[index], index + 1);
  }

  @override
  String toString() {
    return '${super.toString()}[$_message]';
  }

  @override
  Parser copy() {
    return new CharacterParser(_predicate, _message);
  }

  @override
  bool hasEqualProperties(Parser other) {
    if (other is CharacterParser) {
      return super.hasEqualProperties(other) &&
          _predicate == other._predicate &&
          _message == other._message;
    }
    return false;
  }
}
36 | |
/// Common interface of all predicates that classify a single character code.
abstract class CharacterPredicate {
  /// Returns `true` if the character code [value] satisfies this predicate.
  bool test(int value);
}
43 | |
/// Predicate that accepts exactly the character codes rejected by the
/// wrapped [predicate].
class _NotCharacterPredicate implements CharacterPredicate {
  /// The predicate whose verdict is inverted.
  final CharacterPredicate predicate;

  _NotCharacterPredicate(this.predicate);

  @override
  bool test(int value) {
    return !predicate.test(value);
  }
}
52 | |
/// Returns a parser that accepts any single character occurring in [string].
///
/// If [message] is omitted or `null`, the failure message defaults to
/// `any of "<string>" expected`.
Parser anyOf(String string, [String message]) {
  final predicate = _optimizedString(string);
  var text = message;
  if (text == null) {
    text = 'any of "$string" expected';
  }
  return new CharacterParser(predicate, text);
}
58 | |
/// Builds a predicate that accepts every code unit occurring in [string].
CharacterPredicate _optimizedString(String string) {
  // Each code unit is turned into a range covering only itself; the range
  // optimizer then sorts and merges them.
  return _optimizedRanges(
      string.codeUnits.map((unit) => new _RangeCharPredicate(unit, unit)));
}
64 | |
/// Combines [ranges] into a compact predicate that accepts the same
/// character codes.
///
/// The ranges are sorted, adjacent or overlapping ranges are merged, and the
/// result is returned as
///
/// * a [_SingleCharPredicate] if exactly one character code remains,
/// * a [_RangeCharPredicate] if one contiguous range remains, or
/// * a [_RangesCharPredicate] otherwise (including when [ranges] is empty).
CharacterPredicate _optimizedRanges(Iterable<_RangeCharPredicate> ranges) {
  // 1. sort the ranges by start, breaking ties by stop
  var sortedRanges = new List<_RangeCharPredicate>.from(ranges, growable: false);
  sortedRanges.sort((first, second) {
    return first.start != second.start
        ? first.start - second.start
        : first.stop - second.stop;
  });

  // 2. merge adjacent or overlapping ranges
  var mergedRanges = <_RangeCharPredicate>[];
  for (var thisRange in sortedRanges) {
    if (mergedRanges.isEmpty) {
      mergedRanges.add(thisRange);
    } else {
      var lastRange = mergedRanges.last;
      if (lastRange.stop + 1 >= thisRange.start) {
        // A range that lies completely inside the previous one must not
        // shrink it (e.g. `a-z` followed by `c-d`), so keep the larger stop.
        var stop =
            lastRange.stop > thisRange.stop ? lastRange.stop : thisRange.stop;
        mergedRanges[mergedRanges.length - 1] =
            new _RangeCharPredicate(lastRange.start, stop);
      } else {
        mergedRanges.add(thisRange);
      }
    }
  }

  // 3. build the best resulting predicate
  if (mergedRanges.length == 1) {
    var only = mergedRanges[0];
    if (only.start == only.stop) {
      return new _SingleCharPredicate(only.start);
    }
    return only;
  }
  // The parallel start/stop lists stay sorted, which is what the binary
  // search in _RangesCharPredicate.test relies on.
  return new _RangesCharPredicate(
      mergedRanges.length,
      new List<int>.from(mergedRanges.map((range) => range.start),
          growable: false),
      new List<int>.from(mergedRanges.map((range) => range.stop),
          growable: false));
}
102 | |
/// Returns a parser that accepts any single character that does not occur
/// in [string].
///
/// If [message] is omitted or `null`, the failure message defaults to
/// `none of "<string>" expected`.
Parser noneOf(String string, [String message]) {
  final predicate = new _NotCharacterPredicate(_optimizedString(string));
  var text = message;
  if (text == null) {
    text = 'none of "$string" expected';
  }
  return new CharacterParser(predicate, text);
}
109 | |
/// Returns a parser that accepts only the character given by [element].
///
/// [element] is converted with [_toCharCode], so it may be a number (the
/// character code) or an object whose string form is one character long.
/// If [message] is omitted or `null`, the failure message defaults to
/// `"<element>" expected`.
Parser char(element, [String message]) {
  final predicate = new _SingleCharPredicate(_toCharCode(element));
  var text = message;
  if (text == null) {
    text = '"$element" expected';
  }
  return new CharacterParser(predicate, text);
}
115 | |
/// Predicate that accepts exactly one character code.
class _SingleCharPredicate implements CharacterPredicate {
  /// The only character code accepted by this predicate.
  final int value;

  const _SingleCharPredicate(this.value);

  @override
  bool test(int value) {
    // The parameter shadows the field, hence the explicit `this`.
    return identical(this.value, value);
  }
}
124 | |
/// Returns a parser that accepts a single digit character (`0` to `9`).
///
/// If [message] is omitted or `null`, the failure message defaults to
/// `digit expected`.
Parser digit([String message]) {
  if (message == null) {
    message = 'digit expected';
  }
  return new CharacterParser(_digitCharPredicate, message);
}
130 | |
/// Predicate accepting the character codes of the digits `0` to `9`.
class _DigitCharPredicate implements CharacterPredicate {
  const _DigitCharPredicate();

  @override
  bool test(int value) {
    // 48 is '0' and 57 is '9'.
    return value >= 48 && value <= 57;
  }
}

/// Shared instance used by [digit].
const _DigitCharPredicate _digitCharPredicate = const _DigitCharPredicate();
139 | |
/// Returns a parser that accepts a single letter character (`a` to `z` or
/// `A` to `Z`).
///
/// If [message] is omitted or `null`, the failure message defaults to
/// `letter expected`.
Parser letter([String message]) {
  if (message == null) {
    message = 'letter expected';
  }
  return new CharacterParser(_letterCharPredicate, message);
}
145 | |
/// Predicate accepting the character codes of the letters `A` to `Z` and
/// `a` to `z`.
class _LetterCharPredicate implements CharacterPredicate {
  const _LetterCharPredicate();

  @override
  bool test(int value) {
    // 65..90 is 'A'..'Z'.
    if (value >= 65 && value <= 90) {
      return true;
    }
    // 97..122 is 'a'..'z'.
    return value >= 97 && value <= 122;
  }
}

/// Shared instance used by [letter].
const _LetterCharPredicate _letterCharPredicate = const _LetterCharPredicate();
155 | |
/// Returns a parser that accepts a single lowercase letter (`a` to `z`).
///
/// If [message] is omitted or `null`, the failure message defaults to
/// `lowercase letter expected`.
Parser lowercase([String message]) {
  if (message == null) {
    message = 'lowercase letter expected';
  }
  return new CharacterParser(_lowercaseCharPredicate, message);
}
161 | |
/// Predicate accepting the character codes of the letters `a` to `z`.
class _LowercaseCharPredicate implements CharacterPredicate {
  const _LowercaseCharPredicate();

  @override
  bool test(int value) {
    // 97 is 'a' and 122 is 'z'.
    return value >= 97 && value <= 122;
  }
}

/// Shared instance used by [lowercase].
const _LowercaseCharPredicate _lowercaseCharPredicate =
    const _LowercaseCharPredicate();
170 | |
/// Returns a parser that accepts a single character matching the character
/// class pattern [element].
///
/// The pattern is a sequence of single characters and `x-y` ranges; a
/// leading `^` negates the whole class. If [message] is omitted or `null`,
/// the failure message defaults to `[<element>] expected`.
Parser pattern(String element, [String message]) {
  // The pattern is itself parsed into a character predicate.
  final predicate = _patternParser.parse(element).value;
  var text = message;
  if (text == null) {
    text = '[$element] expected';
  }
  return new CharacterParser(predicate, text);
}
176 | |
/// Builds the parser that turns a character class pattern (as accepted by
/// [pattern]) into a [CharacterPredicate].
Parser _createPatternParser() {
  // A lone character becomes a range that covers only itself.
  final single = any().map((each) {
    final code = _toCharCode(each);
    return new _RangeCharPredicate(code, code);
  });

  // `x-y` becomes the range from x to y; the `-` itself is element 1 of the
  // parsed sequence and is ignored.
  final multiple = any().seq(char('-')).seq(any()).map((each) {
    return new _RangeCharPredicate(_toCharCode(each[0]), _toCharCode(each[2]));
  });

  // One or more ranges or characters, combined into one optimized predicate.
  // Ranges are tried first so that `a-z` is not read as three characters.
  final positive =
      multiple.or(single).plus().map((each) => _optimizedRanges(each));

  // An optional leading `^` negates the whole character class.
  final negation = char('^').optional();
  return negation.seq(positive).map((each) {
    if (each[0] == null) {
      return each[1];
    }
    return new _NotCharacterPredicate(each[1]);
  });
}

/// Parser instance used by [pattern] to interpret its pattern argument.
final _patternParser = _createPatternParser();
189 | |
/// Predicate accepting the character codes covered by any of several ranges.
///
/// Range `i` spans `starts[i]` to `stops[i]` inclusive. The lookup is a
/// binary search over [starts], so that list has to be sorted ascending.
class _RangesCharPredicate implements CharacterPredicate {
  /// Number of ranges described by [starts] and [stops].
  final int length;

  /// First character code of each range.
  final List<int> starts;

  /// Last character code of each range.
  final List<int> stops;

  _RangesCharPredicate(this.length, this.starts, this.stops);

  @override
  bool test(int value) {
    var low = 0;
    var high = length;
    while (low < high) {
      final middle = low + ((high - low) >> 1);
      final start = starts[middle];
      if (start == value) {
        // The value is the first character of a range.
        return true;
      }
      if (start < value) {
        low = middle + 1;
      } else {
        high = middle;
      }
    }
    // Now `low` counts the ranges starting below the value, so only the
    // last of those (if any) can still contain it.
    return low > 0 && value <= stops[low - 1];
  }
}
215 | |
/// Returns a parser that accepts a single character whose code lies between
/// those of [start] and [stop], both inclusive.
///
/// Both bounds are converted with [_toCharCode]. If [message] is omitted or
/// `null`, the failure message defaults to `<start>..<stop> expected`.
Parser range(start, stop, [String message]) {
  final predicate =
      new _RangeCharPredicate(_toCharCode(start), _toCharCode(stop));
  var text = message;
  if (text == null) {
    text = '$start..$stop expected';
  }
  return new CharacterParser(predicate, text);
}
223 | |
/// Predicate accepting every character code from [start] to [stop],
/// both inclusive.
class _RangeCharPredicate implements CharacterPredicate {
  /// Smallest accepted character code.
  final int start;

  /// Largest accepted character code.
  final int stop;

  _RangeCharPredicate(this.start, this.stop);

  @override
  bool test(int value) {
    return start <= value && value <= stop;
  }
}
233 | |
/// Returns a parser that accepts a single uppercase letter (`A` to `Z`).
///
/// If [message] is omitted or `null`, the failure message defaults to
/// `uppercase letter expected`.
Parser uppercase([String message]) {
  if (message == null) {
    message = 'uppercase letter expected';
  }
  return new CharacterParser(_uppercaseCharPredicate, message);
}
239 | |
/// Predicate accepting the character codes of the letters `A` to `Z`.
class _UppercaseCharPredicate implements CharacterPredicate {
  const _UppercaseCharPredicate();

  @override
  bool test(int value) {
    // 65 is 'A' and 90 is 'Z'.
    return value >= 65 && value <= 90;
  }
}

/// Shared instance used by [uppercase].
const _UppercaseCharPredicate _uppercaseCharPredicate =
    const _UppercaseCharPredicate();
248 | |
/// Returns a parser that accepts a single whitespace character.
///
/// If [message] is omitted or `null`, the failure message defaults to
/// `whitespace expected`.
Parser whitespace([String message]) {
  if (message == null) {
    message = 'whitespace expected';
  }
  return new CharacterParser(_whitespaceCharPredicate, message);
}
254 | |
/// Predicate accepting a fixed set of whitespace character codes.
class _WhitespaceCharPredicate implements CharacterPredicate {
  const _WhitespaceCharPredicate();

  @override
  bool test(int value) {
    if (value < 256) {
      // 0x09..0x0D are tab, line feed, vertical tab, form feed and carriage
      // return; the others are space, next line and no-break space.
      return (0x09 <= value && value <= 0x0D) ||
          value == 0x20 ||
          value == 0x85 ||
          value == 0xA0;
    }
    // Whitespace above the 8-bit range; 0x2000..0x200A is one contiguous
    // run of accepted codes.
    return value == 0x1680 ||
        value == 0x180E ||
        (0x2000 <= value && value <= 0x200A) ||
        value == 0x2028 ||
        value == 0x2029 ||
        value == 0x202F ||
        value == 0x205F ||
        value == 0x3000 ||
        value == 0xFEFF;
  }
}

/// Shared instance used by [whitespace].
const _WhitespaceCharPredicate _whitespaceCharPredicate =
    const _WhitespaceCharPredicate();
294 | |
/// Returns a parser that accepts a single word character: a letter, a digit
/// or an underscore.
///
/// If [message] is omitted or `null`, the failure message defaults to
/// `letter or digit expected`.
Parser word([String message]) {
  if (message == null) {
    message = 'letter or digit expected';
  }
  return new CharacterParser(_wordCharPredicate, message);
}
300 | |
/// Predicate accepting the character codes of letters, digits and the
/// underscore.
class _WordCharPredicate implements CharacterPredicate {
  const _WordCharPredicate();

  @override
  bool test(int value) {
    // 65..90 is 'A'..'Z'.
    if (value >= 65 && value <= 90) {
      return true;
    }
    // 97..122 is 'a'..'z'.
    if (value >= 97 && value <= 122) {
      return true;
    }
    // 48..57 is '0'..'9'.
    if (value >= 48 && value <= 57) {
      return true;
    }
    // 95 is '_'.
    return value == 95;
  }
}

/// Shared instance used by [word].
const _WordCharPredicate _wordCharPredicate = const _WordCharPredicate();
312 | |
// Internal converter for character codes.
//
// Numbers are rounded to the nearest integer and used as the code directly.
// Anything else is converted with `toString()` and must then be exactly one
// code unit long, otherwise an [ArgumentError] is thrown.
int _toCharCode(element) {
  if (element is num) {
    return element.round();
  }
  final text = element.toString();
  if (text.length == 1) {
    return text.codeUnitAt(0);
  }
  throw new ArgumentError('$text is not a character');
}
OLD | NEW |