OLD | NEW |
| (Empty) |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 library yaml.scanner; | |
6 | |
7 import 'package:collection/collection.dart'; | |
8 import 'package:string_scanner/string_scanner.dart'; | |
9 import 'package:source_span/source_span.dart'; | |
10 | |
11 import 'style.dart'; | |
12 import 'token.dart'; | |
13 import 'utils.dart'; | |
14 import 'yaml_exception.dart'; | |
15 | |
16 /// A scanner that reads a string of Unicode characters and emits [Token]s. | |
17 /// | |
18 /// This is based on the libyaml scanner, available at | |
19 /// https://github.com/yaml/libyaml/blob/master/src/scanner.c. The license for | |
20 /// that is available in ../../libyaml-license.txt. | |
21 class Scanner { | |
// Whitespace and line-break code points.
static const int TAB = 0x9;
static const int LF = 0xA;
static const int CR = 0xD;
static const int SP = 0x20;

// ASCII punctuation code points.
static const int DOLLAR = 0x24;
static const int LEFT_PAREN = 0x28;
static const int RIGHT_PAREN = 0x29;
static const int PLUS = 0x2B;
static const int COMMA = 0x2C;
static const int HYPHEN = 0x2D;
static const int PERIOD = 0x2E;
static const int QUESTION = 0x3F;
static const int COLON = 0x3A;
static const int SEMICOLON = 0x3B;
static const int EQUALS = 0x3D;
static const int LEFT_SQUARE = 0x5B;
static const int RIGHT_SQUARE = 0x5D;
static const int LEFT_CURLY = 0x7B;
static const int RIGHT_CURLY = 0x7D;
static const int HASH = 0x23;
static const int AMPERSAND = 0x26;
static const int ASTERISK = 0x2A;
static const int EXCLAMATION = 0x21;
static const int VERTICAL_BAR = 0x7C;
static const int LEFT_ANGLE = 0x3C;
static const int RIGHT_ANGLE = 0x3E;
static const int SINGLE_QUOTE = 0x27;
static const int DOUBLE_QUOTE = 0x22;
static const int PERCENT = 0x25;
static const int AT = 0x40;
static const int GRAVE_ACCENT = 0x60;
static const int TILDE = 0x7E;

// Control characters, further punctuation, and non-ASCII code points.
static const int NULL = 0x0;
static const int BELL = 0x7;
static const int BACKSPACE = 0x8;
static const int VERTICAL_TAB = 0xB;
static const int FORM_FEED = 0xC;
static const int ESCAPE = 0x1B;
static const int SLASH = 0x2F;
static const int BACKSLASH = 0x5C;
static const int UNDERSCORE = 0x5F;
static const int NEL = 0x85;
static const int NBSP = 0xA0;
static const int LINE_SEPARATOR = 0x2028;
static const int PARAGRAPH_SEPARATOR = 0x2029;
static const int BOM = 0xFEFF;

// Bounds of the ASCII decimal digit range.
static const int NUMBER_0 = 0x30;
static const int NUMBER_9 = 0x39;

// Selected lower-case ASCII letters.
static const int LETTER_A = 0x61;
static const int LETTER_B = 0x62;
static const int LETTER_E = 0x65;
static const int LETTER_F = 0x66;
static const int LETTER_N = 0x6E;
static const int LETTER_R = 0x72;
static const int LETTER_T = 0x74;
static const int LETTER_U = 0x75;
static const int LETTER_V = 0x76;
static const int LETTER_X = 0x78;
static const int LETTER_Z = 0x7A;

// Selected upper-case ASCII letters.
static const int LETTER_CAP_A = 0x41;
static const int LETTER_CAP_F = 0x46;
static const int LETTER_CAP_L = 0x4C;
static const int LETTER_CAP_N = 0x4E;
static const int LETTER_CAP_P = 0x50;
static const int LETTER_CAP_U = 0x55;
static const int LETTER_CAP_X = 0x58;
static const int LETTER_CAP_Z = 0x5A;
93 | |
/// Reads characters from the source text.
///
/// Line/column tracking and [SourceSpan] creation are delegated to this
/// [SpanScanner] as well.
final SpanScanner _scanner;

/// Set once the [TokenType.STREAM_START] token, which marks the beginning of
/// the YAML stream, has been produced.
bool _streamStartProduced = false;

/// Set once the [TokenType.STREAM_END] token, which marks the end of the YAML
/// stream, has been produced.
bool _streamEndProduced = false;

/// Tokens that have been scanned but not yet emitted.
///
/// Tokens are buffered here so that a [TokenType.KEY] token can still be
/// inserted in front of them when the scanner discovers that they form a
/// mapping key.
final QueueList<Token> _tokens = new QueueList<Token>();

/// How many tokens have already been emitted.
///
/// Tokens still waiting in [_tokens] are not included.
int _tokensParsed = 0;

/// Whether the token at the head of [_tokens] may be returned.
///
/// It is not ready while a [TokenType.KEY] might still be inserted before it.
bool _tokenAvailable = false;

/// The indentation levels of the enclosing block contexts, innermost last.
///
/// The YAML spec specifies that the initial indentation level is -1 spaces.
final List<int> _indents = <int>[-1];

/// Whether a simple key may start at the current position.
///
/// A simple key is any mapping key that doesn't have an explicit "?".
bool _simpleKeyAllowed = true;

/// Candidate simple keys, one slot per level of flow nesting.
///
/// A slot holds `null` when there is no valid simple key for that level.
///
/// When a ":" is parsed while a simple key is available, a [TokenType.KEY]
/// token is inserted into [_tokens] ahead of that key's token so the parser
/// can tell that the key is meant as a mapping key.
final List<_SimpleKey> _simpleKeys = <_SimpleKey>[null];
145 | |
/// The innermost indentation level, i.e. the last entry of [_indents].
int get _indent {
  return _indents.last;
}
148 | |
/// Whether the scanner is inside a block-level structure rather than a
/// flow-level one.
///
/// This holds exactly when [_simpleKeys] has a single entry.
bool get _inBlockContext {
  return _simpleKeys.length == 1;
}
152 | |
/// Whether the scanner is at a line break or has run out of source text.
bool get _isBreakOrEnd {
  if (_scanner.isDone) return true;
  return _isBreak;
}
155 | |
/// Whether the character at the current position is a line break.
bool get _isBreak {
  return _isBreakAt(0);
}
158 | |
/// Whether the scanner is at whitespace or has run out of source text.
bool get _isBlankOrEnd {
  return _isBlankOrEndAt(0);
}
161 | |
/// Whether the character at the current position is whitespace.
bool get _isBlank {
  return _isBlankAt(0);
}
164 | |
/// Whether the character at the current position may appear in a tag name.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-tag-name.
bool get _isTagChar {
  var code = _scanner.peekChar();
  if (code == null) return false;

  // ASCII digits and letters are accepted.
  if (code >= NUMBER_0 && code <= NUMBER_9) return true;
  if (code >= LETTER_A && code <= LETTER_Z) return true;
  if (code >= LETTER_CAP_A && code <= LETTER_CAP_Z) return true;

  // Apart from those, only this fixed set of punctuation is accepted.
  const punctuation = const <int>[
    HYPHEN,
    SEMICOLON,
    SLASH,
    COLON,
    AT,
    AMPERSAND,
    EQUALS,
    PLUS,
    DOLLAR,
    PERIOD,
    TILDE,
    QUESTION,
    ASTERISK,
    SINGLE_QUOTE,
    LEFT_PAREN,
    RIGHT_PAREN,
    PERCENT
  ];
  return punctuation.contains(code);
}
196 | |
/// Whether the character at the current position may appear in an anchor
/// name.
///
/// That is any non-space character (see [_isNonSpace]) other than ",", "[",
/// "]", "{" and "}".
///
/// See http://yaml.org/spec/1.2/spec.html#ns-anchor-name.
bool get _isAnchorChar {
  if (!_isNonSpace) return false;

  var code = _scanner.peekChar();
  return code != COMMA &&
      code != LEFT_SQUARE &&
      code != RIGHT_SQUARE &&
      code != LEFT_CURLY &&
      code != RIGHT_CURLY;
}
214 | |
/// Whether the character at the current position is an ASCII decimal digit.
///
/// Returns `false` at the end of the source.
bool get _isDigit {
  var code = _scanner.peekChar();
  if (code == null) return false;
  return NUMBER_0 <= code && code <= NUMBER_9;
}
220 | |
/// Whether the character at the current position is a hexadecimal digit
/// (`0-9`, `a-f` or `A-F`).
///
/// Returns `false` at the end of the source.
bool get _isHex {
  var code = _scanner.peekChar();
  if (code == null) return false;

  if (code >= NUMBER_0 && code <= NUMBER_9) return true;
  if (code >= LETTER_A && code <= LETTER_F) return true;
  return code >= LETTER_CAP_A && code <= LETTER_CAP_F;
}
230 | |
/// Whether the character at the current position is a plain character.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c).
bool get _isPlainChar {
  return _isPlainCharAt(0);
}
235 | |
/// Whether the character at the current position is a printable character
/// that is neither a line break (LF or CR) nor a byte-order mark.
///
/// Returns `false` at the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#nb-char.
bool get _isNonBreak {
  var code = _scanner.peekChar();
  if (code == null) return false;

  // Explicitly rejected characters.
  if (code == LF || code == CR || code == BOM) return false;

  // Accepted characters that lie outside the ranges below.
  if (code == TAB || code == NEL) return true;

  if (code >= 0x00020 && code <= 0x00007E) return true;
  if (code >= 0x000A0 && code <= 0x00D7FF) return true;
  if (code >= 0x0E000 && code <= 0x00FFFD) return true;
  return code >= 0x10000 && code <= 0x10FFFF;
}
258 | |
/// Whether the character at the current position is a printable character
/// other than whitespace.
///
/// LF, CR, the byte-order mark and the space character are rejected. Returns
/// `false` at the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#nb-char.
bool get _isNonSpace {
  var code = _scanner.peekChar();
  if (code == null) return false;

  // Explicitly rejected characters.
  if (code == LF || code == CR || code == BOM || code == SP) return false;

  // NEL is accepted although it lies outside the ranges below.
  if (code == NEL) return true;

  if (code >= 0x00020 && code <= 0x00007E) return true;
  if (code >= 0x000A0 && code <= 0x00D7FF) return true;
  if (code >= 0x0E000 && code <= 0x00FFFD) return true;
  return code >= 0x10000 && code <= 0x10FFFF;
}
281 | |
/// Whether the current position begins a document indicator (`---` or
/// `...`).
///
/// The indicator must start in column 0 and be followed by whitespace or the
/// end of the source. If so, this sets the scanner's last match to that
/// indicator.
bool get _isDocumentIndicator {
  if (_scanner.column != 0) return false;
  if (!_isBlankOrEndAt(3)) return false;
  return _scanner.matches('---') || _scanner.matches('...');
}
290 | |
/// Creates a scanner over the text in [source].
///
/// [sourceUrl] may be either a String or a [Uri]; it is passed through to the
/// underlying [SpanScanner].
Scanner(String source, {sourceUrl})
    : _scanner = new SpanScanner.eager(
          source,
          sourceUrl: sourceUrl);
296 | |
/// Consumes and returns the next token.
///
/// Throws a [StateError] if the [TokenType.STREAM_END] token has already been
/// returned.
Token scan() {
  if (_streamEndProduced) throw new StateError("Out of tokens.");
  if (!_tokenAvailable) _fetchMoreTokens();

  var next = _tokens.removeFirst();
  _tokensParsed++;

  // The new head of the queue has to be re-validated before it is handed out.
  _tokenAvailable = false;
  _streamEndProduced = next is Token && next.type == TokenType.STREAM_END;
  return next;
}
309 | |
/// Discards the next token and returns the one following it without
/// consuming it.
Token advance() {
  scan();
  var upcoming = peek();
  return upcoming;
}
315 | |
/// Returns the next token without consuming it.
///
/// Returns `null` once the [TokenType.STREAM_END] token has been consumed.
Token peek() {
  if (!_streamEndProduced) {
    if (!_tokenAvailable) _fetchMoreTokens();
    return _tokens.first;
  }
  return null;
}
322 | |
/// Scans until the token at the head of [_tokens] is safe to return, then
/// marks it as available.
void _fetchMoreTokens() {
  for (;;) {
    if (_tokens.isNotEmpty) {
      _staleSimpleKeys();

      // The head token may still turn out to be a simple key. Until that is
      // settled we have to keep scanning, or its value could be emitted
      // before the `KEY` token.
      var headMayBeKey = false;
      for (var candidate in _simpleKeys) {
        if (candidate != null && candidate.tokenNumber == _tokensParsed) {
          headMayBeKey = true;
          break;
        }
      }
      if (!headMayBeKey) break;
    }

    _fetchNextToken();
  }

  _tokenAvailable = true;
}
342 | |
/// The dispatcher for token fetchers.
///
/// Emits the stream-start token on the first call. On later calls it skips to
/// the next token, expires stale simple keys, unrolls the indentation to the
/// current column, and then picks a fetcher based on the current character.
///
/// An invalid leading character is reported through
/// [_invalidScalarCharacter].
void _fetchNextToken() {
  if (!_streamStartProduced) {
    _fetchStreamStart();
    return;
  }

  _scanToNextToken();
  _staleSimpleKeys();
  _unrollIndent(_scanner.column);

  if (_scanner.isDone) {
    _fetchStreamEnd();
    return;
  }

  // Directives and document indicators are only recognized in column 0.
  if (_scanner.column == 0) {
    if (_scanner.peekChar() == PERCENT) {
      _fetchDirective();
      return;
    }

    // An indicator must be followed by whitespace or the end of the source.
    if (_isBlankOrEndAt(3)) {
      if (_scanner.matches('---')) {
        _fetchDocumentIndicator(TokenType.DOCUMENT_START);
        return;
      }

      if (_scanner.matches('...')) {
        _fetchDocumentIndicator(TokenType.DOCUMENT_END);
        return;
      }
    }
  }

  // Every branch below returns (or throws), so nothing follows the switch.
  switch (_scanner.peekChar()) {
    case LEFT_SQUARE:
      _fetchFlowCollectionStart(TokenType.FLOW_SEQUENCE_START);
      return;
    case LEFT_CURLY:
      _fetchFlowCollectionStart(TokenType.FLOW_MAPPING_START);
      return;
    case RIGHT_SQUARE:
      _fetchFlowCollectionEnd(TokenType.FLOW_SEQUENCE_END);
      return;
    case RIGHT_CURLY:
      _fetchFlowCollectionEnd(TokenType.FLOW_MAPPING_END);
      return;
    case COMMA:
      _fetchFlowEntry();
      return;
    case ASTERISK:
      _fetchAnchor(anchor: false);
      return;
    case AMPERSAND:
      _fetchAnchor(anchor: true);
      return;
    case EXCLAMATION:
      _fetchTag();
      return;
    case SINGLE_QUOTE:
      _fetchFlowScalar(singleQuote: true);
      return;
    case DOUBLE_QUOTE:
      _fetchFlowScalar(singleQuote: false);
      return;
    case VERTICAL_BAR:
      // Block scalars are rejected outside the block context.
      if (!_inBlockContext) _invalidScalarCharacter();
      _fetchBlockScalar(literal: true);
      return;
    case RIGHT_ANGLE:
      if (!_inBlockContext) _invalidScalarCharacter();
      _fetchBlockScalar(literal: false);
      return;
    case PERCENT:
    case AT:
    case GRAVE_ACCENT:
      _invalidScalarCharacter();
      return;

    // These characters may sometimes begin plain scalars.
    case HYPHEN:
      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchBlockEntry();
      }
      return;
    case QUESTION:
      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchKey();
      }
      return;
    case COLON:
      if (!_inBlockContext && _tokens.isNotEmpty) {
        // If a colon follows a "JSON-like" value (an explicit map or list, or
        // a quoted string) it isn't required to have whitespace after it
        // since it unambiguously describes a map.
        var token = _tokens.last;
        if (token.type == TokenType.FLOW_SEQUENCE_END ||
            token.type == TokenType.FLOW_MAPPING_END ||
            (token.type == TokenType.SCALAR && token.style.isQuoted)) {
          _fetchValue();
          return;
        }
      }

      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchValue();
      }
      return;
    default:
      if (!_isNonBreak) _invalidScalarCharacter();

      _fetchPlainScalar();
      return;
  }
}
467 | |
/// Reports the character at the current position as disallowed, via the
/// underlying scanner's error mechanism.
void _invalidScalarCharacter() {
  _scanner.error("Unexpected character.", length: 1);
}
471 | |
/// Drops candidate simple keys that can no longer become keys.
///
/// Throws a [YamlException] if a dropped candidate was required.
void _staleSimpleKeys() {
  // libyaml requires that all simple keys be a single line and no longer
  // than 1024 characters. However, in section 7.4.2 of the spec
  // (http://yaml.org/spec/1.2/spec.html#id2790832), these restrictions are
  // only applied when the curly braces are omitted. It's difficult to retain
  // enough context to know which keys need the restriction, so for now
  // everything is allowed except multiline simple keys in a block context.
  // Outside the block context nothing is ever expired.
  if (!_inBlockContext) return;

  for (var index = 0; index < _simpleKeys.length; index++) {
    var candidate = _simpleKeys[index];

    // Empty slots and candidates on the current line stay as they are.
    if (candidate == null || candidate.line == _scanner.line) continue;

    if (candidate.required) {
      throw new YamlException("Expected ':'.", _scanner.emptySpan);
    }

    _simpleKeys[index] = null;
  }
}
497 | |
/// Records the current position as a candidate simple key, provided simple
/// keys are allowed here.
void _saveSimpleKey() {
  // In the block context, a token that starts exactly at the indentation
  // level has to be a simple key.
  var mustBeKey = _inBlockContext && _indent == _scanner.column;

  // A simple key is required only when it is the first token in the current
  // line, so it should always be allowed there. Check anyway.
  assert(!(mustBeKey && !_simpleKeyAllowed));

  if (!_simpleKeyAllowed) return;

  // Replace whatever candidate the current flow level had.
  _removeSimpleKey();
  var candidate = new _SimpleKey(
      _tokensParsed + _tokens.length,
      _scanner.line,
      _scanner.column,
      _scanner.location,
      required: mustBeKey);
  _simpleKeys[_simpleKeys.length - 1] = candidate;
}
521 | |
/// Clears the candidate simple key of the current flow level.
///
/// Throws a [YamlException] if that candidate was required.
void _removeSimpleKey() {
  var current = _simpleKeys.last;
  if (current != null) {
    if (current.required) {
      throw new YamlException("Could not find expected ':' for simple key.",
          current.location.pointSpan());
    }
  }

  _simpleKeys[_simpleKeys.length - 1] = null;
}
532 | |
/// Enters a new flow level by adding an empty slot to [_simpleKeys].
void _increaseFlowLevel() => _simpleKeys.add(null);
537 | |
/// Leaves the current flow level, if any.
///
/// Does nothing when the scanner is already in the block context.
void _decreaseFlowLevel() {
  if (!_inBlockContext) _simpleKeys.removeLast();
}
543 | |
/// Opens a new indentation level at [column] if it is deeper than [_indent].
///
/// Does nothing outside the block context or when [column] is not deeper
/// than the current level. Otherwise [column] is pushed onto [_indents] and a
/// token of [type], spanning the point at [location], is queued. If
/// [tokenNumber] is given, the token is inserted at that token's position in
/// [_tokens]; otherwise it is appended.
void _rollIndent(int column, TokenType type, SourceLocation location,
    {int tokenNumber}) {
  if (!_inBlockContext || (_indent != -1 && _indent >= column)) return;

  _indents.add(column);

  var marker = new Token(type, location.pointSpan());
  if (tokenNumber != null) {
    // Token numbers count emitted tokens too, so convert to a queue index.
    _tokens.insert(tokenNumber - _tokensParsed, marker);
  } else {
    _tokens.add(marker);
  }
}
567 | |
/// Closes indentation levels until [_indent] is no deeper than [column].
///
/// A [TokenType.BLOCK_END] token is appended for every level closed. Does
/// nothing outside the block context.
void _unrollIndent(int column) {
  if (_inBlockContext) {
    while (_indent > column) {
      var blockEnd = new Token(TokenType.BLOCK_END, _scanner.emptySpan);
      _tokens.add(blockEnd);
      _indents.removeLast();
    }
  }
}
580 | |
/// Closes every open indentation level, returning [_indent] to -1.
///
/// A [TokenType.BLOCK_END] token is appended for every level closed.
void _resetIndent() {
  _unrollIndent(-1);
}
586 | |
/// Queues the [TokenType.STREAM_START] token and records that it was
/// produced.
void _fetchStreamStart() {
  // Much of libyaml's initialization logic here is done in variable
  // initializers instead.
  var streamStart = new Token(TokenType.STREAM_START, _scanner.emptySpan);
  _tokens.add(streamStart);
  _streamStartProduced = true;
}
594 | |
/// Queues the [TokenType.STREAM_END] token.
///
/// All open indentation levels are closed and the pending simple key is
/// cleared first.
void _fetchStreamEnd() {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  var streamEnd = new Token(TokenType.STREAM_END, _scanner.emptySpan);
  _tokens.add(streamEnd);
}
602 | |
/// Queues a [TokenType.VERSION_DIRECTIVE] or [TokenType.TAG_DIRECTIVE] token.
///
/// Nothing is queued when [_scanDirective] returns `null`.
void _fetchDirective() {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  var scanned = _scanDirective();
  if (scanned == null) return;
  _tokens.add(scanned);
}
612 | |
/// Queues a [TokenType.DOCUMENT_START] or [TokenType.DOCUMENT_END] token,
/// as selected by [type].
void _fetchDocumentIndicator(TokenType type) {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  // The indicator is three characters long; consume all of them.
  var before = _scanner.state;
  for (var consumed = 0; consumed < 3; consumed++) {
    _scanner.readChar();
  }

  _tokens.add(new Token(type, _scanner.spanFrom(before)));
}
627 | |
/// Queues a [TokenType.FLOW_SEQUENCE_START] or
/// [TokenType.FLOW_MAPPING_START] token, as selected by [type].
void _fetchFlowCollectionStart(TokenType type) {
  // The collection itself may be a simple key, so record it before entering
  // the new flow level.
  _saveSimpleKey();
  _increaseFlowLevel();

  // A simple key may follow the opening indicator.
  _simpleKeyAllowed = true;
  _addCharToken(type);
}
636 | |
/// Queues a [TokenType.FLOW_SEQUENCE_END] or [TokenType.FLOW_MAPPING_END]
/// token, as selected by [type].
void _fetchFlowCollectionEnd(TokenType type) {
  // Clear the candidate of the level being closed, then leave that level.
  _removeSimpleKey();
  _decreaseFlowLevel();

  // No simple key may follow the closing indicator.
  _simpleKeyAllowed = false;
  _addCharToken(type);
}
645 | |
/// Queues a [TokenType.FLOW_ENTRY] token.
void _fetchFlowEntry() {
  _removeSimpleKey();

  // A simple key may follow the entry indicator.
  _simpleKeyAllowed = true;
  _addCharToken(TokenType.FLOW_ENTRY);
}
652 | |
/// Queues a [TokenType.BLOCK_ENTRY] token.
///
/// In the block context this throws a [YamlException] if simple keys are not
/// allowed here, and otherwise opens a block sequence at the current column
/// if needed.
void _fetchBlockEntry() {
  // It is an error for the '-' indicator to occur in the flow context, but
  // we let the Parser detect and report it because it's able to point to
  // the context. Only the block context is checked here.
  if (_inBlockContext) {
    if (!_simpleKeyAllowed) {
      throw new YamlException(
          "Block sequence entries are not allowed here.",
          _scanner.emptySpan);
    }

    _rollIndent(
        _scanner.column, TokenType.BLOCK_SEQUENCE_START, _scanner.location);
  }

  _removeSimpleKey();
  _simpleKeyAllowed = true;
  _addCharToken(TokenType.BLOCK_ENTRY);
}
676 | |
/// Queues the [TokenType.KEY] token for an explicit `?` indicator.
///
/// In the block context this throws a [YamlException] if simple keys are not
/// allowed here, and otherwise opens a block mapping at the current column
/// if needed.
void _fetchKey() {
  if (_inBlockContext) {
    if (!_simpleKeyAllowed) {
      throw new YamlException("Mapping keys are not allowed here.",
          _scanner.emptySpan);
    }

    _rollIndent(
        _scanner.column, TokenType.BLOCK_MAPPING_START, _scanner.location);
  }

  // Simple keys are allowed after `?` in a block context.
  _simpleKeyAllowed = _inBlockContext;
  _addCharToken(TokenType.KEY);
}
695 | |
/// Queues the [TokenType.VALUE] token for a `:` indicator.
///
/// If a simple key is pending at the current flow level, a [TokenType.KEY]
/// token (and, where needed, a [TokenType.BLOCK_MAPPING_START] token) is
/// inserted in front of that key's first token.
///
/// Throws a [YamlException] in the block context if no simple key is pending
/// and simple keys are not allowed here.
void _fetchValue() {
  var pending = _simpleKeys.last;
  if (pending != null) {
    // Insert a [TokenType.KEY] token before the first token of the simple
    // key so the parser knows that it's part of a key/value pair.
    var keyToken = new Token(TokenType.KEY, pending.location.pointSpan());
    _tokens.insert(pending.tokenNumber - _tokensParsed, keyToken);

    // In the block context, a [TokenType.BLOCK_MAPPING_START] token may have
    // to be inserted at the same position.
    _rollIndent(
        pending.column, TokenType.BLOCK_MAPPING_START, pending.location,
        tokenNumber: pending.tokenNumber);

    // The candidate has been used up.
    _simpleKeys[_simpleKeys.length - 1] = null;

    // A simple key cannot follow another simple key.
    _simpleKeyAllowed = false;
  } else if (_inBlockContext) {
    if (!_simpleKeyAllowed) {
      throw new YamlException(
          "Mapping values are not allowed here. Did you miss a colon "
              "earlier?",
          _scanner.emptySpan);
    }

    // Reaching this point means the ':' indicator follows a complex key.
    _rollIndent(
        _scanner.column, TokenType.BLOCK_MAPPING_START, _scanner.location);
    _simpleKeyAllowed = true;
  } else if (_simpleKeyAllowed) {
    // Reaching this point means the ':' indicator has an empty key. This
    // behavior differs from libyaml, which disallows empty implicit keys.
    //
    // NOTE(review): [_addCharToken] reads one character, so on this path the
    // KEY token and the VALUE token below each consume a character. Confirm
    // that this is intended.
    _simpleKeyAllowed = false;
    _addCharToken(TokenType.KEY);
  }

  _addCharToken(TokenType.VALUE);
}
742 | |
/// Consumes one character and appends a token of [type] to [_tokens].
///
/// The token's span covers exactly the consumed character.
void _addCharToken(TokenType type) {
  var before = _scanner.state;
  _scanner.readChar();

  var span = _scanner.spanFrom(before);
  _tokens.add(new Token(type, span));
}
751 | |
/// Queues a [TokenType.ANCHOR] token if [anchor] is `true`, or a
/// [TokenType.ALIAS] token otherwise.
void _fetchAnchor({bool anchor: true}) {
  // The anchor or alias may begin a simple key; nothing after it may.
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var scanned = _scanAnchor(anchor: anchor);
  _tokens.add(scanned);
}
758 | |
/// Queues a [TokenType.TAG] token.
void _fetchTag() {
  // The tag may begin a simple key; nothing after it may.
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var scanned = _scanTag();
  _tokens.add(scanned);
}
765 | |
/// Queues a [TokenType.SCALAR] token with style [ScalarStyle.LITERAL] or
/// [ScalarStyle.FOLDED], as selected by [literal].
void _fetchBlockScalar({bool literal: false}) {
  _removeSimpleKey();

  // A simple key may follow a block scalar.
  _simpleKeyAllowed = true;

  var scanned = _scanBlockScalar(literal: literal);
  _tokens.add(scanned);
}
773 | |
/// Queues a [TokenType.SCALAR] token with style [ScalarStyle.SINGLE_QUOTED]
/// or [ScalarStyle.DOUBLE_QUOTED], as selected by [singleQuote].
void _fetchFlowScalar({bool singleQuote: false}) {
  // The scalar may begin a simple key; nothing after it may.
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var scanned = _scanFlowScalar(singleQuote: singleQuote);
  _tokens.add(scanned);
}
781 | |
/// Queues a [TokenType.SCALAR] token with style [ScalarStyle.PLAIN].
void _fetchPlainScalar() {
  // The scalar may begin a simple key; nothing after it may.
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var scanned = _scanPlainScalar();
  _tokens.add(scanned);
}
788 | |
/// Skips whitespace, comments and line breaks until the scanner reaches the
/// start of the next token.
///
/// Reports an error if a tab is used as indentation in the block context.
void _scanToNextToken() {
  var afterLineBreak = false;
  for (;;) {
    // Allow the BOM to start a line.
    if (_scanner.column == 0) _scanner.scan("\uFEFF");

    // Eat whitespace.
    //
    // libyaml disallows tabs after "-", "?", or ":", but the spec allows
    // them. See section 6.2: http://yaml.org/spec/1.2/spec.html#id2778241.
    // Tabs are skipped except after a line break in the block context.
    for (;;) {
      var code = _scanner.peekChar();
      var tabSkippable = !_inBlockContext || !afterLineBreak;
      if (code != SP && !(tabSkippable && code == TAB)) break;
      _scanner.readChar();
    }

    // A tab left over at this point is being used as indentation.
    if (_scanner.peekChar() == TAB) {
      _scanner.error("Tab characters are not allowed as indentation.",
          length: 1);
    }

    // Eat a comment until a line break.
    _skipComment();

    // Anything other than a line break is the start of a token.
    if (!_isBreak) break;

    _skipLine();

    // In the block context, a new line may start a simple key.
    if (_inBlockContext) _simpleKeyAllowed = true;
    afterLineBreak = true;
  }
}
827 | |
/// Scans a version (`%YAML`) or tag (`%TAG`) directive token.
///
///     %YAML    1.2    # a comment \n
///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
///     %TAG    !yaml!  tag:yaml.org,2002:  \n
///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
///
/// For any other directive name a warning is issued, the rest of the line is
/// skipped, and `null` is returned.
///
/// Throws a [YamlException] if a known directive is followed by anything
/// other than a comment or a line break.
Token _scanDirective() {
  var start = _scanner.state;

  // Eat '%'.
  _scanner.readChar();

  var name = _scanDirectiveName();
  if (name != "YAML" && name != "TAG") {
    warn("Warning: unknown directive.", _scanner.spanFrom(start));

    // libyaml doesn't support unknown directives, but the spec says to ignore
    // them and warn: http://yaml.org/spec/1.2/spec.html#id2781147.
    while (!_isBreakOrEnd) {
      _scanner.readChar();
    }

    return null;
  }

  var token;
  if (name == "YAML") {
    token = _scanVersionDirectiveValue(start);
  } else {
    token = _scanTagDirectiveValue(start);
  }

  // Eat the rest of the line, including any comments.
  _skipBlanks();
  _skipComment();

  if (!_isBreakOrEnd) {
    throw new YamlException(
        "Expected comment or line break after directive.",
        _scanner.spanFrom(start));
  }

  _skipLine();
  return token;
}
871 | |
/// Scans and returns the name of a directive.
///
///      %YAML   1.2     # a comment \n
///       ^^^^
///      %TAG    !yaml!  tag:yaml.org,2002:  \n
///       ^^^
///
/// Throws a [YamlException] if the name is empty or is not followed by
/// whitespace or the end of the source.
String _scanDirectiveName() {
  // libyaml only allows word characters in directive names, but the spec
  // disagrees: http://yaml.org/spec/1.2/spec.html#ns-directive-name.
  var nameStart = _scanner.position;
  while (_isNonSpace) {
    _scanner.readChar();
  }

  var name = _scanner.substring(nameStart);
  if (name.isEmpty) {
    throw new YamlException("Expected directive name.", _scanner.emptySpan);
  }
  if (!_isBlankOrEnd) {
    throw new YamlException(
        "Unexpected character in directive name.", _scanner.emptySpan);
  }

  return name;
}
896 | |
/// Scans the `major.minor` value of a version directive.
///
///      %YAML   1.2     # a comment \n
///           ^^^^^^
///
/// The returned token spans from [start] to the end of the minor number.
Token _scanVersionDirectiveValue(LineScannerState start) {
  _skipBlanks();

  var major = _scanVersionDirectiveNumber();
  _scanner.expect('.');
  var minor = _scanVersionDirectiveNumber();

  var span = _scanner.spanFrom(start);
  return new VersionDirectiveToken(span, major, minor);
}
910 | |
/// Scans one numeric component of a version directive and returns its value.
///
///      %YAML   1.2     # a comment \n
///              ^
///      %YAML   1.2     # a comment \n
///                ^
///
/// Throws a [YamlException] if there is no digit at the current position.
int _scanVersionDirectiveNumber() {
  var digitsStart = _scanner.position;
  while (_isDigit) {
    _scanner.readChar();
  }

  var digits = _scanner.substring(digitsStart);
  if (digits.isEmpty) {
    throw new YamlException("Expected version number.", _scanner.emptySpan);
  }

  return int.parse(digits);
}
930 | |
/// Scans the handle and prefix of a tag directive.
///
///      %TAG    !yaml!  tag:yaml.org,2002:  \n
///          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
///
/// Throws a [YamlException] if the handle is not followed by whitespace, or
/// if the prefix is not followed by whitespace or the end of the source.
/// The returned token spans from [start] to the end of the prefix.
Token _scanTagDirectiveValue(LineScannerState start) {
  _skipBlanks();

  var handle = _scanTagHandle(directive: true);
  if (!_isBlank) {
    throw new YamlException("Expected whitespace.", _scanner.emptySpan);
  }

  _skipBlanks();

  var prefix = _scanTagUri();
  if (!_isBlankOrEnd) {
    throw new YamlException("Expected whitespace.", _scanner.emptySpan);
  }

  var span = _scanner.spanFrom(start);
  return new TagDirectiveToken(span, handle, prefix);
}
952 | |
/// Scans a [TokenType.ANCHOR] token, or an alias token when [anchor] is false.
///
/// Throws a [YamlException] if the name is empty or is followed by a
/// character that may not terminate it.
Token _scanAnchor({bool anchor: true}) {
  var tokenStart = _scanner.state;

  // Eat the indicator character.
  _scanner.readChar();

  // libyaml only allows word characters in anchor names, but the spec
  // disagrees: http://yaml.org/spec/1.2/spec.html#ns-anchor-char.
  var nameStart = _scanner.position;
  while (_isAnchorChar) {
    _scanner.readChar();
  }
  var name = _scanner.substring(nameStart);

  // Besides blanks and the end of input, only these indicator characters may
  // directly follow the name.
  var following = _scanner.peekChar();
  var isTerminator = following == QUESTION ||
      following == COLON ||
      following == COMMA ||
      following == RIGHT_SQUARE ||
      following == RIGHT_CURLY ||
      following == PERCENT ||
      following == AT ||
      following == GRAVE_ACCENT;
  if (name.isEmpty || !(_isBlankOrEnd || isTerminator)) {
    throw new YamlException("Expected alphanumeric character.",
        _scanner.emptySpan);
  }

  var span = _scanner.spanFrom(tokenStart);
  if (!anchor) return new AliasToken(span, name);
  return new AnchorToken(span, name);
}
983 | |
/// Scans a [TokenType.TAG] token.
///
/// Handles the verbatim form `!<uri>` as well as the `!`, `!suffix`, and
/// `!handle!suffix` forms.
Token _scanTag() {
  var tokenStart = _scanner.state;
  var handle;
  var suffix;

  if (_scanner.peekChar(1) == LEFT_ANGLE) {
    // The tag is in the canonical form. Eat '!<'.
    _scanner.readChar();
    _scanner.readChar();

    handle = '';
    suffix = _scanTagUri();

    _scanner.expect('>');
  } else {
    // The tag has either the '!suffix' or the '!handle!suffix' form, so try to
    // scan a handle first.
    handle = _scanTagHandle();

    var hasExplicitHandle =
        handle.length > 1 && handle.startsWith('!') && handle.endsWith('!');
    if (hasExplicitHandle) {
      suffix = _scanTagUri(flowSeparators: false);
    } else {
      // There was no explicit handle; what was scanned is the head of the
      // suffix.
      suffix = _scanTagUri(head: handle, flowSeparators: false);

      if (suffix.isNotEmpty) {
        handle = '!';
      } else {
        // This is the special '!' tag.
        handle = null;
        suffix = '!';
      }
    }
  }

  // libyaml insists on whitespace after a tag, but example 7.2 indicates
  // that it's not required: http://yaml.org/spec/1.2/spec.html#id2786720.

  return new TagToken(_scanner.spanFrom(tokenStart), handle, suffix);
}
1027 | |
/// Scans a tag handle and returns it, including its leading '!'.
///
/// The result ends with a second '!' only if one was present in the source.
/// When [directive] is true (the handle belongs to a `%TAG` directive), a
/// handle other than the bare '!' must be terminated by '!'.
String _scanTagHandle({bool directive: false}) {
  _scanner.expect('!');

  // libyaml only allows word characters in tags, but the spec disagrees:
  // http://yaml.org/spec/1.2/spec.html#ns-tag-char.
  var begin = _scanner.position;
  while (_isTagChar) {
    _scanner.readChar();
  }

  var handle = new StringBuffer('!')..write(_scanner.substring(begin));

  if (_scanner.peekChar() == EXCLAMATION) {
    handle.writeCharCode(_scanner.readChar());
  } else if (directive && handle.toString() != '!') {
    // It's either the '!' tag or not really a tag handle. If it's a %TAG
    // directive, it's an error. If it's a tag token, it must be part of a
    // URI.
    _scanner.expect('!');
  }

  return handle.toString();
}
1053 | |
/// Scans a tag URI and returns it with its percent-escapes decoded.
///
/// [head] is the initial portion of the tag that's already been scanned,
/// including its leading '!', or `null` if there is none. Everything in [head]
/// after the leading '!' is prepended to the text scanned here, so that a tag
/// whose characters were all consumed while looking for a handle (such as
/// `!foo`) still produces the URI `foo` rather than an empty string.
///
/// [flowSeparators] indicates whether the tag URI can contain the flow
/// separators ',', '[', and ']'.
String _scanTagUri({String head, bool flowSeparators: true}) {
  var buffer = new StringBuffer();

  // Copy the head if needed.
  //
  // Note that we don't copy the leading '!' character.
  if (head != null && head.length > 1) buffer.write(head.substring(1));

  // The set of characters that may appear in URI is as follows:
  //
  //   '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
  //   '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
  //   '%'.
  //
  // In a shorthand tag annotation, the flow separators ',', '[', and ']' are
  // disallowed.
  var start = _scanner.position;
  var char = _scanner.peekChar();
  while (_isTagChar || (flowSeparators &&
      (char == COMMA || char == LEFT_SQUARE || char == RIGHT_SQUARE))) {
    _scanner.readChar();
    char = _scanner.peekChar();
  }

  // Append the newly-scanned text to the copied head. Returning only the
  // scanned text would silently drop [head].
  buffer.write(_scanner.substring(start));

  // libyaml manually decodes the URL, but we don't have to do that.
  //
  // NOTE(review): Uri.decodeFull presumably throws on malformed
  // percent-escapes; confirm whether that should surface as a YamlException.
  return Uri.decodeFull(buffer.toString());
}
1087 | |
/// Scans a block scalar and returns it as a [ScalarToken].
///
/// The header after the '|' or '>' indicator may hold a chomping indicator
/// ('+' or '-') and a single-digit indentation indicator, in either order,
/// followed by optional blanks and a comment. When [literal] is true the
/// token has [ScalarStyle.LITERAL] and line breaks are never folded;
/// otherwise it has [ScalarStyle.FOLDED].
///
/// Throws a [YamlException] if the indentation indicator is 0 or if the
/// header is followed by anything other than a comment or a line break.
Token _scanBlockScalar({bool literal: false}) {
  var start = _scanner.state;

  // Eat the indicator '|' or '>'.
  _scanner.readChar();

  // Check for a chomping indicator. Without one, the scalar is clipped.
  var chomping = _Chomping.CLIP;
  var increment = 0;
  var char = _scanner.peekChar();
  if (char == PLUS || char == HYPHEN) {
    chomping = char == PLUS ? _Chomping.KEEP : _Chomping.STRIP;
    _scanner.readChar();

    // Check for an indentation indicator.
    if (_isDigit) {
      // Check that the indentation is greater than 0.
      if (_scanner.peekChar() == NUMBER_0) {
        throw new YamlException(
            "0 may not be used as an indentation indicator.",
            _scanner.spanFrom(start));
      }

      increment = _scanner.readChar() - NUMBER_0;
    }
  } else if (_isDigit) {
    // Do the same as above, but in the opposite order.
    if (_scanner.peekChar() == NUMBER_0) {
      throw new YamlException(
          "0 may not be used as an indentation indicator.",
          _scanner.spanFrom(start));
    }

    increment = _scanner.readChar() - NUMBER_0;

    char = _scanner.peekChar();
    if (char == PLUS || char == HYPHEN) {
      chomping = char == PLUS ? _Chomping.KEEP : _Chomping.STRIP;
      _scanner.readChar();
    }
  }

  // Eat whitespace and comments to the end of the line.
  _skipBlanks();
  _skipComment();

  // Check if we're at the end of the line.
  if (!_isBreakOrEnd) {
    throw new YamlException("Expected comment or line break.",
        _scanner.emptySpan);
  }

  _skipLine();

  // If the block scalar has an explicit indentation indicator, add that to
  // the current indentation to get the indentation level for the scalar's
  // contents. An indent of 0 means "not yet known"; it is then detected from
  // the content by [_scanBlockScalarBreaks].
  var indent = 0;
  if (increment != 0) {
    indent = _indent >= 0 ? _indent + increment : increment;
  }

  // Scan the leading line breaks to determine the indentation level if
  // needed.
  var pair = _scanBlockScalarBreaks(indent);
  indent = pair.first;
  var trailingBreaks = pair.last;

  // Scan the block scalar contents.
  var buffer = new StringBuffer();
  var leadingBreak = '';
  var leadingBlank = false;
  var trailingBlank = false;
  // The end of the token's span; updated after each content line so that the
  // span excludes the line breaks and indentation that follow the content.
  var end = _scanner.state;
  while (_scanner.column == indent && !_scanner.isDone) {
    // Check for a document indicator. libyaml doesn't do this, but the spec
    // mandates it. See example 9.5:
    // http://yaml.org/spec/1.2/spec.html#id2801606.
    if (_isDocumentIndicator) break;

    // We are at the beginning of a non-empty line.

    // Does this line begin with whitespace? (It "trails" the previous line's
    // break.)
    trailingBlank = _isBlank;

    // Check if we need to fold the leading line break.
    if (!literal && leadingBreak.isNotEmpty && !leadingBlank &&
        !trailingBlank) {
      // Do we need to join the lines with a space?
      if (trailingBreaks.isEmpty) buffer.writeCharCode(SP);
    } else {
      buffer.write(leadingBreak);
    }
    leadingBreak = '';

    // Append the remaining line breaks.
    buffer.write(trailingBreaks);

    // Remember whether this line begins with whitespace for the next
    // iteration's folding decision.
    leadingBlank = _isBlank;

    // Consume the rest of the line verbatim.
    var startPosition = _scanner.position;
    while (!_isBreakOrEnd) {
      _scanner.readChar();
    }
    buffer.write(_scanner.substring(startPosition));
    end = _scanner.state;

    // libyaml always reads a line here, but this breaks on block scalars at
    // the end of the document that end without newlines. See example 8.1:
    // http://yaml.org/spec/1.2/spec.html#id2793888.
    if (!_scanner.isDone) leadingBreak = _readLine();

    // Eat the following indentation and spaces.
    var pair = _scanBlockScalarBreaks(indent);
    indent = pair.first;
    trailingBreaks = pair.last;
  }

  // Chomp the tail: STRIP drops every trailing break, CLIP keeps only the
  // first one, and KEEP keeps them all.
  if (chomping != _Chomping.STRIP) buffer.write(leadingBreak);
  if (chomping == _Chomping.KEEP) buffer.write(trailingBreaks);

  return new ScalarToken(_scanner.spanFrom(start, end), buffer.toString(),
      literal ? ScalarStyle.LITERAL : ScalarStyle.FOLDED);
}
1215 | |
/// Scans indentation spaces and line breaks for a block scalar.
///
/// An [indent] of 0 means the indentation level is not yet known; in that
/// case it is determined from the widest indentation seen, but is never less
/// than one more than the current [_indent]. Returns a pair of the resulting
/// indentation level and the text of the line breaks that were consumed.
Pair<int, String> _scanBlockScalarBreaks(int indent) {
  var widestColumn = 0;
  var lineBreaks = new StringBuffer();

  for (;;) {
    // Eat indentation spaces, stopping at the known indentation level if
    // there is one.
    while ((indent == 0 || _scanner.column < indent) &&
        _scanner.peekChar() == SP) {
      _scanner.readChar();
    }

    if (widestColumn < _scanner.column) widestColumn = _scanner.column;

    // libyaml throws an error here if a tab character is detected, but the
    // spec treats tabs like any other non-space character. See example 8.2:
    // http://yaml.org/spec/1.2/spec.html#id2794311.

    if (!_isBreak) break;
    lineBreaks.write(_readLine());
  }

  if (indent == 0) {
    // libyaml forces indent to be at least 1 here, but that doesn't seem to
    // be supported by the spec.
    var minimum = _indent + 1;
    indent = widestColumn < minimum ? minimum : widestColumn;
  }

  return new Pair(indent, lineBreaks.toString());
}
1250 | |
/// Scans a quoted scalar and returns it as a [ScalarToken].
///
/// When [singleQuote] is true the scalar is delimited by `'`, a doubled `''`
/// stands for one quote character, and backslashes are not special; the token
/// has [ScalarStyle.SINGLE_QUOTED]. Otherwise the scalar is delimited by `"`,
/// backslash escape sequences are decoded, and the token has
/// [ScalarStyle.DOUBLE_QUOTED].
///
/// Line breaks inside the scalar are folded: a single break becomes a space
/// and any further breaks are kept as newlines.
///
/// Throws a [YamlException] on end of input before the closing quote, on an
/// unknown escape character, or on an invalid `\x`, `\u`, or `\U` escape. A
/// document indicator at the start of a line is reported via
/// [_scanner.error].
Token _scanFlowScalar({bool singleQuote: false}) {
  var start = _scanner.state;
  var buffer = new StringBuffer();

  // Eat the left quote.
  _scanner.readChar();

  while (true) {
    // Check that there are no document indicators at the beginning of the
    // line.
    if (_isDocumentIndicator) {
      _scanner.error("Unexpected document indicator.");
    }

    if (_scanner.isDone) {
      throw new YamlException("Unexpected end of file.", _scanner.emptySpan);
    }

    // Set when an escaped line break was consumed, so that the whitespace
    // following it is discarded rather than kept.
    var leadingBlanks = false;

    // Consume non-blank characters.
    while (!_isBlankOrEnd) {
      var char = _scanner.peekChar();
      if (singleQuote && char == SINGLE_QUOTE &&
          _scanner.peekChar(1) == SINGLE_QUOTE) {
        // An escaped single quote.
        _scanner.readChar();
        _scanner.readChar();
        buffer.writeCharCode(SINGLE_QUOTE);
      } else if (char == (singleQuote ? SINGLE_QUOTE : DOUBLE_QUOTE)) {
        // The closing quote.
        break;
      } else if (!singleQuote && char == BACKSLASH && _isBreakAt(1)) {
        // An escaped newline.
        _scanner.readChar();
        _skipLine();
        leadingBlanks = true;
        break;
      } else if (!singleQuote && char == BACKSLASH) {
        var escapeStart = _scanner.state;

        // An escape sequence. [codeLength] is the number of hex digits that
        // follow the escape character, or null if the escape is complete as
        // is.
        var codeLength = null;
        switch (_scanner.peekChar(1)) {
          case NUMBER_0:
            buffer.writeCharCode(NULL);
            break;
          case LETTER_A:
            buffer.writeCharCode(BELL);
            break;
          case LETTER_B:
            buffer.writeCharCode(BACKSPACE);
            break;
          case LETTER_T:
          case TAB:
            buffer.writeCharCode(TAB);
            break;
          case LETTER_N:
            buffer.writeCharCode(LF);
            break;
          case LETTER_V:
            buffer.writeCharCode(VERTICAL_TAB);
            break;
          case LETTER_F:
            buffer.writeCharCode(FORM_FEED);
            break;
          case LETTER_R:
            buffer.writeCharCode(CR);
            break;
          case LETTER_E:
            buffer.writeCharCode(ESCAPE);
            break;
          case SP:
          case DOUBLE_QUOTE:
          case SLASH:
          case BACKSLASH:
            // libyaml doesn't support an escaped forward slash, but it was
            // added in YAML 1.2. See section 5.7:
            // http://yaml.org/spec/1.2/spec.html#id2776092
            buffer.writeCharCode(_scanner.peekChar(1));
            break;
          case LETTER_CAP_N:
            buffer.writeCharCode(NEL);
            break;
          case UNDERSCORE:
            buffer.writeCharCode(NBSP);
            break;
          case LETTER_CAP_L:
            buffer.writeCharCode(LINE_SEPARATOR);
            break;
          case LETTER_CAP_P:
            buffer.writeCharCode(PARAGRAPH_SEPARATOR);
            break;
          case LETTER_X:
            codeLength = 2;
            break;
          case LETTER_U:
            codeLength = 4;
            break;
          case LETTER_CAP_U:
            codeLength = 8;
            break;
          default:
            throw new YamlException("Unknown escape character.",
                _scanner.spanFrom(escapeStart));
        }

        // Eat the backslash and the escape character.
        _scanner.readChar();
        _scanner.readChar();

        if (codeLength != null) {
          // Accumulate the numeric escape one hex digit at a time.
          var value = 0;
          for (var i = 0; i < codeLength; i++) {
            if (!_isHex) {
              // Consume the offending character so that the error span
              // includes it.
              // NOTE(review): at end of input this readChar presumably fails
              // inside the scanner before the YamlException is thrown;
              // confirm.
              _scanner.readChar();
              throw new YamlException(
                  "Expected $codeLength-digit hexidecimal number.",
                  _scanner.spanFrom(escapeStart));
            }

            value = (value << 4) + _asHex(_scanner.readChar());
          }

          // Check the value and write the character. Surrogate code points
          // and values past the end of the Unicode range are rejected.
          if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
            throw new YamlException(
                "Invalid Unicode character escape code.",
                _scanner.spanFrom(escapeStart));
          }

          buffer.writeCharCode(value);
        }
      } else {
        buffer.writeCharCode(_scanner.readChar());
      }
    }

    // Check if we're at the end of a scalar.
    if (_scanner.peekChar() == (singleQuote ? SINGLE_QUOTE : DOUBLE_QUOTE)) {
      break;
    }

    // Consume blank characters and line breaks.
    var whitespace = new StringBuffer();
    var leadingBreak = '';
    var trailingBreaks = new StringBuffer();
    while (_isBlank || _isBreak) {
      if (_isBlank) {
        // Consume a space or a tab. Blanks that follow a line break are
        // dropped.
        if (!leadingBlanks) {
          whitespace.writeCharCode(_scanner.readChar());
        } else {
          _scanner.readChar();
        }
      } else {
        // Check if it's a first line break.
        if (!leadingBlanks) {
          // Blanks that precede the line break are dropped too.
          whitespace.clear();
          leadingBreak = _readLine();
          leadingBlanks = true;
        } else {
          trailingBreaks.write(_readLine());
        }
      }
    }

    // Join the whitespace or fold line breaks.
    if (leadingBlanks) {
      if (leadingBreak.isNotEmpty && trailingBreaks.isEmpty) {
        buffer.writeCharCode(SP);
      } else {
        buffer.write(trailingBreaks);
      }
    } else {
      buffer.write(whitespace);
      whitespace.clear();
    }
  }

  // Eat the right quote.
  _scanner.readChar();

  return new ScalarToken(_scanner.spanFrom(start), buffer.toString(),
      singleQuote ? ScalarStyle.SINGLE_QUOTED : ScalarStyle.DOUBLE_QUOTED);
}
1434 | |
/// Scans a plain (unquoted) scalar and returns it as a [ScalarToken] with
/// [ScalarStyle.PLAIN].
///
/// Scanning stops at a document indicator, at a '#' that starts a comment, at
/// the first character that isn't a plain character and isn't whitespace, or,
/// in a block context, when a continuation line is indented less than one
/// past the current [_indent].
///
/// Line breaks inside the scalar are folded: a single break becomes a space,
/// and each additional break contributes one newline. The token's span ends
/// after the last plain character, excluding trailing whitespace.
Token _scanPlainScalar() {
  var start = _scanner.state;
  var end = _scanner.state;
  var buffer = new StringBuffer();
  var leadingBreak = '';
  var trailingBreaks = '';
  var whitespace = new StringBuffer();
  var indent = _indent + 1;

  while (true) {
    // Check for a document indicator.
    if (_isDocumentIndicator) break;

    // Check for a comment.
    if (_scanner.peekChar() == HASH) break;

    if (_isPlainChar) {
      // Join the whitespace or fold line breaks.
      if (leadingBreak.isNotEmpty) {
        if (trailingBreaks.isEmpty) {
          buffer.writeCharCode(SP);
        } else {
          buffer.write(trailingBreaks);
        }
        leadingBreak = '';
        trailingBreaks = '';
      } else {
        buffer.write(whitespace);
        whitespace.clear();
      }
    }

    // libyaml's notion of valid identifiers differs substantially from YAML
    // 1.2's. We use [_isPlainChar] instead of libyaml's character here.
    var startPosition = _scanner.position;
    while (_isPlainChar) {
      _scanner.readChar();
    }
    buffer.write(_scanner.substring(startPosition));
    end = _scanner.state;

    // Is it the end?
    if (!_isBlank && !_isBreak) break;

    while (_isBlank || _isBreak) {
      if (_isBlank) {
        // Check for a tab character messing up the indentation.
        if (leadingBreak.isNotEmpty && _scanner.column < indent &&
            _scanner.peekChar() == TAB) {
          _scanner.error("Expected a space but found a tab.", length: 1);
        }

        // Blanks before any line break are kept as pending whitespace; blanks
        // after a line break are indentation and are dropped.
        if (leadingBreak.isEmpty) {
          whitespace.writeCharCode(_scanner.readChar());
        } else {
          _scanner.readChar();
        }
      } else {
        // Check if it's a first line break.
        if (leadingBreak.isEmpty) {
          leadingBreak = _readLine();
          whitespace.clear();
        } else {
          // Accumulate every additional break. Assigning here instead would
          // collapse several consecutive blank lines into a single newline.
          trailingBreaks += _readLine();
        }
      }
    }

    // Check the indentation level.
    if (_inBlockContext && _scanner.column < indent) break;
  }

  // Allow a simple key after a plain scalar with leading blanks.
  if (leadingBreak.isNotEmpty) _simpleKeyAllowed = true;

  return new ScalarToken(_scanner.spanFrom(start, end), buffer.toString(),
      ScalarStyle.PLAIN);
}
1514 | |
/// Advances past the line break at the current position, if there is one.
///
/// A CR LF pair is consumed as a single break. Does nothing when the scanner
/// is not at a CR or LF.
void _skipLine() {
  var first = _scanner.peekChar();
  if (first == CR || first == LF) {
    _scanner.readChar();
    if (first == CR && _scanner.peekChar() == LF) _scanner.readChar();
  }
}
1522 | |
/// Advances past the line break at the current position and returns "\n".
///
/// CR LF, a lone CR, and a lone LF are all normalized to a single LF. Throws a
/// [YamlException] if the scanner is not at a line break.
String _readLine() {
  var first = _scanner.peekChar();

  // libyaml supports NEL, PS, and LS characters as line separators, but this
  // is explicitly forbidden in section 5.4 of the YAML spec.
  var atLineBreak = first == CR || first == LF;
  if (!atLineBreak) {
    throw new YamlException("Expected newline.", _scanner.emptySpan);
  }

  _scanner.readChar();
  if (first == CR && _scanner.peekChar() == LF) {
    // Swallow the LF of a CR LF pair.
    _scanner.readChar();
  }
  return "\n";
}
1538 | |
/// Returns whether the character at [offset] is a space or a tab.
bool _isBlankAt(int offset) {
  switch (_scanner.peekChar(offset)) {
    case SP:
    case TAB:
      return true;
    default:
      return false;
  }
}
1544 | |
/// Returns whether the character at [offset] is a line break (CR or LF).
bool _isBreakAt(int offset) {
  // Libyaml considers NEL, LS, and PS to be line breaks as well, but that's
  // contrary to the spec.
  switch (_scanner.peekChar(offset)) {
    case CR:
    case LF:
      return true;
    default:
      return false;
  }
}
1552 | |
/// Returns whether the character at [offset] is a space, a tab, or a line
/// break, or whether [offset] is past the end of the source.
bool _isBlankOrEndAt(int offset) {
  var char = _scanner.peekChar(offset);

  // A null character means there is no more input at that offset.
  if (char == null) return true;

  switch (char) {
    case SP:
    case TAB:
    case CR:
    case LF:
      return true;
    default:
      return false;
  }
}
1560 | |
/// Returns whether the character at [offset] may appear in a plain scalar.
///
/// A ':' counts only when the character after it is plain-safe, and a '#'
/// counts only when the character before it is neither a space nor a tab.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c).
bool _isPlainCharAt(int offset) {
  var char = _scanner.peekChar(offset);

  if (char == COLON) return _isPlainSafeAt(offset + 1);

  if (char == HASH) {
    var before = _scanner.peekChar(offset - 1);
    return !(before == SP || before == TAB);
  }

  return _isPlainSafeAt(offset);
}
1575 | |
/// Returns whether the character at [offset] is a plain-safe character.
///
/// Returns false when [offset] is past the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-safe(c).
bool _isPlainSafeAt(int offset) {
  var char = _scanner.peekChar(offset);
  if (char == null) return false;

  // These characters are delimiters in a flow context and thus are only
  // safe in a block context.
  if (char == COMMA ||
      char == LEFT_SQUARE ||
      char == RIGHT_SQUARE ||
      char == LEFT_CURLY ||
      char == RIGHT_CURLY) {
    return _inBlockContext;
  }

  // Whitespace, line breaks, and the byte order mark are never plain-safe.
  // The BOM must be excluded before the range checks below, which would
  // otherwise accept it.
  if (char == SP || char == TAB || char == LF || char == CR || char == BOM) {
    return false;
  }

  // NEL lies outside the ranges below but is accepted.
  if (char == NEL) return true;

  return (char >= 0x00020 && char <= 0x00007E) ||
      (char >= 0x000A0 && char <= 0x00D7FF) ||
      (char >= 0x0E000 && char <= 0x00FFFD) ||
      (char >= 0x10000 && char <= 0x10FFFF);
}
1606 | |
/// Returns the numeric value of the hexadecimal digit [char].
///
/// [char] is not validated here; anything at or below '9' is treated as a
/// decimal digit, anything else at or below 'F' as an uppercase letter digit,
/// and everything else as a lowercase letter digit.
int _asHex(int char) {
  if (char <= NUMBER_9) return char - NUMBER_0;

  var letterBase = char <= LETTER_CAP_F ? LETTER_CAP_A : LETTER_A;
  return 10 + char - letterBase;
}
1613 | |
/// Advances the scanner for as long as [_isBlank] holds at the current
/// position.
void _skipBlanks() {
  for (; _isBlank; _scanner.readChar()) {}
}
1620 | |
/// Advances the scanner to the end of the current line if a comment starts at
/// the current position.
///
/// The terminating line break itself is not consumed. Does nothing unless the
/// current character is '#'.
void _skipComment() {
  if (_scanner.peekChar() == HASH) {
    while (!_isBreakOrEnd) {
      _scanner.readChar();
    }
  }
}
1628 } | |
1629 | |
/// Records where a potential simple key begins.
class _SimpleKey {
  /// The index of the token that begins the simple key.
  ///
  /// The index counts every token emitted so far; it is not an index into
  /// [_tokens].
  final int tokenNumber;

  /// The source location at which the simple key begins.
  ///
  /// This is used for error reporting and for determining when a simple key is
  /// no longer on the current line.
  final SourceLocation location;

  /// The line on which the key appears.
  ///
  /// This duplicates information available from [location], but reading it
  /// from there requires a binary search whereas this is O(1).
  final int line;

  /// The column on which the key appears.
  ///
  /// This duplicates information available from [location], but reading it
  /// from there requires a binary search whereas this is O(1).
  final int column;

  /// Whether this key must exist for the document to be scanned.
  final bool required;

  _SimpleKey(this.tokenNumber, this.line, this.column, this.location,
      {this.required});
}
1663 | |
/// The chomping indicators, which describe how the trailing whitespace of a
/// block scalar is handled.
///
/// See http://yaml.org/spec/1.2/spec.html#id2794534.
class _Chomping {
  /// Discard all trailing whitespace.
  static const STRIP = const _Chomping("STRIP");

  /// Retain a single trailing newline.
  static const CLIP = const _Chomping("CLIP");

  /// Preserve all trailing whitespace.
  static const KEEP = const _Chomping("KEEP");

  /// The name of this indicator, as returned by [toString].
  final String name;

  const _Chomping(this.name);

  String toString() {
    return name;
  }
}
OLD | NEW |