Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(969)

Side by Side Diff: yaml/lib/src/scanner.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « yaml/lib/src/parser.dart ('k') | yaml/lib/src/style.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 library yaml.scanner;
6
7 import 'package:collection/collection.dart';
8 import 'package:string_scanner/string_scanner.dart';
9 import 'package:source_span/source_span.dart';
10
11 import 'style.dart';
12 import 'token.dart';
13 import 'utils.dart';
14 import 'yaml_exception.dart';
15
16 /// A scanner that reads a string of Unicode characters and emits [Token]s.
17 ///
18 /// This is based on the libyaml scanner, available at
19 /// https://github.com/yaml/libyaml/blob/master/src/scanner.c. The license for
20 /// that is available in ../../libyaml-license.txt.
21 class Scanner {
// Code points for whitespace, indicators and other punctuation.
static const int TAB = 0x9;
static const int LF = 0xA;
static const int CR = 0xD;
static const int SP = 0x20;
static const int DOLLAR = 0x24;
static const int LEFT_PAREN = 0x28;
static const int RIGHT_PAREN = 0x29;
static const int PLUS = 0x2B;
static const int COMMA = 0x2C;
static const int HYPHEN = 0x2D;
static const int PERIOD = 0x2E;
static const int QUESTION = 0x3F;
static const int COLON = 0x3A;
static const int SEMICOLON = 0x3B;
static const int EQUALS = 0x3D;
static const int LEFT_SQUARE = 0x5B;
static const int RIGHT_SQUARE = 0x5D;
static const int LEFT_CURLY = 0x7B;
static const int RIGHT_CURLY = 0x7D;
static const int HASH = 0x23;
static const int AMPERSAND = 0x26;
static const int ASTERISK = 0x2A;
static const int EXCLAMATION = 0x21;
static const int VERTICAL_BAR = 0x7C;
static const int LEFT_ANGLE = 0x3C;
static const int RIGHT_ANGLE = 0x3E;
static const int SINGLE_QUOTE = 0x27;
static const int DOUBLE_QUOTE = 0x22;
static const int PERCENT = 0x25;
static const int AT = 0x40;
static const int GRAVE_ACCENT = 0x60;
static const int TILDE = 0x7E;

// Control characters, separators and other special code points.
static const int NULL = 0x0;
static const int BELL = 0x7;
static const int BACKSPACE = 0x8;
static const int VERTICAL_TAB = 0xB;
static const int FORM_FEED = 0xC;
static const int ESCAPE = 0x1B;
static const int SLASH = 0x2F;
static const int BACKSLASH = 0x5C;
static const int UNDERSCORE = 0x5F;
static const int NEL = 0x85;
static const int NBSP = 0xA0;
static const int LINE_SEPARATOR = 0x2028;
static const int PARAGRAPH_SEPARATOR = 0x2029;
static const int BOM = 0xFEFF;

// Bounds of the ASCII decimal digits.
static const int NUMBER_0 = 0x30;
static const int NUMBER_9 = 0x39;

// Selected lowercase ASCII letters.
static const int LETTER_A = 0x61;
static const int LETTER_B = 0x62;
static const int LETTER_E = 0x65;
static const int LETTER_F = 0x66;
static const int LETTER_N = 0x6E;
static const int LETTER_R = 0x72;
static const int LETTER_T = 0x74;
static const int LETTER_U = 0x75;
static const int LETTER_V = 0x76;
static const int LETTER_X = 0x78;
static const int LETTER_Z = 0x7A;

// Selected uppercase ASCII letters.
static const int LETTER_CAP_A = 0x41;
static const int LETTER_CAP_F = 0x46;
static const int LETTER_CAP_L = 0x4C;
static const int LETTER_CAP_N = 0x4E;
static const int LETTER_CAP_P = 0x50;
static const int LETTER_CAP_U = 0x55;
static const int LETTER_CAP_X = 0x58;
static const int LETTER_CAP_Z = 0x5A;
93
/// The [SpanScanner] that supplies characters from the source text.
///
/// It also tracks line and column information and is used to build
/// [SourceSpan]s.
final SpanScanner _scanner;

/// Whether a [TokenType.STREAM_START] token, marking the beginning of the
/// YAML stream, has been produced.
bool _streamStartProduced = false;

/// Whether a [TokenType.STREAM_END] token, marking the end of the YAML
/// stream, has been produced.
bool _streamEndProduced = false;

/// Tokens that have been scanned but not yet emitted.
///
/// Tokens are buffered here so that a [TokenType.KEY] token can still be
/// inserted in front of them once the scanner works out that a run of tokens
/// forms a mapping key.
final QueueList<Token> _tokens = new QueueList<Token>();

/// The number of tokens emitted so far.
///
/// Tokens still waiting in [_tokens] are not counted.
int _tokensParsed = 0;

/// Whether the first token in [_tokens] may be returned.
///
/// It may not be ready yet if a [TokenType.KEY] could still be inserted
/// before it.
bool _tokenAvailable = false;

/// The indentation levels of the currently open nested block contexts.
///
/// The YAML spec specifies that the initial indentation level is -1 spaces.
final List<int> _indents = <int>[-1];

/// Whether a simple key may start at the current position.
///
/// A simple key is any mapping key that isn't introduced by an explicit "?".
bool _simpleKeyAllowed = true;

/// The candidate simple key for each level of flow nesting.
///
/// An entry is `null` when the associated level of nesting has no valid
/// simple key.
///
/// When a ":" is scanned and a simple key is available, a [TokenType.KEY]
/// token is inserted into [_tokens] before that key's token so the parser
/// can tell that the key is meant to be a mapping key.
final List<_SimpleKey> _simpleKeys = <_SimpleKey>[null];
145
/// The innermost indentation level, i.e. the top of [_indents].
int get _indent {
  return _indents.last;
}

/// Whether the scanner is inside a block-level structure rather than a
/// flow-level one.
///
/// This holds exactly when [_simpleKeys] has a single entry.
bool get _inBlockContext {
  return _simpleKeys.length == 1;
}

/// Whether the scanner is at a line break or has consumed all of the source.
bool get _isBreakOrEnd {
  return _scanner.isDone || _isBreak;
}

/// Whether the character at the current position is a line break.
bool get _isBreak {
  return _isBreakAt(0);
}

/// Whether the scanner is at whitespace or at the end of the source.
bool get _isBlankOrEnd {
  return _isBlankOrEndAt(0);
}

/// Whether the character at the current position is whitespace.
bool get _isBlank {
  return _isBlankAt(0);
}
164
/// Whether the current character is a valid tag character.
///
/// Accepts ASCII letters and digits plus the URI punctuation listed below.
/// The underscore is included: it is part of the YAML URI character set (and
/// is listed in the character-set comment in [_scanTagUri]), so tags such as
/// `!my_type` must scan as a single tag.
///
/// Returns `false` at the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-tag-char.
bool get _isTagChar {
  var char = _scanner.peekChar();
  if (char == null) return false;
  switch (char) {
    case HYPHEN:
    case UNDERSCORE:
    case SEMICOLON:
    case SLASH:
    case COLON:
    case AT:
    case AMPERSAND:
    case EQUALS:
    case PLUS:
    case DOLLAR:
    case PERIOD:
    case TILDE:
    case QUESTION:
    case ASTERISK:
    case SINGLE_QUOTE:
    case LEFT_PAREN:
    case RIGHT_PAREN:
    case PERCENT:
      return true;
    default:
      return (char >= NUMBER_0 && char <= NUMBER_9) ||
          (char >= LETTER_A && char <= LETTER_Z) ||
          (char >= LETTER_CAP_A && char <= LETTER_CAP_Z);
  }
}
196
/// Whether the current character may appear in an anchor name.
///
/// That is any non-space character (see [_isNonSpace]) other than the flow
/// indicators `,`, `[`, `]`, `{` and `}`.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-anchor-name.
bool get _isAnchorChar {
  if (!_isNonSpace) return false;

  var char = _scanner.peekChar();
  return char != COMMA &&
      char != LEFT_SQUARE &&
      char != RIGHT_SQUARE &&
      char != LEFT_CURLY &&
      char != RIGHT_CURLY;
}
214
/// Whether the character at the current position is an ASCII decimal digit.
///
/// Returns `false` at the end of the source.
bool get _isDigit {
  var char = _scanner.peekChar();
  if (char == null) return false;
  return NUMBER_0 <= char && char <= NUMBER_9;
}
220
/// Whether the character at the current position is a hexadecimal digit
/// (`0`-`9`, `a`-`f` or `A`-`F`).
///
/// Returns `false` at the end of the source.
bool get _isHex {
  var char = _scanner.peekChar();
  if (char == null) return false;

  if (NUMBER_0 <= char && char <= NUMBER_9) return true;
  if (LETTER_A <= char && char <= LETTER_F) return true;
  return LETTER_CAP_A <= char && char <= LETTER_CAP_F;
}
230
/// Whether the character at the current position is a plain-scalar
/// character, as decided by [_isPlainCharAt] at offset 0.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c).
bool get _isPlainChar {
  return _isPlainCharAt(0);
}
235
/// Whether the character at the current position is printable and is
/// neither a line break (LF, CR) nor a byte-order mark.
///
/// Returns `false` at the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#nb-char.
bool get _isNonBreak {
  var char = _scanner.peekChar();
  if (char == null) return false;

  // Explicit exclusions come first: the BOM lies inside one of the printable
  // ranges below and must not be accepted by them.
  if (char == LF || char == CR || char == BOM) return false;

  // Tab and NEL are printable but fall outside the ranges below.
  if (char == TAB || char == NEL) return true;

  return (0x00020 <= char && char <= 0x00007E) ||
      (0x000A0 <= char && char <= 0x00D7FF) ||
      (0x0E000 <= char && char <= 0x00FFFD) ||
      (0x10000 <= char && char <= 0x10FFFF);
}
258
/// Whether the character at the current position is printable and is not
/// whitespace.
///
/// Line breaks (LF, CR), the byte-order mark and the space character are
/// rejected explicitly; a tab is rejected because it lies outside every
/// accepted range. Returns `false` at the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-char.
bool get _isNonSpace {
  var char = _scanner.peekChar();
  if (char == null) return false;

  // The BOM and the space lie inside the printable ranges below, so they
  // have to be excluded before the range test.
  if (char == LF || char == CR || char == BOM || char == SP) return false;

  // NEL is accepted even though it is outside the ranges below.
  if (char == NEL) return true;

  return (0x00020 <= char && char <= 0x00007E) ||
      (0x000A0 <= char && char <= 0x00D7FF) ||
      (0x0E000 <= char && char <= 0x00FFFD) ||
      (0x10000 <= char && char <= 0x10FFFF);
}
281
/// Whether the current position begins a document indicator.
///
/// A document indicator is `---` or `...` at column 0, followed by
/// whitespace or the end of the source. The indicator is tested with the
/// scanner's `matches`, which presumably records it as the scanner's last
/// match.
bool get _isDocumentIndicator {
  if (_scanner.column != 0) return false;
  if (!_isBlankOrEndAt(3)) return false;
  return _scanner.matches('---') || _scanner.matches('...');
}
290
/// Creates a scanner over the YAML text in [source].
///
/// [sourceUrl] is forwarded to the underlying [SpanScanner]; it can be a
/// [String] or a [Uri].
Scanner(String source, {sourceUrl})
    : _scanner = new SpanScanner.eager(
          source,
          sourceUrl: sourceUrl);
296
/// Removes the next token from the queue and returns it.
///
/// Throws a [StateError] if the [TokenType.STREAM_END] token has already
/// been returned.
Token scan() {
  if (_streamEndProduced) throw new StateError("Out of tokens.");
  if (!_tokenAvailable) _fetchMoreTokens();

  var next = _tokens.removeFirst();
  _tokensParsed++;
  _tokenAvailable = false;

  // The `is Token` test also guards against a null entry.
  if (next is Token && next.type == TokenType.STREAM_END) {
    _streamEndProduced = true;
  } else {
    _streamEndProduced = false;
  }
  return next;
}
309
/// Discards the next token and returns the one that follows it, without
/// consuming that one.
Token advance() {
  scan();
  var following = peek();
  return following;
}
315
/// Returns the next token but leaves it in the queue.
///
/// Returns `null` once the [TokenType.STREAM_END] token has been consumed.
Token peek() {
  if (_streamEndProduced) return null;
  if (_tokenAvailable) return _tokens.first;

  _fetchMoreTokens();
  return _tokens.first;
}
322
/// Scans until [_tokens] holds at least one token that is safe to return,
/// then sets [_tokenAvailable].
void _fetchMoreTokens() {
  // Whether the token at the head of the queue could still turn out to be a
  // simple key. While that is undecided we must keep scanning; otherwise the
  // key's value might be emitted before its `KEY` token.
  bool headMayBeSimpleKey() {
    _staleSimpleKeys();
    return _simpleKeys.any((candidate) =>
        candidate != null && candidate.tokenNumber == _tokensParsed);
  }

  while (_tokens.isEmpty || headMayBeSimpleKey()) {
    _fetchNextToken();
  }

  _tokenAvailable = true;
}
342
/// Scans the next token (or group of tokens) and appends it to [_tokens].
///
/// The very first call only emits the stream-start token. Later calls skip
/// to the next token, drop stale simple keys, close the block contexts that
/// the current column has left, and then dispatch on the current character
/// to the matching `_fetch*` method.
///
/// Every branch returns explicitly, so nothing follows the final `switch`.
void _fetchNextToken() {
  if (!_streamStartProduced) {
    _fetchStreamStart();
    return;
  }

  _scanToNextToken();
  _staleSimpleKeys();
  _unrollIndent(_scanner.column);

  if (_scanner.isDone) {
    _fetchStreamEnd();
    return;
  }

  // Directives and document indicators are only recognized at the start of
  // a line.
  if (_scanner.column == 0) {
    if (_scanner.peekChar() == PERCENT) {
      _fetchDirective();
      return;
    }

    if (_isBlankOrEndAt(3)) {
      if (_scanner.matches('---')) {
        _fetchDocumentIndicator(TokenType.DOCUMENT_START);
        return;
      }

      if (_scanner.matches('...')) {
        _fetchDocumentIndicator(TokenType.DOCUMENT_END);
        return;
      }
    }
  }

  switch (_scanner.peekChar()) {
    case LEFT_SQUARE:
      _fetchFlowCollectionStart(TokenType.FLOW_SEQUENCE_START);
      return;
    case LEFT_CURLY:
      _fetchFlowCollectionStart(TokenType.FLOW_MAPPING_START);
      return;
    case RIGHT_SQUARE:
      _fetchFlowCollectionEnd(TokenType.FLOW_SEQUENCE_END);
      return;
    case RIGHT_CURLY:
      _fetchFlowCollectionEnd(TokenType.FLOW_MAPPING_END);
      return;
    case COMMA:
      _fetchFlowEntry();
      return;
    case ASTERISK:
      _fetchAnchor(anchor: false);
      return;
    case AMPERSAND:
      _fetchAnchor(anchor: true);
      return;
    case EXCLAMATION:
      _fetchTag();
      return;
    case SINGLE_QUOTE:
      _fetchFlowScalar(singleQuote: true);
      return;
    case DOUBLE_QUOTE:
      _fetchFlowScalar(singleQuote: false);
      return;
    case VERTICAL_BAR:
      // Block scalars are only valid in a block context.
      if (!_inBlockContext) _invalidScalarCharacter();
      _fetchBlockScalar(literal: true);
      return;
    case RIGHT_ANGLE:
      if (!_inBlockContext) _invalidScalarCharacter();
      _fetchBlockScalar(literal: false);
      return;
    case PERCENT:
    case AT:
    case GRAVE_ACCENT:
      // These characters can never begin a scalar.
      _invalidScalarCharacter();
      return;

    // These characters may sometimes begin plain scalars.
    case HYPHEN:
      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchBlockEntry();
      }
      return;
    case QUESTION:
      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchKey();
      }
      return;
    case COLON:
      if (!_inBlockContext && _tokens.isNotEmpty) {
        // If a colon follows a "JSON-like" value (an explicit map or list, or
        // a quoted string) it isn't required to have whitespace after it
        // since it unambiguously describes a map.
        var token = _tokens.last;
        if (token.type == TokenType.FLOW_SEQUENCE_END ||
            token.type == TokenType.FLOW_MAPPING_END ||
            (token.type == TokenType.SCALAR && token.style.isQuoted)) {
          _fetchValue();
          return;
        }
      }

      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchValue();
      }
      return;
    default:
      if (!_isNonBreak) _invalidScalarCharacter();

      _fetchPlainScalar();
      return;
  }
}
467
/// Reports the character at the current position as unexpected.
///
/// Delegates to the scanner's `error`, covering a single character.
void _invalidScalarCharacter() {
  _scanner.error("Unexpected character.", length: 1);
}
471
/// Discards candidate simple keys that can no longer become keys.
///
/// Throws a [YamlException] if a discarded candidate was required.
void _staleSimpleKeys() {
  for (var index = 0; index < _simpleKeys.length; index++) {
    var candidate = _simpleKeys[index];

    // libyaml requires that all simple keys be a single line and no longer
    // than 1024 characters. However, in section 7.4.2 of the spec
    // (http://yaml.org/spec/1.2/spec.html#id2790832), these restrictions are
    // only applied when the curly braces are omitted. It's difficult to
    // retain enough context to know which keys need to have the restriction
    // placed on them, so for now we go the other direction and allow
    // everything but multiline simple keys in a block context.
    if (candidate == null ||
        !_inBlockContext ||
        candidate.line == _scanner.line) {
      continue;
    }

    if (candidate.required) {
      throw new YamlException("Expected ':'.", _scanner.emptySpan);
    }

    _simpleKeys[index] = null;
  }
}
497
/// Records the current position as a candidate simple key, if simple keys
/// are allowed here.
void _saveSimpleKey() {
  // In the block context, a token that sits exactly at the current
  // indentation level has to be a simple key.
  var mustBeKey = _inBlockContext && _indent == _scanner.column;

  // A required key is always the first token on its line, where simple keys
  // are always allowed; the assertion merely double-checks that.
  assert(!mustBeKey || _simpleKeyAllowed);

  if (_simpleKeyAllowed) {
    // Replace whatever candidate this flow level had with the new one.
    _removeSimpleKey();
    var candidate = new _SimpleKey(
        _tokensParsed + _tokens.length,
        _scanner.line,
        _scanner.column,
        _scanner.location,
        required: mustBeKey);
    _simpleKeys[_simpleKeys.length - 1] = candidate;
  }
}
521
/// Clears the candidate simple key of the current flow level.
///
/// Throws a [YamlException] if that candidate was required.
void _removeSimpleKey() {
  var slot = _simpleKeys.length - 1;
  var candidate = _simpleKeys[slot];

  if (candidate != null && candidate.required) {
    throw new YamlException("Could not find expected ':' for simple key.",
        candidate.location.pointSpan());
  }

  _simpleKeys[slot] = null;
}
532
/// Enters a new flow level by pushing an empty slot onto [_simpleKeys].
void _increaseFlowLevel() => _simpleKeys.add(null);
537
/// Leaves the current flow level by popping its slot from [_simpleKeys].
///
/// Does nothing when the scanner is already in the block context.
void _decreaseFlowLevel() {
  if (!_inBlockContext) _simpleKeys.removeLast();
}
543
/// Opens a new indentation level at [column] if it is deeper than [_indent].
///
/// Does nothing outside the block context or when [column] is not deeper
/// than the current level. Otherwise [column] is pushed onto [_indents] and
/// a token of [type], spanning the point at [location], is queued. When
/// [tokenNumber] is given the token is inserted at the queue position that
/// corresponds to that token number; otherwise it is appended.
void _rollIndent(int column, TokenType type, SourceLocation location,
    {int tokenNumber}) {
  var deeper = _indent == -1 || _indent < column;
  if (!_inBlockContext || !deeper) return;

  _indents.add(column);

  var marker = new Token(type, location.pointSpan());
  if (tokenNumber != null) {
    _tokens.insert(tokenNumber - _tokensParsed, marker);
  } else {
    _tokens.add(marker);
  }
}
567
/// Closes every indentation level that is deeper than [column].
///
/// A [TokenType.BLOCK_END] token is appended for each level popped from
/// [_indents]. Does nothing outside the block context.
void _unrollIndent(int column) {
  if (_inBlockContext) {
    while (_indent > column) {
      var blockEnd = new Token(TokenType.BLOCK_END, _scanner.emptySpan);
      _tokens.add(blockEnd);
      _indents.removeLast();
    }
  }
}
580
/// Closes every open indentation level, returning [_indent] to -1.
///
/// A [TokenType.BLOCK_END] token is appended for each level closed.
void _resetIndent() {
  _unrollIndent(-1);
}
586
/// Queues the [TokenType.STREAM_START] token and records that it was
/// produced.
void _fetchStreamStart() {
  // libyaml performs most of its initialization here; in this class that
  // work lives in the field initializers instead.
  _streamStartProduced = true;

  var streamStart = new Token(TokenType.STREAM_START, _scanner.emptySpan);
  _tokens.add(streamStart);
}
594
/// Queues the [TokenType.STREAM_END] token.
///
/// All open block contexts are closed first and the pending simple key is
/// dropped.
void _fetchStreamEnd() {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  var streamEnd = new Token(TokenType.STREAM_END, _scanner.emptySpan);
  _tokens.add(streamEnd);
}
602
/// Scans a directive and queues its token, if it produced one.
///
/// [_scanDirective] returns `null` for directives it doesn't recognize; in
/// that case nothing is queued.
void _fetchDirective() {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  var directive = _scanDirective();
  if (directive == null) return;
  _tokens.add(directive);
}
612
/// Queues a [TokenType.DOCUMENT_START] or [TokenType.DOCUMENT_END] token,
/// as selected by [type], spanning the three-character indicator.
void _fetchDocumentIndicator(TokenType type) {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  // The indicator (`---` or `...`) is always three characters long.
  var start = _scanner.state;
  for (var consumed = 0; consumed < 3; consumed++) {
    _scanner.readChar();
  }

  _tokens.add(new Token(type, _scanner.spanFrom(start)));
}
627
/// Queues a [TokenType.FLOW_SEQUENCE_START] or
/// [TokenType.FLOW_MAPPING_START] token, as selected by [type], and enters a
/// new flow level.
void _fetchFlowCollectionStart(TokenType type) {
  // The collection itself may turn out to be a simple key, so record it
  // before the new flow level is opened.
  _saveSimpleKey();
  _increaseFlowLevel();

  // A simple key may start right after the opening bracket.
  _simpleKeyAllowed = true;
  _addCharToken(type);
}
636
/// Queues a [TokenType.FLOW_SEQUENCE_END] or [TokenType.FLOW_MAPPING_END]
/// token, as selected by [type], and leaves the current flow level.
void _fetchFlowCollectionEnd(TokenType type) {
  _removeSimpleKey();
  _decreaseFlowLevel();

  // No simple key can start directly after the closing bracket.
  _simpleKeyAllowed = false;
  _addCharToken(type);
}
645
/// Queues a [TokenType.FLOW_ENTRY] token for the `,` separator.
void _fetchFlowEntry() {
  _removeSimpleKey();

  // A simple key may start after the separator.
  _simpleKeyAllowed = true;
  _addCharToken(TokenType.FLOW_ENTRY);
}
652
/// Queues a [TokenType.BLOCK_ENTRY] token for the `-` indicator.
///
/// In the block context this may first open a block sequence. Throws a
/// [YamlException] if a block entry isn't allowed at this position.
void _fetchBlockEntry() {
  if (_inBlockContext) {
    if (!_simpleKeyAllowed) {
      throw new YamlException(
          "Block sequence entries are not allowed here.",
          _scanner.emptySpan);
    }

    _rollIndent(
        _scanner.column,
        TokenType.BLOCK_SEQUENCE_START,
        _scanner.location);
  }

  // A '-' indicator in the flow context is an error, but reporting it is
  // left to the Parser, which is able to point to the context.

  _removeSimpleKey();
  _simpleKeyAllowed = true;
  _addCharToken(TokenType.BLOCK_ENTRY);
}
676
/// Queues a [TokenType.KEY] token for the explicit `?` indicator.
///
/// In the block context this may first open a block mapping. Throws a
/// [YamlException] if a mapping key isn't allowed at this position.
void _fetchKey() {
  if (_inBlockContext) {
    if (!_simpleKeyAllowed) {
      throw new YamlException("Mapping keys are not allowed here.",
          _scanner.emptySpan);
    }

    _rollIndent(
        _scanner.column,
        TokenType.BLOCK_MAPPING_START,
        _scanner.location);
  }

  // After `?`, a simple key may follow only in a block context.
  _simpleKeyAllowed = _inBlockContext;
  _addCharToken(TokenType.KEY);
}
695
/// Queues a [TokenType.VALUE] token for the `:` indicator.
///
/// Exactly one character, the `:` itself, is consumed. Depending on context
/// this also queues the tokens that make the preceding tokens a mapping key:
///
/// * If a simple key is pending at this flow level, a [TokenType.KEY] token
///   (and possibly a [TokenType.BLOCK_MAPPING_START]) is inserted before the
///   key's first token.
/// * In the block context without a pending simple key, the `:` follows a
///   complex key and may open a block mapping. Throws a [YamlException] if a
///   mapping value isn't allowed here.
/// * In the flow context where a simple key would be allowed, the key is
///   empty, so a zero-width [TokenType.KEY] token is queued.
void _fetchValue() {
  var simpleKey = _simpleKeys.last;
  if (simpleKey != null) {
    // Add a [TokenType.KEY] directive before the first token of the simple
    // key so the parser knows that it's part of a key/value pair.
    _tokens.insert(simpleKey.tokenNumber - _tokensParsed,
        new Token(TokenType.KEY, simpleKey.location.pointSpan()));

    // In the block context, we may need to add the
    // [TokenType.BLOCK_MAPPING_START] token.
    _rollIndent(
        simpleKey.column,
        TokenType.BLOCK_MAPPING_START,
        simpleKey.location,
        tokenNumber: simpleKey.tokenNumber);

    // Remove the simple key.
    _simpleKeys[_simpleKeys.length - 1] = null;

    // A simple key cannot follow another simple key.
    _simpleKeyAllowed = false;
  } else if (_inBlockContext) {
    if (!_simpleKeyAllowed) {
      throw new YamlException(
          "Mapping values are not allowed here. Did you miss a colon "
              "earlier?",
          _scanner.emptySpan);
    }

    // If we're here, we've found the ':' indicator following a complex key.

    _rollIndent(
        _scanner.column,
        TokenType.BLOCK_MAPPING_START,
        _scanner.location);
    _simpleKeyAllowed = true;
  } else if (_simpleKeyAllowed) {
    // If we're here, we've found the ':' indicator with an empty key. This
    // behavior differs from libyaml, which disallows empty implicit keys.
    _simpleKeyAllowed = false;

    // The empty key has no text of its own, so its token must not consume
    // anything. Consuming a character here would make the VALUE token below
    // swallow whatever follows the ':' (for example a closing '}').
    _tokens.add(new Token(TokenType.KEY, _scanner.emptySpan));
  }

  _addCharToken(TokenType.VALUE);
}
742
/// Consumes one character and queues a token of [type] spanning it.
void _addCharToken(TokenType type) {
  var before = _scanner.state;
  _scanner.readChar();

  var span = _scanner.spanFrom(before);
  _tokens.add(new Token(type, span));
}
751
/// Queues a [TokenType.ANCHOR] token if [anchor] is true, or a
/// [TokenType.ALIAS] token otherwise.
void _fetchAnchor({bool anchor: true}) {
  // An anchor or alias may itself begin a simple key, but none may start
  // right after it.
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var token = _scanAnchor(anchor: anchor);
  _tokens.add(token);
}
758
/// Queues a [TokenType.TAG] token.
void _fetchTag() {
  // A tag may begin a simple key, but none may start right after it.
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var token = _scanTag();
  _tokens.add(token);
}
765
/// Queues a [TokenType.SCALAR] token for a block scalar.
///
/// The style is [ScalarStyle.LITERAL] when [literal] is true and
/// [ScalarStyle.FOLDED] otherwise.
void _fetchBlockScalar({bool literal: false}) {
  // A block scalar can't be a simple key, but one may follow it.
  _removeSimpleKey();
  _simpleKeyAllowed = true;

  var token = _scanBlockScalar(literal: literal);
  _tokens.add(token);
}
773
/// Queues a [TokenType.SCALAR] token for a quoted scalar.
///
/// The style is [ScalarStyle.SINGLE_QUOTED] when [singleQuote] is true and
/// [ScalarStyle.DOUBLE_QUOTED] otherwise.
void _fetchFlowScalar({bool singleQuote: false}) {
  // A quoted scalar may begin a simple key, but none may start right after
  // it.
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var token = _scanFlowScalar(singleQuote: singleQuote);
  _tokens.add(token);
}
781
/// Queues a [TokenType.SCALAR] token with style [ScalarStyle.PLAIN].
void _fetchPlainScalar() {
  // A plain scalar may begin a simple key, but none may start right after
  // it.
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var token = _scanPlainScalar();
  _tokens.add(token);
}
788
/// Skips whitespace, comments and line breaks until the scanner sits at the
/// start of the next token (or at the end of the source).
///
/// Reports an error if a tab is used as indentation in the block context.
void _scanToNextToken() {
  var sawLineBreak = false;
  while (true) {
    // A byte-order mark is permitted at the start of a line.
    if (_scanner.column == 0) _scanner.scan("\uFEFF");

    // Skip whitespace.
    //
    // libyaml disallows tabs after "-", "?", or ":", but the spec allows
    // them. See section 6.2: http://yaml.org/spec/1.2/spec.html#id2778241.
    // Tabs are only refused once a line break has been crossed in the block
    // context, where they would act as indentation.
    var tabsAllowed = !_inBlockContext || !sawLineBreak;
    while (true) {
      var char = _scanner.peekChar();
      if (char == SP || (tabsAllowed && char == TAB)) {
        _scanner.readChar();
      } else {
        break;
      }
    }

    if (_scanner.peekChar() == TAB) {
      _scanner.error("Tab characters are not allowed as indentation.",
          length: 1);
    }

    // Skip a comment, if any, up to the line break.
    _skipComment();

    // Anything other than a line break is the start of a token.
    if (!_isBreak) return;

    _skipLine();

    // In the block context, a new line may start a simple key.
    if (_inBlockContext) _simpleKeyAllowed = true;
    sawLineBreak = true;
  }
}
827
/// Scans a whole directive line and returns its token.
///
/// `%YAML` directives (e.g. `%YAML 1.2 # a comment`) and `%TAG` directives
/// (e.g. `%TAG !yaml! tag:yaml.org,2002:`) are handled by
/// [_scanVersionDirectiveValue] and [_scanTagDirectiveValue]. The rest of
/// the line, including a trailing comment and the line break, is consumed
/// too.
///
/// Any other directive triggers a warning, is skipped up to the line break,
/// and yields `null`. Throws a [YamlException] if a known directive is
/// followed by something other than a comment or a line break.
Token _scanDirective() {
  var start = _scanner.state;

  // Skip the '%'.
  _scanner.readChar();

  var token;
  switch (_scanDirectiveName()) {
    case "YAML":
      token = _scanVersionDirectiveValue(start);
      break;
    case "TAG":
      token = _scanTagDirectiveValue(start);
      break;
    default:
      warn("Warning: unknown directive.", _scanner.spanFrom(start));

      // libyaml doesn't support unknown directives, but the spec says to
      // ignore them and warn:
      // http://yaml.org/spec/1.2/spec.html#id2781147.
      while (!_isBreakOrEnd) {
        _scanner.readChar();
      }

      return null;
  }

  // Consume the remainder of the line, including any comment.
  _skipBlanks();
  _skipComment();

  if (!_isBreakOrEnd) {
    throw new YamlException(
        "Expected comment or line break after directive.",
        _scanner.spanFrom(start));
  }

  _skipLine();
  return token;
}
871
/// Scans and returns the name of a directive, e.g. `YAML` in `%YAML 1.2` or
/// `TAG` in `%TAG !yaml! tag:yaml.org,2002:`.
///
/// Throws a [YamlException] if the name is empty or isn't followed by
/// whitespace or the end of the source.
String _scanDirectiveName() {
  // libyaml only allows word characters in directive names, but the spec
  // disagrees: http://yaml.org/spec/1.2/spec.html#ns-directive-name.
  var nameStart = _scanner.position;
  while (_isNonSpace) {
    _scanner.readChar();
  }
  var name = _scanner.substring(nameStart);

  if (name.isEmpty) {
    throw new YamlException("Expected directive name.", _scanner.emptySpan);
  }
  if (!_isBlankOrEnd) {
    throw new YamlException(
        "Unexpected character in directive name.", _scanner.emptySpan);
  }

  return name;
}
896
/// Scans the `major.minor` value of a version directive, e.g. the `1.2` in
/// `%YAML 1.2`.
///
/// [start] is the scanner state at the beginning of the directive; the
/// returned [VersionDirectiveToken] spans from there to the end of the value.
Token _scanVersionDirectiveValue(LineScannerState start) {
  _skipBlanks();

  var major = _scanVersionDirectiveNumber();
  _scanner.expect('.');
  var minor = _scanVersionDirectiveNumber();

  var span = _scanner.spanFrom(start);
  return new VersionDirectiveToken(span, major, minor);
}
910
/// Scans one numeric component of a version directive, i.e. either the `1`
/// or the `2` in `%YAML 1.2`, and returns it as an integer.
///
/// Throws a [YamlException] if there is no digit at the current position.
int _scanVersionDirectiveNumber() {
  var digitsStart = _scanner.position;
  while (_isDigit) {
    _scanner.readChar();
  }

  var digits = _scanner.substring(digitsStart);
  if (digits.isNotEmpty) return int.parse(digits);

  throw new YamlException("Expected version number.", _scanner.emptySpan);
}
930
/// Scans the handle and prefix of a tag directive, e.g. `!yaml!` and
/// `tag:yaml.org,2002:` in `%TAG !yaml! tag:yaml.org,2002:`.
///
/// [start] is the scanner state at the beginning of the directive; the
/// returned [TagDirectiveToken] spans from there to the end of the prefix.
/// Throws a [YamlException] if the handle isn't followed by whitespace, or
/// the prefix isn't followed by whitespace or the end of the source.
Token _scanTagDirectiveValue(LineScannerState start) {
  _skipBlanks();
  var handle = _scanTagHandle(directive: true);

  if (!_isBlank) {
    throw new YamlException("Expected whitespace.", _scanner.emptySpan);
  }
  _skipBlanks();
  var prefix = _scanTagUri();

  if (!_isBlankOrEnd) {
    throw new YamlException("Expected whitespace.", _scanner.emptySpan);
  }

  var span = _scanner.spanFrom(start);
  return new TagDirectiveToken(span, handle, prefix);
}
952
/// Scans an anchor (`&name`) or an alias (`*name`).
///
/// Returns an [AnchorToken] when [anchor] is true and an [AliasToken]
/// otherwise. Throws a [YamlException] if the name is empty or is followed
/// by a character that may not come after an anchor name.
Token _scanAnchor({bool anchor: true}) {
  var start = _scanner.state;

  // Skip the '&' or '*' indicator.
  _scanner.readChar();

  // libyaml only allows word characters in anchor names, but the spec
  // disagrees: http://yaml.org/spec/1.2/spec.html#ns-anchor-char.
  var nameStart = _scanner.position;
  while (_isAnchorChar) {
    _scanner.readChar();
  }
  var name = _scanner.substring(nameStart);

  // The name must be followed by whitespace, the end of the source, or one
  // of the characters listed here.
  var next = _scanner.peekChar();
  if (name.isEmpty ||
      !(_isBlankOrEnd ||
          next == QUESTION ||
          next == COLON ||
          next == COMMA ||
          next == RIGHT_SQUARE ||
          next == RIGHT_CURLY ||
          next == PERCENT ||
          next == AT ||
          next == GRAVE_ACCENT)) {
    throw new YamlException("Expected alphanumeric character.",
        _scanner.emptySpan);
  }

  var span = _scanner.spanFrom(start);
  if (!anchor) return new AliasToken(span, name);
  return new AnchorToken(span, name);
}
983
/// Scans a tag and returns it as a [TagToken] holding a handle and a suffix.
///
/// Three forms are recognized:
///
/// * `!<uri>`: the handle is empty and the suffix is the URI.
/// * `!handle!suffix` (or `!!suffix`): handle and suffix as written.
/// * `!suffix`: the handle is `!`. A lone `!` gives a `null` handle and the
///   suffix `!`.
Token _scanTag() {
  var start = _scanner.state;
  var handle;
  var suffix;

  if (_scanner.peekChar(1) != LEFT_ANGLE) {
    // The tag has either the '!suffix' or the '!handle!suffix' form, so
    // start by trying to scan a handle.
    handle = _scanTagHandle();

    var isExplicitHandle =
        handle.length > 1 && handle.startsWith('!') && handle.endsWith('!');
    if (isExplicitHandle) {
      suffix = _scanTagUri(flowSeparators: false);
    } else {
      // What was scanned is not a handle but the beginning of the suffix.
      suffix = _scanTagUri(head: handle, flowSeparators: false);

      if (suffix.isEmpty) {
        // This is the special '!' tag.
        handle = null;
        suffix = '!';
      } else {
        handle = '!';
      }
    }
  } else {
    // The tag is in the verbatim form; skip the leading '!<'.
    _scanner.readChar();
    _scanner.readChar();

    handle = '';
    suffix = _scanTagUri();

    _scanner.expect('>');
  }

  // libyaml insists on whitespace after a tag, but example 7.2 indicates
  // that it's not required: http://yaml.org/spec/1.2/spec.html#id2786720.

  return new TagToken(_scanner.spanFrom(start), handle, suffix);
}
1027
/// Scans a tag handle and returns its text, including the leading `!` and,
/// when present, the closing `!`.
///
/// When [directive] is true the handle belongs to a `%TAG` directive; there
/// a handle other than the bare `!` must end with `!`, and the scanner's
/// `expect` reports the failure otherwise.
String _scanTagHandle({bool directive: false}) {
  _scanner.expect('!');

  // libyaml only allows word characters in tags, but the spec disagrees:
  // http://yaml.org/spec/1.2/spec.html#ns-tag-char.
  var bodyStart = _scanner.position;
  while (_isTagChar) {
    _scanner.readChar();
  }
  var handle = '!' + _scanner.substring(bodyStart);

  if (_scanner.peekChar() == EXCLAMATION) {
    handle += new String.fromCharCode(_scanner.readChar());
  } else if (directive && handle != '!') {
    // It's either the '!' tag or not really a tag handle. If it's a %TAG
    // directive, it's an error. If it's a tag token, it must be part of a
    // URI.
    _scanner.expect('!');
  }

  return handle;
}
1053
/// Scans a tag URI.
///
/// [head] is the initial portion of the tag that's already been scanned. Its
/// leading '!' is dropped and the remainder becomes the start of the returned
/// URI. [flowSeparators] indicates whether the tag URI can contain the flow
/// separators ',', '[', and ']'.
///
/// Returns the percent-decoded URI text, including the contribution of
/// [head].
String _scanTagUri({String head, bool flowSeparators: true}) {
  var buffer = new StringBuffer();

  // Copy the head if needed.
  //
  // Note that we don't copy the leading '!' character.
  if (head != null && head.length > 1) buffer.write(head.substring(1));

  // The set of characters that may appear in URI is as follows:
  //
  //      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
  //      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
  //      '%'.
  //
  // In a shorthand tag annotation, the flow separators ',', '[', and ']' are
  // disallowed.
  var start = _scanner.position;
  var char = _scanner.peekChar();
  while (_isTagChar || (flowSeparators &&
      (char == COMMA || char == LEFT_SQUARE || char == RIGHT_SQUARE))) {
    _scanner.readChar();
    char = _scanner.peekChar();
  }

  // Append the newly scanned text to the head. Previously the head was
  // written to the buffer but never returned, so a '!suffix' tag whose whole
  // suffix had been consumed by the handle scan came back empty.
  buffer.write(_scanner.substring(start));

  // libyaml manually decodes the URL, but we don't have to do that. The head
  // is decoded together with the rest since it may contain '%' escapes too.
  return Uri.decodeFull(buffer.toString());
}
1087
/// Scans a block scalar.
///
/// [literal] selects between the literal and folded styles: folding of line
/// breaks is only applied when it is false, and it determines whether the
/// returned token is tagged [ScalarStyle.LITERAL] or [ScalarStyle.FOLDED].
///
/// Throws a [YamlException] if the header uses `0` as an indentation
/// indicator or is followed by anything other than a comment or line break.
Token _scanBlockScalar({bool literal: false}) {
  var begin = _scanner.state;

  // Eat the indicator '|' or '>'.
  _scanner.readChar();

  // Reads the one-digit indentation indicator at the current position,
  // rejecting 0.
  int readIncrement() {
    if (_scanner.peekChar() == NUMBER_0) {
      throw new YamlException(
          "0 may not be used as an indentation indicator.",
          _scanner.spanFrom(begin));
    }
    return _scanner.readChar() - NUMBER_0;
  }

  // The header may hold a chomping indicator and an indentation indicator,
  // in either order.
  var chomping = _Chomping.CLIP;
  var increment = 0;
  var indicator = _scanner.peekChar();
  if (indicator == PLUS || indicator == HYPHEN) {
    chomping = indicator == PLUS ? _Chomping.KEEP : _Chomping.STRIP;
    _scanner.readChar();

    if (_isDigit) increment = readIncrement();
  } else if (_isDigit) {
    increment = readIncrement();

    indicator = _scanner.peekChar();
    if (indicator == PLUS || indicator == HYPHEN) {
      chomping = indicator == PLUS ? _Chomping.KEEP : _Chomping.STRIP;
      _scanner.readChar();
    }
  }

  // Eat whitespace and comments to the end of the line.
  _skipBlanks();
  _skipComment();

  // The header must be the last thing on its line.
  if (!_isBreakOrEnd) {
    throw new YamlException("Expected comment or line break.",
        _scanner.emptySpan);
  }

  _skipLine();

  // An explicit indentation indicator is relative to the current
  // indentation; without one, the level is detected from the content below.
  var indent = 0;
  if (increment != 0) {
    indent = _indent >= 0 ? _indent + increment : increment;
  }

  // Scan the leading line breaks to determine the indentation level if
  // needed.
  var breaks = _scanBlockScalarBreaks(indent);
  indent = breaks.first;
  var trailingBreaks = breaks.last;

  // Scan the block scalar contents.
  var contents = new StringBuffer();
  var leadingBreak = '';
  var leadingBlank = false;
  var trailingBlank = false;
  var finish = _scanner.state;
  while (_scanner.column == indent && !_scanner.isDone) {
    // Check for a document indicator. libyaml doesn't do this, but the spec
    // mandates it. See example 9.5:
    // http://yaml.org/spec/1.2/spec.html#id2801606.
    if (_isDocumentIndicator) break;

    // We are at the beginning of a non-empty line.

    // Does this line begin with whitespace?
    trailingBlank = _isBlank;

    // Folding replaces the previous line's break only when neither that line
    // nor this one begins with whitespace.
    var fold = !literal && leadingBreak.isNotEmpty && !leadingBlank &&
        !trailingBlank;
    if (fold) {
      // Join the lines with a space unless empty lines separate them.
      if (trailingBreaks.isEmpty) contents.writeCharCode(SP);
    } else {
      contents.write(leadingBreak);
    }
    leadingBreak = '';

    // Append the remaining line breaks.
    contents.write(trailingBreaks);

    // Remember whether this line began with whitespace for the next round.
    leadingBlank = _isBlank;

    var lineStart = _scanner.position;
    while (!_isBreakOrEnd) {
      _scanner.readChar();
    }
    contents.write(_scanner.substring(lineStart));
    finish = _scanner.state;

    // libyaml always reads a line here, but this breaks on block scalars at
    // the end of the document that end without newlines. See example 8.1:
    // http://yaml.org/spec/1.2/spec.html#id2793888.
    if (!_scanner.isDone) leadingBreak = _readLine();

    // Eat the following indentation and spaces.
    breaks = _scanBlockScalarBreaks(indent);
    indent = breaks.first;
    trailingBreaks = breaks.last;
  }

  // Chomp the tail.
  if (chomping != _Chomping.STRIP) contents.write(leadingBreak);
  if (chomping == _Chomping.KEEP) contents.write(trailingBreaks);

  var style = literal ? ScalarStyle.LITERAL : ScalarStyle.FOLDED;
  return new ScalarToken(
      _scanner.spanFrom(begin, finish), contents.toString(), style);
}
1215
/// Scans indentation spaces and line breaks for a block scalar.
///
/// If [indent] is 0 the indentation level is not yet known and is determined
/// here. Returns a pair of the (possibly newly determined) indentation level
/// and the text of the line breaks that were consumed.
Pair<int, String> _scanBlockScalarBreaks(int indent) {
  var widest = 0;
  var consumedBreaks = new StringBuffer();

  while (true) {
    // Eat spaces up to the known indentation, or all of them if the
    // indentation is still undetermined.
    while ((indent == 0 || _scanner.column < indent) &&
        _scanner.peekChar() == SP) {
      _scanner.readChar();
    }

    var column = _scanner.column;
    if (column > widest) widest = column;

    // libyaml throws an error here if a tab character is detected, but the
    // spec treats tabs like any other non-space character. See example 8.2:
    // http://yaml.org/spec/1.2/spec.html#id2794311.

    if (!_isBreak) break;
    consumedBreaks.write(_readLine());
  }

  var level = indent;
  if (level == 0) {
    // Use the widest indentation seen, but always at least one more than the
    // current indentation.
    //
    // libyaml forces indent to be at least 1 here, but that doesn't seem to
    // be supported by the spec.
    var minimum = _indent + 1;
    level = widest < minimum ? minimum : widest;
  }

  return new Pair(level, consumedBreaks.toString());
}
1250
/// Scans a quoted scalar.
///
/// [singleQuote] is true for a single-quoted scalar and false for a
/// double-quoted one. Backslash escapes are only interpreted in
/// double-quoted scalars; in single-quoted scalars `''` stands for one quote.
///
/// Throws a [YamlException] on end of input before the closing quote, on an
/// unknown escape character, or on a malformed or invalid hexadecimal escape.
Token _scanFlowScalar({bool singleQuote: false}) {
  var begin = _scanner.state;
  var contents = new StringBuffer();
  var quote = singleQuote ? SINGLE_QUOTE : DOUBLE_QUOTE;

  // Eat the left quote.
  _scanner.readChar();

  while (true) {
    // Document indicators may not appear at the beginning of a line inside
    // the scalar.
    if (_isDocumentIndicator) {
      _scanner.error("Unexpected document indicator.");
    }

    if (_scanner.isDone) {
      throw new YamlException("Unexpected end of file.", _scanner.emptySpan);
    }

    // Consume a run of non-blank characters.
    var leadingBlanks = false;
    while (!_isBlankOrEnd) {
      var char = _scanner.peekChar();

      if (singleQuote && char == SINGLE_QUOTE &&
          _scanner.peekChar(1) == SINGLE_QUOTE) {
        // An escaped single quote.
        _scanner.readChar();
        _scanner.readChar();
        contents.writeCharCode(SINGLE_QUOTE);
        continue;
      }

      // The closing quote.
      if (char == quote) break;

      if (singleQuote || char != BACKSLASH) {
        // An ordinary character.
        contents.writeCharCode(_scanner.readChar());
        continue;
      }

      // From here on we're looking at a backslash in a double-quoted scalar.
      if (_isBreakAt(1)) {
        // An escaped newline.
        _scanner.readChar();
        _skipLine();
        leadingBlanks = true;
        break;
      }

      var escapeStart = _scanner.state;

      // Classify the escape: either it maps straight to one code unit, or
      // it introduces a fixed number of hex digits.
      var escaped;
      var hexDigits;
      switch (_scanner.peekChar(1)) {
        case NUMBER_0:
          escaped = NULL;
          break;
        case LETTER_A:
          escaped = BELL;
          break;
        case LETTER_B:
          escaped = BACKSPACE;
          break;
        case LETTER_T:
        case TAB:
          escaped = TAB;
          break;
        case LETTER_N:
          escaped = LF;
          break;
        case LETTER_V:
          escaped = VERTICAL_TAB;
          break;
        case LETTER_F:
          escaped = FORM_FEED;
          break;
        case LETTER_R:
          escaped = CR;
          break;
        case LETTER_E:
          escaped = ESCAPE;
          break;
        case SP:
        case DOUBLE_QUOTE:
        case SLASH:
        case BACKSLASH:
          // libyaml doesn't support an escaped forward slash, but it was
          // added in YAML 1.2. See section 5.7:
          // http://yaml.org/spec/1.2/spec.html#id2776092
          escaped = _scanner.peekChar(1);
          break;
        case LETTER_CAP_N:
          escaped = NEL;
          break;
        case UNDERSCORE:
          escaped = NBSP;
          break;
        case LETTER_CAP_L:
          escaped = LINE_SEPARATOR;
          break;
        case LETTER_CAP_P:
          escaped = PARAGRAPH_SEPARATOR;
          break;
        case LETTER_X:
          hexDigits = 2;
          break;
        case LETTER_U:
          hexDigits = 4;
          break;
        case LETTER_CAP_U:
          hexDigits = 8;
          break;
        default:
          throw new YamlException("Unknown escape character.",
              _scanner.spanFrom(escapeStart));
      }

      // Eat the backslash and the escape character.
      _scanner.readChar();
      _scanner.readChar();

      if (hexDigits == null) {
        contents.writeCharCode(escaped);
        continue;
      }

      // Accumulate the hex digits into a code point.
      var codePoint = 0;
      for (var digit = 0; digit < hexDigits; digit++) {
        if (!_isHex) {
          // Include the offending character in the reported span.
          _scanner.readChar();
          throw new YamlException(
              "Expected $hexDigits-digit hexidecimal number.",
              _scanner.spanFrom(escapeStart));
        }

        codePoint = (codePoint << 4) + _asHex(_scanner.readChar());
      }

      // Surrogates and values past the Unicode range are rejected.
      var isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
      if (isSurrogate || codePoint > 0x10FFFF) {
        throw new YamlException(
            "Invalid Unicode character escape code.",
            _scanner.spanFrom(escapeStart));
      }

      contents.writeCharCode(codePoint);
    }

    // Check if we're at the end of a scalar.
    if (_scanner.peekChar() == quote) break;

    // Consume blanks and line breaks between runs of content.
    var whitespace = new StringBuffer();
    var leadingBreak = '';
    var trailingBreaks = new StringBuffer();
    while (_isBlank || _isBreak) {
      if (_isBlank) {
        // Consume a space or a tab; it's only kept if no line break has been
        // seen yet.
        var blank = _scanner.readChar();
        if (!leadingBlanks) whitespace.writeCharCode(blank);
      } else if (leadingBlanks) {
        // A subsequent line break.
        trailingBreaks.write(_readLine());
      } else {
        // The first line break discards the blanks collected before it.
        whitespace.clear();
        leadingBreak = _readLine();
        leadingBlanks = true;
      }
    }

    // Join the whitespace or fold line breaks.
    if (!leadingBlanks) {
      contents.write(whitespace);
      whitespace.clear();
    } else if (leadingBreak.isNotEmpty && trailingBreaks.isEmpty) {
      contents.writeCharCode(SP);
    } else {
      contents.write(trailingBreaks);
    }
  }

  // Eat the right quote.
  _scanner.readChar();

  var style =
      singleQuote ? ScalarStyle.SINGLE_QUOTED : ScalarStyle.DOUBLE_QUOTED;
  return new ScalarToken(
      _scanner.spanFrom(begin), contents.toString(), style);
}
1434
/// Scans a plain scalar.
///
/// Scanning stops at a document indicator, at a `#` comment, at the first
/// character that isn't a plain character or whitespace, or, in a block
/// context, when a continuation line is indented less than one level beyond
/// the current indentation.
///
/// A single line break between content lines is folded into a space; the
/// first break of a longer run is dropped and each further break is kept as
/// a newline. Whitespace within a line is preserved.
Token _scanPlainScalar() {
  var start = _scanner.state;
  var end = _scanner.state;
  var buffer = new StringBuffer();
  var leadingBreak = '';
  // Accumulates every line break after the first one in a run. This must
  // append rather than overwrite so that multiple consecutive empty lines
  // each contribute a newline, matching the handling in [_scanFlowScalar].
  var trailingBreaks = new StringBuffer();
  var whitespace = new StringBuffer();
  var indent = _indent + 1;

  while (true) {
    // Check for a document indicator.
    if (_isDocumentIndicator) break;

    // Check for a comment.
    if (_scanner.peekChar() == HASH) break;

    if (_isPlainChar) {
      // Join the whitespace or fold line breaks.
      if (leadingBreak.isNotEmpty) {
        if (trailingBreaks.isEmpty) {
          buffer.writeCharCode(SP);
        } else {
          buffer.write(trailingBreaks);
        }
        leadingBreak = '';
        trailingBreaks.clear();
      } else {
        buffer.write(whitespace);
        whitespace.clear();
      }
    }

    // libyaml's notion of valid identifiers differs substantially from YAML
    // 1.2's. We use [_isPlainChar] instead of libyaml's character here.
    var startPosition = _scanner.position;
    while (_isPlainChar) {
      _scanner.readChar();
    }
    buffer.write(_scanner.substring(startPosition));
    end = _scanner.state;

    // Is it the end?
    if (!_isBlank && !_isBreak) break;

    while (_isBlank || _isBreak) {
      if (_isBlank) {
        // Check for a tab character messing up the indentation.
        if (leadingBreak.isNotEmpty && _scanner.column < indent &&
            _scanner.peekChar() == TAB) {
          _scanner.error("Expected a space but found a tab.", length: 1);
        }

        // Blanks are only kept while still on the line holding content.
        if (leadingBreak.isEmpty) {
          whitespace.writeCharCode(_scanner.readChar());
        } else {
          _scanner.readChar();
        }
      } else {
        // Check if it's a first line break.
        if (leadingBreak.isEmpty) {
          leadingBreak = _readLine();
          whitespace.clear();
        } else {
          trailingBreaks.write(_readLine());
        }
      }
    }

    // Check the indentation level.
    if (_inBlockContext && _scanner.column < indent) break;
  }

  // Allow a simple key after a plain scalar with leading blanks.
  if (leadingBreak.isNotEmpty) _simpleKeyAllowed = true;

  return new ScalarToken(_scanner.spanFrom(start, end), buffer.toString(),
      ScalarStyle.PLAIN);
}
1514
/// Moves past the current line break, if there is one.
///
/// A CR LF pair is consumed as a single break. Does nothing when the next
/// character is neither CR nor LF.
void _skipLine() {
  var first = _scanner.peekChar();

  if (first == LF) {
    _scanner.readChar();
    return;
  }

  if (first != CR) return;

  _scanner.readChar();
  // Swallow the LF of a CR LF sequence.
  if (_scanner.peekChar() == LF) _scanner.readChar();
}
1522
/// Moves past the current line break and returns a newline.
///
/// CR LF, CR, and LF are all normalized to `"\n"`. Throws a [YamlException]
/// if the scanner isn't positioned at a CR or LF.
String _readLine() {
  // libyaml supports NEL, PS, and LS characters as line separators, but this
  // is explicitly forbidden in section 5.4 of the YAML spec.
  switch (_scanner.peekChar()) {
    case LF:
      _scanner.readChar();
      break;
    case CR:
      _scanner.readChar();
      // Treat CR LF as one break.
      if (_scanner.peekChar() == LF) _scanner.readChar();
      break;
    default:
      throw new YamlException("Expected newline.", _scanner.emptySpan);
  }

  return "\n";
}
1538
/// Returns whether the character at [offset] is a space or a tab.
bool _isBlankAt(int offset) {
  switch (_scanner.peekChar(offset)) {
    case SP:
    case TAB:
      return true;
    default:
      return false;
  }
}
1544
/// Returns whether the character at [offset] is a line break (CR or LF).
bool _isBreakAt(int offset) {
  // Libyaml considers NEL, LS, and PS to be line breaks as well, but that's
  // contrary to the spec.
  switch (_scanner.peekChar(offset)) {
    case CR:
    case LF:
      return true;
    default:
      return false;
  }
}
1552
/// Returns whether the character at [offset] is whitespace or past the end of
/// the source.
///
/// Whitespace here means a space, a tab, CR, or LF.
bool _isBlankOrEndAt(int offset) {
  var char = _scanner.peekChar(offset);
  // A null character means there's nothing at that offset.
  if (char == null) return true;

  switch (char) {
    case SP:
    case TAB:
    case CR:
    case LF:
      return true;
    default:
      return false;
  }
}
1560
/// Returns whether the character at [offset] is a plain character.
///
/// A `:` counts only if the character after it is plain-safe, and a `#`
/// counts only if it isn't preceded by a space or tab. Everything else defers
/// to [_isPlainSafeAt].
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c).
bool _isPlainCharAt(int offset) {
  var char = _scanner.peekChar(offset);

  if (char == COLON) return _isPlainSafeAt(offset + 1);

  if (char == HASH) {
    var before = _scanner.peekChar(offset - 1);
    return !(before == SP || before == TAB);
  }

  return _isPlainSafeAt(offset);
}
1575
/// Returns whether the character at [offset] is a plain-safe character.
///
/// Returns false when there is no character at [offset].
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-safe(c).
bool _isPlainSafeAt(int offset) {
  var char = _scanner.peekChar(offset);
  if (char == null) return false;

  // These characters are delimiters in a flow context and thus are only
  // safe in a block context.
  var isFlowIndicator = char == COMMA ||
      char == LEFT_SQUARE ||
      char == RIGHT_SQUARE ||
      char == LEFT_CURLY ||
      char == RIGHT_CURLY;
  if (isFlowIndicator) return _inBlockContext;

  // Whitespace, line breaks and the byte order mark are never plain-safe.
  if (char == SP || char == TAB || char == LF || char == CR || char == BOM) {
    return false;
  }

  if (char == NEL) return true;

  // Otherwise the character must fall in one of these ranges.
  return (char >= 0x00020 && char <= 0x00007E) ||
      (char >= 0x000A0 && char <= 0x00D7FF) ||
      (char >= 0x0E000 && char <= 0x00FFFD) ||
      (char >= 0x10000 && char <= 0x10FFFF);
}
1606
/// Returns the hexadecimal value of [char].
///
/// Values up to [NUMBER_9] are treated as decimal digits, values up to
/// [LETTER_CAP_F] as uppercase hex letters, and anything above as lowercase
/// hex letters. No validation is done here.
int _asHex(int char) {
  if (char > LETTER_CAP_F) return 10 + char - LETTER_A;
  if (char > NUMBER_9) return 10 + char - LETTER_CAP_A;
  return char - NUMBER_0;
}
1613
/// Moves the scanner past any blank characters.
void _skipBlanks() {
  if (!_isBlank) return;

  do {
    _scanner.readChar();
  } while (_isBlank);
}
1620
/// Moves the scanner past a comment, if one starts at the current position.
///
/// The scanner is left at the line break or end of input that terminates the
/// comment; the break itself isn't consumed.
void _skipComment() {
  if (_scanner.peekChar() == HASH) {
    while (!_isBreakOrEnd) {
      _scanner.readChar();
    }
  }
}
1628 }
1629
/// A record of the location of a potential simple key.
class _SimpleKey {
  /// The index of the token that begins the simple key.
  ///
  /// This index counts every token emitted so far; it is not an index into
  /// [_tokens].
  final int tokenNumber;

  /// The source location of the beginning of the simple key.
  ///
  /// Used for error reporting and for working out when a simple key is no
  /// longer on the current line.
  final SourceLocation location;

  /// The line on which the key appears.
  ///
  /// Stored separately because reading it from [location] requires a binary
  /// search, whereas this is O(1).
  final int line;

  /// The column on which the key appears.
  ///
  /// Stored separately because reading it from [location] requires a binary
  /// search, whereas this is O(1).
  final int column;

  /// Whether this key must exist for the document to be scanned.
  final bool required;

  _SimpleKey(this.tokenNumber, this.line, this.column, this.location,
      {this.required});
}
1663
/// An enum of chomping indicators that describe how to handle trailing
/// whitespace for a block scalar.
///
/// See http://yaml.org/spec/1.2/spec.html#id2794534.
class _Chomping {
  /// The label of this indicator, as returned by [toString].
  final String name;

  const _Chomping(this.name);

  /// All trailing whitespace is discarded.
  static const STRIP = const _Chomping("STRIP");

  /// A single trailing newline is retained.
  static const CLIP = const _Chomping("CLIP");

  /// All trailing whitespace is preserved.
  static const KEEP = const _Chomping("KEEP");

  String toString() {
    return name;
  }
}
OLDNEW
« no previous file with comments | « yaml/lib/src/parser.dart ('k') | yaml/lib/src/style.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698