pkg/yaml/lib/src/scanner.dart - Issue 689513002: Rewrite the pkg/yaml parser.

Side by Side Diff: pkg/yaml/lib/src/scanner.dart

Issue 689513002: Rewrite the pkg/yaml parser. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Update string_scanner dependency. Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 library yaml.scanner;

	6

	7 import 'package:collection/collection.dart';

	8 import 'package:string_scanner/string_scanner.dart';

	9 import 'package:source_span/source_span.dart';

	10

	11 import 'style.dart';

	12 import 'token.dart';

	13 import 'utils.dart';

	14 import 'yaml_exception.dart';

	15

	16 /// A scanner that reads a string of Unicode characters and emits [Token]s.

	17 ///

	18 /// This is based on the libyaml scanner, available at

	19 /// https://github.com/yaml/libyaml/blob/master/src/scanner.c. The license for

	20 /// that is available in ../../libyaml-license.txt.

	21 class Scanner {

	22 static const TAB = 0x9;

	23 static const LF = 0xA;

	24 static const CR = 0xD;

	25 static const SP = 0x20;

	26 static const TILDE = 0x7E;
	Bob Nystrom 2014/10/31 20:03:29 Move to after GRAVE_ACCENT? Move to after GRAVE_ACCENT? nweiz 2014/11/04 22:19:37 Done. Show quoted text On 2014/10/31 20:03:29, Bob Nystrom wrote: > Move to after GRAVE_ACCENT? Done.
	27 static const NEL = 0x85;
	Bob Nystrom 2014/10/31 20:03:29 This one's a bit uncommon. How about moving either This one's a bit uncommon. How about moving either to the next section or have it in numeric order after GRAVE_ACCENT. nweiz 2014/11/04 22:19:38 Done. Show quoted text On 2014/10/31 20:03:29, Bob Nystrom wrote: > This one's a bit uncommon. How about moving either to the next section or have > it in numeric order after GRAVE_ACCENT. Done.
	28 static const DOLLAR = 0x24;

	29 static const LEFT_PAREN = 0x28;

	30 static const RIGHT_PAREN = 0x29;

	31 static const PLUS = 0x2B;

	32 static const COMMA = 0x2C;

	33 static const HYPHEN = 0x2D;

	34 static const PERIOD = 0x2E;

	35 static const QUESTION = 0x3F;

	36 static const COLON = 0x3A;

	37 static const SEMICOLON = 0x3B;

	38 static const EQUALS = 0x3D;

	39 static const LEFT_SQUARE = 0x5B;

	40 static const RIGHT_SQUARE = 0x5D;

	41 static const LEFT_CURLY = 0x7B;

	42 static const RIGHT_CURLY = 0x7D;

	43 static const HASH = 0x23;

	44 static const AMPERSAND = 0x26;

	45 static const ASTERISK = 0x2A;

	46 static const EXCLAMATION = 0x21;

	47 static const VERTICAL_BAR = 0x7C;

	48 static const LEFT_ANGLE = 0x3C;

	49 static const RIGHT_ANGLE = 0x3E;

	50 static const SINGLE_QUOTE = 0x27;

	51 static const DOUBLE_QUOTE = 0x22;

	52 static const PERCENT = 0x25;

	53 static const AT = 0x40;

	54 static const GRAVE_ACCENT = 0x60;

	55

	56 static const NULL = 0x0;

	57 static const BELL = 0x7;

	58 static const BACKSPACE = 0x8;

	59 static const VERTICAL_TAB = 0xB;

	60 static const FORM_FEED = 0xC;

	61 static const ESCAPE = 0x1B;

	62 static const SLASH = 0x2F;

	63 static const BACKSLASH = 0x5C;

	64 static const UNDERSCORE = 0x5F;

	65 static const NBSP = 0xA0;

	66 static const LINE_SEPARATOR = 0x2028;

	67 static const PARAGRAPH_SEPARATOR = 0x2029;

	68 static const BOM = 0xFEFF;

	69

	70 static const NUMBER_0 = 0x30;

	71 static const NUMBER_9 = 0x39;

	72

	73 static const LETTER_A = 0x61;

	74 static const LETTER_B = 0x62;

	75 static const LETTER_E = 0x65;

	76 static const LETTER_F = 0x66;

	77 static const LETTER_N = 0x6E;

	78 static const LETTER_R = 0x72;

	79 static const LETTER_T = 0x74;

	80 static const LETTER_U = 0x75;

	81 static const LETTER_V = 0x76;

	82 static const LETTER_X = 0x78;

	83 static const LETTER_Z = 0x7A;

	84

	85 static const LETTER_CAP_A = 0x41;

	86 static const LETTER_CAP_F = 0x46;

	87 static const LETTER_CAP_L = 0x4C;

	88 static const LETTER_CAP_N = 0x4E;

	89 static const LETTER_CAP_P = 0x50;

	90 static const LETTER_CAP_U = 0x55;

	91 static const LETTER_CAP_X = 0x58;

	92 static const LETTER_CAP_Z = 0x5A;

	93

	94 /// The underlying [SpanScanner] used to read characters from the source text.

	95 ///

	96 /// This is also used to track line and column information and to generate

	97 /// [SourceSpan]s.

	98 final SpanScanner _scanner;

	99

	100 /// Whether this scanner has produced a [TokenType.STREAM_START] token

	101 /// indicating the beginning of the YAML stream.

	102 var _streamStartProduced = false;

	103

	104 /// Whether this scanner has produced a [TokenType.STREAM_END] token

	105 /// indicating the end of the YAML stream.

	106 var _streamEndProduced = false;

	107

	108 /// How many levels deep the scanner is in flow nesting.

	109 var _flowLevel = 0;
	Bob Nystrom 2014/10/31 20:03:28 Can this be inferred from _simpleKeys.length? Can this be inferred from _simpleKeys.length? nweiz 2014/11/04 22:19:37 Yes, good idea. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > Can this be inferred from _simpleKeys.length? Yes, good idea.
	110

	111 /// The queue of tokens yet to be emitted.

	112 ///

	113 /// These are queued up in advance so that [TokenType.KEY] tokens can be

	114 /// inserted once the scanner determines that a series of tokens represents a

	115 /// mapping key.

	116 final _tokens = new QueueList<Token>();

	117

	118 /// The number of tokens that have been emitted.

	119 ///

	120 /// This doesn't count tokens still queued in [_tokens].

	121 var _tokensParsed = 0;
	Bob Nystrom 2014/10/31 20:03:28 "Parsed" -> "Scanned"? "Parsed" -> "Scanned"? nweiz 2014/11/04 22:19:37 Done. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > "Parsed" -> "Scanned"? Done.
	122

	123 /// Whether the next token in [_tokens] is ready to be returned.

	124 ///

	125 /// It might not be ready if there may still be a [TokenType.KEY] inserted

	126 /// before it.

	127 var _tokenAvailable = false;

	128

	129 /// The stack of indent levels for the current nested block contexts.

	130 final _indents = new List<int>();
	Bob Nystrom 2014/10/31 20:03:29 <int>[] <int>[] nweiz 2014/11/04 22:19:37 Done. Show quoted text On 2014/10/31 20:03:29, Bob Nystrom wrote: > <int>[] Done.
	131

	132 /// The current indent level.

	133 var _indent = -1;
	Bob Nystrom 2014/10/31 20:03:27 Document what -1 means (or make a constant). Does Document what -1 means (or make a constant). Does this need to be a separate field, or could this be _indents.last and just push the value onto that instead of setting this? nweiz 2014/11/04 22:19:38 Done. Show quoted text On 2014/10/31 20:03:27, Bob Nystrom wrote: > Document what -1 means (or make a constant). Done. Show quoted text > Does this need to be a separate field, or could this be _indents.last and just > push the value onto that instead of setting this? Done.
	134

	135 /// Whether a simple key is allowed in this context.

	136 ///

	137 /// A simple key refers to any mapping key that doesn't have an explicit "?".

	138 var _simpleKeyAllowed = true;

	139

	140 /// The stack of potential simple keys for each level of flow nesting.

	141 ///

	142 /// Entries in this list may be `null`, indicating that there is no valid

	143 /// simple key for the associated level of nesting.

	144 ///

	145 /// When a ":" is parsed and there's a simple key available, a [TokenType.KEY]

	146 /// token is inserted in [_tokens] before that key's token. This allows the

	147 /// parser to tell that the key is intended to be a mapping key.

	148 final _simpleKeys = <_SimpleKey>[null];
	Bob Nystrom 2014/10/31 20:03:28 Why isn't this initially empty? Why isn't this initially empty? nweiz 2014/11/04 22:19:37 Because there is an initial flow level that could Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > Why isn't this initially empty? Because there is an initial flow level that could have a simple key.
	149

	/// Whether the scanner is currently inside a block-level structure rather
	/// than a flow-level one, i.e. the flow nesting depth is zero.
	bool get _inBlockContext {
	  return _flowLevel == 0;
	}

	153

	/// Whether the scanner is positioned at a line break or has reached the end
	/// of the source.
	///
	/// The end-of-source check runs first, so [_isBreak] is only consulted while
	/// there is still input left.
	bool get _isBreakOrEnd => _scanner.isDone || _isBreak;

	156

	/// Whether the character at the current position is a line break.
	bool get _isBreak {
	  return _isBreakAt(0);
	}

	159

	/// Whether the character at the current position is whitespace, or the
	/// scanner has reached the end of the source.
	bool get _isBlankOrEnd {
	  return _isBlankOrEndAt(0);
	}

	162

	/// Whether the character at the current position is whitespace.
	bool get _isBlank {
	  return _isBlankAt(0);
	}

	165

	166 /// Whether the current character is a valid tag name character.

	167 ///

	168 /// See http://yaml.org/spec/1.2/spec.html#ns-tag-name.

	169 bool get _isTagChar {

	170 var char = _scanner.peekChar();

	171 if (char == null) return false;

	172 return (char >= NUMBER_0 && char <= NUMBER_9) \|\|

	173 (char >= LETTER_A && char <= LETTER_Z) \|\|

	174 (char >= LETTER_CAP_A && char <= LETTER_CAP_Z) \|\|

	175 char == HYPHEN \|\| char == SEMICOLON \|\| char == SLASH \|\|

	176 char == COLON \|\| char == AT \|\| char == AMPERSAND \|\|

	177 char == EQUALS \|\| char == PLUS \|\| char == DOLLAR \|\|

	178 char == PERIOD \|\| char == TILDE \|\| char == QUESTION \|\|

	179 char == ASTERISK \|\| char == SINGLE_QUOTE \|\| char == LEFT_PAREN \|\|

	180 char == RIGHT_PAREN \|\| char == PERCENT;
	Bob Nystrom 2014/10/31 20:03:28 It may be quicker to look this up in a map or even It may be quicker to look this up in a map or even index into a bool array. Maybe see if that makes a noticeable perf difference? Another option is to use a switch for the literal values like you do in _isNonBreak. That may let the VM optimize it to a jump table for you. nweiz 2014/11/04 22:19:37 Done. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > It may be quicker to look this up in a map or even index into a bool array. > Maybe see if that makes a noticeable perf difference? > > Another option is to use a switch for the literal values like you do in > _isNonBreak. That may let the VM optimize it to a jump table for you. Done.
	181 }

	182

	/// Whether the current character may appear in an anchor name.
	///
	/// An anchor character is any non-space character (see [_isNonSpace]) other
	/// than the flow indicators `,`, `[`, `]`, `{` and `}`.
	///
	/// See http://yaml.org/spec/1.2/spec.html#ns-anchor-name.
	bool get _isAnchorChar {
	  if (!_isNonSpace) return false;

	  // Flow indicators terminate an anchor name; everything else is allowed.
	  switch (_scanner.peekChar()) {
	    case COMMA:
	    case LEFT_SQUARE:
	    case RIGHT_SQUARE:
	    case LEFT_CURLY:
	    case RIGHT_CURLY:
	      return false;
	    default:
	      return true;
	  }
	}

	193

	/// Whether the character at the current position is an ASCII decimal digit
	/// (`0` through `9`).
	///
	/// Returns `false` when there is no character left to peek at.
	bool get _isDigit {
	  var code = _scanner.peekChar();
	  if (code == null) return false;
	  return NUMBER_0 <= code && code <= NUMBER_9;
	}

	199

	/// Whether the character at the current position is a hexadecimal digit
	/// (`0`-`9`, `a`-`f` or `A`-`F`).
	///
	/// Returns `false` when there is no character left to peek at.
	bool get _isHex {
	  var code = _scanner.peekChar();
	  if (code == null) return false;

	  if (code >= NUMBER_0 && code <= NUMBER_9) return true;
	  if (code >= LETTER_A && code <= LETTER_F) return true;
	  return code >= LETTER_CAP_A && code <= LETTER_CAP_F;
	}

	209

	/// Whether the character at the current position is a plain character.
	///
	/// Delegates to [_isPlainCharAt] with an offset of zero.
	///
	/// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c).
	bool get _isPlainChar {
	  return _isPlainCharAt(0);
	}

	214

	215 /// Whether the character at the current position is a printable character

	216 /// other than a line break or byte-order mark.

	217 ///

	218 /// See http://yaml.org/spec/1.2/spec.html#nb-char.

	219 bool get _isNonBreak {

	220 var char = _scanner.peekChar();

	221 switch (char) {

	222 case LF:

	223 case CR:

	224 case BOM:

	225 return false;

	226 case TAB:

	227 case NEL:

	228 return true;

	229 default:

	230 return char != null &&

	231 ((char >= 0x00020 && char <= 0x00007E) \|\|
	Bob Nystrom 2014/10/31 20:03:28 Nit: +2 more before "(". Nit: +2 more before "(". nweiz 2014/11/04 22:19:37 Done. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > Nit: +2 more before "(". Done.
	232 (char >= 0x000A0 && char <= 0x00D7FF) \|\|

	233 (char >= 0x0E000 && char <= 0x00FFFD) \|\|

	234 (char >= 0x10000 && char <= 0x10FFFF));

	235 }

	236 }

	237

	238 /// Whether the character at the current position is a printable character

	239 /// other than whitespace.

	240 ///

	241 /// See http://yaml.org/spec/1.2/spec.html#nb-char.

	242 bool get _isNonSpace {

	243 var char = _scanner.peekChar();

	244 return char != null && char != LF && char != CR && char != BOM && char != SP &&
	Bob Nystrom 2014/10/31 20:03:27 Long line. Long line. nweiz 2014/11/04 22:19:37 Done. Show quoted text On 2014/10/31 20:03:27, Bob Nystrom wrote: > Long line. Done.
	245 char != SP &&

	246 (char == NEL \|\|

	247 (char >= 0x00020 && char <= 0x00007E) \|\|

	248 (char >= 0x000A0 && char <= 0x00D7FF) \|\|

	249 (char >= 0x0E000 && char <= 0x00FFFD) \|\|

	250 (char >= 0x10000 && char <= 0x10FFFF));
	Bob Nystrom 2014/10/31 20:03:28 This expression is pretty huge. How about using a This expression is pretty huge. How about using a switch or some early returns like the above fns? nweiz 2014/11/04 22:19:36 Done. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > This expression is pretty huge. How about using a switch or some early returns > like the above fns? Done.
	251 }

	252

	/// Creates a scanner that reads tokens from [source].
	///
	/// [sourceUrl], which can be a String or a [Uri], is forwarded to the
	/// underlying [SpanScanner].
	Scanner(String source, {sourceUrl})
	    : _scanner = new SpanScanner(
	          source,
	          sourceUrl: sourceUrl);

	258

	/// Consumes and returns the next token.
	///
	/// Throws a [StateError] if the [TokenType.STREAM_END] token has already
	/// been returned by an earlier call.
	Token scan() {
	  if (_streamEndProduced) {
	    throw new StateError("Out of tokens.");
	  }
	  if (!_tokenAvailable) {
	    _fetchMoreTokens();
	  }

	  var next = _tokens.removeFirst();

	  // The head of the queue has changed, so the following token has to be
	  // re-validated before it can be handed out.
	  _tokenAvailable = false;
	  _tokensParsed++;

	  if (next is Token && next.type == TokenType.STREAM_END) {
	    _streamEndProduced = true;
	  } else {
	    _streamEndProduced = false;
	  }
	  return next;
	}

	271

	/// Returns the next token without consuming it.
	///
	/// Returns `null` once the [TokenType.STREAM_END] token has been consumed
	/// by [scan].
	Token peek() {
	  if (_streamEndProduced) {
	    return null;
	  }

	  if (!_tokenAvailable) {
	    _fetchMoreTokens();
	  }
	  return _tokens.first;
	}

	278

	279 /// Ensures that [_tokens] contains at least one token which can be returned.

	280 void _fetchMoreTokens() {

	281 while (true) {

	282 if (_tokens.isNotEmpty) {

	283 _staleSimpleKeys();

	284 if (!_simpleKeys.any((key) =>
	Bob Nystrom 2014/10/31 20:03:28 Document this. Document this. nweiz 2014/11/04 22:19:38 Done. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > Document this. Done.
	285 key != null && key.tokenNumber == _tokensParsed)) {

	286 break;

	287 }

	288 }

	289

	290 _fetchNextToken();

	291 }

	292 _tokenAvailable = true;

	293 }

	294

	295 /// The dispatcher for token fetchers.

	296 void _fetchNextToken() {

	297 if (!_streamStartProduced) {

	298 _fetchStreamStart();

	299 return;

	300 }

	301

	302 _scanToNextToken();

	303 _staleSimpleKeys();

	304 _unrollIndent(_scanner.column);

	305

	306 if (_scanner.isDone) {

	307 _fetchStreamEnd();

	308 return;

	309 }

	310

	311 if (_scanner.column == 0) {

	312 if (_scanner.peekChar() == PERCENT) {

	313 _fetchDirective();

	314 return;

	315 } else if (_isBlankOrEndAt(3)) {
	Bob Nystrom 2014/10/31 20:03:28 Ditch the else. Ditch the else. nweiz 2014/11/04 22:19:37 Done. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > Ditch the else. Done.
	316 if (_scanner.matches('---')) {

	317 _fetchDocumentIndicator(TokenType.DOCUMENT_START);

	318 return;

	319 } else if (_scanner.matches('...')) {
	Bob Nystrom 2014/10/31 20:03:28 Here too. Here too. nweiz 2014/11/04 22:19:36 Done. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > Here too. Done.
	320 _fetchDocumentIndicator(TokenType.DOCUMENT_END);

	321 return;

	322 }

	323 }

	324 }

	325

	326 switch (_scanner.peekChar()) {

	327 case LEFT_SQUARE:

	328 _fetchFlowCollectionStart(TokenType.FLOW_SEQUENCE_START);

	329 return;
	Bob Nystrom 2014/10/31 20:03:29 Is there a reason to prefer return over break thro Is there a reason to prefer return over break throughout this? nweiz 2014/11/04 22:19:37 It allows the reader to avoid checking the end of Show quoted text On 2014/10/31 20:03:29, Bob Nystrom wrote: > Is there a reason to prefer return over break throughout this? It allows the reader to avoid checking the end of the function to see if anything else happens while following the flow.
	330 case LEFT_CURLY:

	331 _fetchFlowCollectionStart(TokenType.FLOW_MAPPING_START);

	332 return;

	333 case RIGHT_SQUARE:

	334 _fetchFlowCollectionEnd(TokenType.FLOW_SEQUENCE_END);

	335 return;

	336 case RIGHT_CURLY:

	337 _fetchFlowCollectionEnd(TokenType.FLOW_MAPPING_END);

	338 return;

	339 case COMMA:

	340 _fetchFlowEntry();

	341 return;

	342 case ASTERISK:

	343 _fetchAnchor(anchor: false);

	344 return;

	345 case AMPERSAND:

	346 _fetchAnchor(anchor: true);

	347 return;

	348 case EXCLAMATION:

	349 _fetchTag();

	350 return;

	351 case SINGLE_QUOTE:

	352 _fetchFlowScalar(singleQuote: true);

	353 return;

	354 case DOUBLE_QUOTE:

	355 _fetchFlowScalar(singleQuote: false);

	356 return;

	357 case VERTICAL_BAR:

	358 if (!_inBlockContext) _invalidScalarCharacter();

	359 _fetchBlockScalar(literal: true);

	360 return;

	361 case RIGHT_ANGLE:

	362 if (!_inBlockContext) _invalidScalarCharacter();

	363 _fetchBlockScalar(literal: false);

	364 return;

	365 case PERCENT:

	366 case AT:

	367 case GRAVE_ACCENT:

	368 _invalidScalarCharacter();

	369 return;

	370

	371 // These characters may sometimes begin plain scalars.

	372 case HYPHEN:

	373 if (_isPlainCharAt(1)) {

	374 _fetchPlainScalar();

	375 } else {

	376 _fetchBlockEntry();

	377 }

	378 return;

	379 case QUESTION:

	380 if (_isPlainCharAt(1)) {

	381 _fetchPlainScalar();

	382 } else {

	383 _fetchKey();

	384 }

	385 return;

	386 case COLON:

	387 if (!_inBlockContext && _tokens.isNotEmpty) {

	388 // If a colon follows a "JSON-like" value (an explicit map or list, or

	389 // a quoted string) it isn't required to have whitespace after it

	390 // since it unambiguously describes a map.

	391 var token = _tokens.last;

	392 if (token.type == TokenType.FLOW_SEQUENCE_END \|\|

	393 token.type == TokenType.FLOW_MAPPING_END \|\|

	394 (token.type == TokenType.SCALAR && token.style.isQuoted)) {

	395 _fetchValue();

	396 return;

	397 }

	398 }

	399

	400 if (_isPlainCharAt(1)) {

	401 _fetchPlainScalar();

	402 } else {

	403 _fetchValue();

	404 }

	405 return;

	406 default:

	407 if (!_isNonBreak) _invalidScalarCharacter();

	408

	409 _fetchPlainScalar();

	410 return;

	411 }

	412

	413 throw 'Inaccessible';

	414 }

	415

	/// Reports the character at the current position as unexpected.
	///
	/// The error is raised through the underlying scanner and covers a single
	/// character.
	void _invalidScalarCharacter() {
	  _scanner.error("Unexpected character.", length: 1);
	}

	419

	420 /// Checks the list of potential simple keys and remove the positions that

	421 /// cannot contain simple keys anymore.

	422 void _staleSimpleKeys() {

	423 for (var i = 0; i < _simpleKeys.length; i++) {

	424 var key = _simpleKeys[i];

	425 if (key == null) continue;

	426

	427 // libyaml requires that all simple keys be a single line and no longer

	428 // than 1024 characters. However, in section 7.4.2 of the spec

	429 // (http://yaml.org/spec/1.2/spec.html#id2790832), these restrictions are
	Bob Nystrom 2014/10/31 20:03:27 "restrictions are" "restrictions are" nweiz 2014/11/04 22:19:38 Done. Show quoted text On 2014/10/31 20:03:27, Bob Nystrom wrote: > "restrictions are" Done.
	430 // only applied when the curly braces are omitted. It's difficult to

	431 // retain enough context to know which keys need to have the restriction

	432 // placed on them, so for now we go the other direction and allow

	433 // everything but multiline simple keys in a block context.

	434 if (!_inBlockContext) continue;

	435

	436 if (key.location.line == _scanner.line) continue;

	437

	438 if (key.required) {

	439 throw new YamlException("Expected ':'.", _scanner.emptySpan);

	440 }

	441

	442 _simpleKeys[i] = null;

	443 }

	444 }

	445

	/// Checks if a simple key may start at the current position and saves it if
	/// so.
	///
	/// Does nothing when simple keys are not currently allowed. Otherwise the
	/// entry for the current flow level in [_simpleKeys] is replaced with a new
	/// [_SimpleKey] recording the number the next queued token will get and the
	/// scanner's current location.
	///
	/// May throw a [YamlException] via [_removeSimpleKey] if the entry being
	/// replaced was a required key.
	void _saveSimpleKey() {
	  // A simple key is required at the current position if the scanner is in
	  // the block context and the current column coincides with the indentation
	  // level.
	  var required = _inBlockContext && _indent == _scanner.column;

	  // A simple key is required only when it is the first token in the current
	  // line. Therefore it is always allowed. But we add a check anyway.
	  assert(_simpleKeyAllowed || !required);

	  if (!_simpleKeyAllowed) return;

	  // Drop whatever candidate was previously stored for this flow level, then
	  // record the current position as the new candidate.
	  _removeSimpleKey();
	  _simpleKeys[_simpleKeys.length - 1] = new _SimpleKey(
	      _tokensParsed + _tokens.length,
	      _scanner.location,
	      required: required);
	}

	467

	/// Clears the potential simple key stored for the current flow level.
	///
	/// Throws a [YamlException] pointing at the key's location if that key was
	/// required, since a required key must be followed by ':'.
	void _removeSimpleKey() {
	  var pending = _simpleKeys.last;
	  var mustBeKey = pending != null && pending.required;
	  if (mustBeKey) {
	    throw new YamlException("Could not find expected ':' for simple key.",
	        pending.location.pointSpan());
	  }

	  var top = _simpleKeys.length - 1;
	  _simpleKeys[top] = null;
	}

	478

	/// Enters one more level of flow nesting.
	///
	/// A fresh, empty simple-key slot is pushed onto [_simpleKeys] for the new
	/// level.
	void _increaseFlowLevel() {
	  _simpleKeys.add(null);
	  _flowLevel += 1;
	}

	484

	/// Leaves the innermost level of flow nesting, discarding its simple-key
	/// slot.
	///
	/// Does nothing when the scanner is already in the block context.
	void _decreaseFlowLevel() {
	  if (!_inBlockContext) {
	    _simpleKeys.removeLast();
	    _flowLevel -= 1;
	  }
	}

	491

	492 /// Pushes the current indentation level to the stack and sets the new level if
	Bob Nystrom 2014/10/31 20:03:28 Long line. Long line. nweiz 2014/11/04 22:19:36 Done. Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > Long line. Done.
	493 /// [column] is greater than [_indent].

	494 ///

	495 /// If it is, appends or inserts the specified token into [_tokens]. If
	Bob Nystrom 2014/10/31 20:03:27 "it is"? "it is"? nweiz 2014/11/04 22:19:37 Done. Show quoted text On 2014/10/31 20:03:27, Bob Nystrom wrote: > "it is"? Done.
	496 /// [tokenNumber] is provided, the corresponding token will be replaced;

	497 /// otherwise, the token will be added at the end.

	void _rollIndent(int column, TokenType type, SourceLocation location,
	    {int tokenNumber}) {
	  // Indentation is only tracked in the block context, and only a strictly
	  // deeper column opens a new level.
	  var alreadyDeepEnough = _indent != -1 && _indent >= column;
	  if (!_inBlockContext || alreadyDeepEnough) return;

	  // Remember the enclosing level so that [_unrollIndent] can restore it.
	  _indents.add(_indent);
	  _indent = column;

	  // Queue the token that opens the new block, either at the position of an
	  // already-queued token or at the end of the queue.
	  var opener = new Token(type, location.pointSpan());
	  if (tokenNumber != null) {
	    _tokens.insert(tokenNumber - _tokensParsed, opener);
	  } else {
	    _tokens.add(opener);
	  }
	}

	516

	/// Closes every open block whose indentation is deeper than [column].
	///
	/// Levels are popped from [_indents] until the current level is less than
	/// or equal to [column]; a [TokenType.BLOCK_END] token is appended for each
	/// level popped. Has no effect outside the block context.
	void _unrollIndent(int column) {
	  if (_inBlockContext) {
	    while (_indent > column) {
	      var blockEnd = new Token(TokenType.BLOCK_END, _scanner.emptySpan);
	      _tokens.add(blockEnd);
	      _indent = _indents.removeLast();
	    }
	  }
	}

	529

	/// Closes every open block, returning the current indentation level to -1.
	///
	/// A [TokenType.BLOCK_END] token is appended for each level popped from
	/// [_indents].
	void _resetIndent() {
	  _unrollIndent(-1);
	}

	535

	/// Queues the [TokenType.STREAM_START] token and records that it has been
	/// produced.
	void _fetchStreamStart() {
	  // Much of libyaml's initialization logic here is done in variable
	  // initializers instead, so only the flag and the token remain.
	  _streamStartProduced = true;

	  var streamStart = new Token(TokenType.STREAM_START, _scanner.emptySpan);
	  _tokens.add(streamStart);
	}

	543

	/// Queues the [TokenType.STREAM_END] token.
	///
	/// Any blocks that are still open are closed first, and the pending simple
	/// key (if any) is discarded.
	void _fetchStreamEnd() {
	  // Close open blocks before the end-of-stream token is appended.
	  _resetIndent();
	  _removeSimpleKey();
	  _simpleKeyAllowed = false;

	  var streamEnd = new Token(TokenType.STREAM_END, _scanner.emptySpan);
	  _tokens.add(streamEnd);
	}

	551

	/// Queues a [TokenType.VERSION_DIRECTIVE] or [TokenType.TAG_DIRECTIVE]
	/// token.
	///
	/// Nothing is queued when [_scanDirective] returns `null`.
	void _fetchDirective() {
	  // Close open blocks and drop the pending simple key before scanning.
	  _resetIndent();
	  _removeSimpleKey();
	  _simpleKeyAllowed = false;

	  var scanned = _scanDirective();
	  if (scanned == null) return;
	  _tokens.add(scanned);
	}

	561

	562 /// Produces a [TokenType.DOCUMENT_START] or [TokenType.DOCUMENT_END] token.

	563 void _fetchDocumentIndicator(TokenType type) {

	564 _resetIndent();

	565 _removeSimpleKey();

	566 _simpleKeyAllowed = false;
	Bob Nystrom 2014/10/31 20:03:28 Hoist these three lines into a _resetState() metho Hoist these three lines into a _resetState() method? nweiz 2014/11/04 22:19:36 I'd rather have the visual similarity with the met Show quoted text On 2014/10/31 20:03:28, Bob Nystrom wrote: > Hoist these three lines into a _resetState() method? I'd rather have the visual similarity with the methods below that don't have the exact same lines to make the contrast explicit.
	567

	568 // Consume the indicator token.

	569 var start = _scanner.state;

	570 _scanner.readChar();

	571 _scanner.readChar();

	572 _scanner.readChar();

	573

	574 _tokens.add(new Token(type, _scanner.spanFrom(start)));

	575 }

	576

	/// Queues a [TokenType.FLOW_SEQUENCE_START] or
	/// [TokenType.FLOW_MAPPING_START] token, as selected by [type].
	void _fetchFlowCollectionStart(TokenType type) {
	  // The opening indicator may itself begin a simple key, so record it
	  // before entering the new flow level.
	  _saveSimpleKey();
	  _increaseFlowLevel();

	  // A simple key may start right after the opening indicator.
	  _simpleKeyAllowed = true;

	  _addCharToken(type);
	}

	585

	/// Queues a [TokenType.FLOW_SEQUENCE_END] or [TokenType.FLOW_MAPPING_END]
	/// token, as selected by [type].
	void _fetchFlowCollectionEnd(TokenType type) {
	  // Discard the candidate key of the level being closed, then leave it.
	  _removeSimpleKey();
	  _decreaseFlowLevel();

	  // No simple key may start right after the closing indicator.
	  _simpleKeyAllowed = false;

	  _addCharToken(type);
	}

	594

	/// Queues a [TokenType.FLOW_ENTRY] token for the current character.
	void _fetchFlowEntry() {
	  // The entry separator ends any candidate key at this flow level...
	  _removeSimpleKey();

	  // ...and a new simple key may start right after it.
	  _simpleKeyAllowed = true;

	  _addCharToken(TokenType.FLOW_ENTRY);
	}

	601

	602 /// Produces a [TokenType.BLOCK_ENTRY] token.

	603 void _fetchBlockEntry() {

	604 if (_inBlockContext) {

	605 if (!_simpleKeyAllowed) {

	606 throw new YamlException(

	607 "Block sequence entries are not allowed in this context.",
	Bob Nystrom 2014/10/31 20:03:29 Would be good to describe the context instead of j Would be good to describe the context instead of just "this". nweiz 2014/11/04 22:19:37 That's pretty tough... we'd have to track the reas Show quoted text On 2014/10/31 20:03:29, Bob Nystrom wrote: > Would be good to describe the context instead of just "this". That's pretty tough... we'd have to track the reason that [_simpleKeyAllowed] was set to false. I think the description combined with the span information should be sufficient.
	608 _scanner.emptySpan);

	609 }

	610

	611 _rollIndent(

	612 _scanner.column,

	613 TokenType.BLOCK_SEQUENCE_START,

	614 _scanner.emptySpan.start);

	615 } else {

	616 // It is an error for the '-' indicator to occur in the flow context, but

	617 // we let the Parser detect and report it because it's able to point to

	618 // the context.

	619 }

	620

	621 _removeSimpleKey();

	622 _simpleKeyAllowed = true;

	623 _addCharToken(TokenType.BLOCK_ENTRY);

	624 }

	625

	626 /// Produces the [TokenType.KEY] token.

	627 void _fetchKey() {

	628 if (_inBlockContext) {

	629 if (!_simpleKeyAllowed) {

	630 throw new YamlException("Mapping keys are not allowed in this context.",
	Bob Nystrom 2014/10/31 20:03:28 Ditto. Ditto.
	631 _scanner.emptySpan);

	632 }

	633

	634 _rollIndent(

	635 _scanner.column,

	636 TokenType.BLOCK_MAPPING_START,

	637 _scanner.emptySpan.start);

	638 }

	639

	640 // Simple keys are allowed after `?` in a block context.

	641 _simpleKeyAllowed = _inBlockContext;

	642 _addCharToken(TokenType.KEY);

	643 }

	644

	/// Produces the [TokenType.VALUE] token.
	///
	/// If a simple key is pending at the current flow level, a [TokenType.KEY]
	/// token is first inserted before that key's token and, through
	/// [_rollIndent], a [TokenType.BLOCK_MAPPING_START] token may be inserted
	/// as well.
	///
	/// Throws a [YamlException] if, in the block context, there is no pending
	/// simple key and simple keys are not currently allowed.
	void _fetchValue() {
	  var pendingKey = _simpleKeys.last;
	  if (pendingKey != null) {
	    // Insert a [TokenType.KEY] token before the first token of the simple
	    // key so the parser knows that it's part of a key/value pair.
	    var keyToken =
	        new Token(TokenType.KEY, pendingKey.location.pointSpan());
	    _tokens.insert(pendingKey.tokenNumber - _tokensParsed, keyToken);

	    // In the block context, we may need to add the
	    // [TokenType.BLOCK_MAPPING_START] token.
	    _rollIndent(
	        pendingKey.location.column,
	        TokenType.BLOCK_MAPPING_START,
	        pendingKey.location,
	        tokenNumber: pendingKey.tokenNumber);

	    // The simple key has now been used up.
	    _simpleKeys[_simpleKeys.length - 1] = null;

	    // A simple key cannot follow another simple key.
	    _simpleKeyAllowed = false;
	  } else if (_inBlockContext) {
	    // If we're here, we've found the ':' indicator following a complex key.
	    if (!_simpleKeyAllowed) {
	      throw new YamlException(
	          "Mapping values are not allowed in this context.",
	          _scanner.emptySpan);
	    }

	    _rollIndent(
	        _scanner.column,
	        TokenType.BLOCK_MAPPING_START,
	        _scanner.location);
	    _simpleKeyAllowed = true;
	  } else if (_simpleKeyAllowed) {
	    // If we're here, we've found the ':' indicator with an empty key. This
	    // behavior differs from libyaml, which disallows empty implicit keys.
	    //
	    // NOTE(review): [_addCharToken] reads one character per call, so this
	    // path reads a character for the KEY token and another for the VALUE
	    // token below -- confirm that consuming two characters is intended.
	    _simpleKeyAllowed = false;
	    _addCharToken(TokenType.KEY);
	  }

	  _addCharToken(TokenType.VALUE);
	}

	690

	/// Reads one character and appends a token of [type] to [_tokens].
	///
	/// The new token's span covers exactly the character that was read.
	void _addCharToken(TokenType type) {
	  // Capture the position before reading so the span starts at the character.
	  var before = _scanner.state;
	  _scanner.readChar();

	  var span = _scanner.spanFrom(before);
	  _tokens.add(new Token(type, span));
	}

	699

	/// Queues a [TokenType.ALIAS] or [TokenType.ANCHOR] token.
	///
	/// [anchor] is forwarded unchanged to [_scanAnchor].
	void _fetchAnchor({bool anchor: true}) {
	  // The token may begin a simple key; once it has been scanned, no further
	  // simple key may start.
	  _saveSimpleKey();
	  _simpleKeyAllowed = false;

	  var scanned = _scanAnchor(anchor: anchor);
	  _tokens.add(scanned);
	}

	706

	/// Queues a [TokenType.TAG] token.
	void _fetchTag() {
	  // The tag may begin a simple key; once it has been scanned, no further
	  // simple key may start.
	  _saveSimpleKey();
	  _simpleKeyAllowed = false;

	  var scanned = _scanTag();
	  _tokens.add(scanned);
	}

	713

	/// Queues a [TokenType.SCALAR] token with style [ScalarStyle.LITERAL] or
	/// [ScalarStyle.FOLDED].
	///
	/// [literal] is forwarded unchanged to [_scanBlockScalar].
	void _fetchBlockScalar({bool literal: false}) {
	  // Discard the pending simple key; a new one may start after the scalar.
	  _removeSimpleKey();
	  _simpleKeyAllowed = true;

	  var scanned = _scanBlockScalar(literal: literal);
	  _tokens.add(scanned);
	}

	721

	/// Queues a [TokenType.SCALAR] token with style
	/// [ScalarStyle.SINGLE_QUOTED] or [ScalarStyle.DOUBLE_QUOTED].
	///
	/// [singleQuote] is forwarded unchanged to [_scanFlowScalar].
	void _fetchFlowScalar({bool singleQuote: false}) {
	  // The scalar may begin a simple key; once it has been scanned, no further
	  // simple key may start.
	  _saveSimpleKey();
	  _simpleKeyAllowed = false;

	  var scanned = _scanFlowScalar(singleQuote: singleQuote);
	  _tokens.add(scanned);
	}

	729

	/// Queues a [TokenType.SCALAR] token with style [ScalarStyle.PLAIN].
	void _fetchPlainScalar() {
	  // The scalar may begin a simple key; once it has been scanned, no further
	  // simple key may start.
	  _saveSimpleKey();
	  _simpleKeyAllowed = false;

	  var scanned = _scanPlainScalar();
	  _tokens.add(scanned);
	}

	736

	/// Eats whitespace and comments until the next token is found.
	///
	/// Each pass over a line skips an optional byte-order mark at column 0,
	/// leading whitespace, and a `#` comment. If the scanner then sits on a
	/// line break, the break is consumed and the next line is processed the same
	/// way; otherwise the scanner is left at the start of the next token.
	///
	/// Reports an error through the underlying scanner if, in the block context,
	/// a tab is found in the leading whitespace of a line that follows a line
	/// break.
	void _scanToNextToken() {
	  var afterLineBreak = false;
	  while (true) {
	    // Allow the BOM to start a line.
	    if (_scanner.column == 0) _scanner.scan("\uFEFF");

	    // Eat whitespace.
	    //
	    // libyaml disallows tabs after "-", "?", or ":", but the spec allows
	    // them. See section 6.2: http://yaml.org/spec/1.2/spec.html#id2778241.
	    //
	    // Tabs are only skipped when they cannot be indentation: anywhere in a
	    // flow context, or before the first line break seen by this call.
	    var tabsAllowed = !_inBlockContext || !afterLineBreak;
	    while (_scanner.peekChar() == SP ||
	        (tabsAllowed && _scanner.peekChar() == TAB)) {
	      _scanner.readChar();
	    }

	    // Any tab still left at this point would be acting as indentation.
	    if (_scanner.peekChar() == TAB) {
	      _scanner.error("Tab characters are not allowed as indentation.",
	          length: 1);
	    }

	    // Eat a comment until a line break.
	    if (_scanner.peekChar() == HASH) {
	      while (!_isBreakOrEnd) {
	        _scanner.readChar();
	      }
	    }

	    // Anything other than a line break means we've found a token.
	    if (!_isBreak) break;

	    // We're at a line break, so eat it.
	    _skipLine();

	    // In the block context, a new line may start a simple key.
	    if (_inBlockContext) _simpleKeyAllowed = true;
	    afterLineBreak = true;
	  }
	}

	779

	/// Scans a [TokenType.YAML_DIRECTIVE] or [TokenType.TAG_DIRECTIVE] token.
	///
	///     %YAML    1.2    # a comment \n
	///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
	///     %TAG    !yaml!  tag:yaml.org,2002:  \n
	///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
	///
	/// Returns `null` for a directive whose name is neither `YAML` nor `TAG`. In
	/// that case a warning is issued and the rest of the line is consumed, but
	/// the line break itself is left in place.
	///
	/// Throws a [YamlException] if anything other than blanks and a comment
	/// follows a known directive on the same line.
	Token _scanDirective() {
	  var start = _scanner.state;

	  // Eat '%'.
	  _scanner.readChar();

	  var token;
	  var name = _scanDirectiveName();
	  if (name == "YAML") {
	    token = _scanVersionDirectiveValue(start);
	  } else if (name == "TAG") {
	    token = _scanTagDirectiveValue(start);
	  } else {
	    // NOTE(review): the reviewer asked that warnings go through a
	    // caller-configurable hook instead of being emitted directly; that
	    // needs a change outside this method, so the call is left as is.
	    warn("Warning: unknown directive.", _scanner.spanFrom(start));

	    // libyaml doesn't support unknown directives, but the spec says to ignore
	    // them and warn: http://yaml.org/spec/1.2/spec.html#id2781147.
	    while (!_isBreakOrEnd) {
	      _scanner.readChar();
	    }

	    return null;
	  }

	  // Eat the rest of the line, including any comments.
	  while (_isBlank) {
	    _scanner.readChar();
	  }

	  if (_scanner.peekChar() == HASH) {
	    while (!_isBreakOrEnd) {
	      _scanner.readChar();
	    }
	  }

	  if (!_isBreakOrEnd) {
	    throw new YamlException(
	        "Expected comment or line break after directive.",
	        _scanner.spanFrom(start));
	  }

	  // [_skipLine] does nothing unless the scanner is at a CR or LF, so no
	  // separate break check is needed when we stopped at the end of the input.
	  _skipLine();
	  return token;
	}

	830

	/// Scans a directive name.
	///
	///     %YAML   1.2     # a comment \n
	///      ^^^^
	///     %TAG    !yaml!  tag:yaml.org,2002:  \n
	///      ^^^
	///
	/// Returns the name without the leading '%', which the caller has already
	/// consumed.
	///
	/// Throws a [YamlException] if the name is empty or is not followed by a
	/// blank, a line break, or the end of the input.
	String _scanDirectiveName() {
	  // libyaml only allows word characters in directive names, but the spec
	  // disagrees: http://yaml.org/spec/1.2/spec.html#ns-directive-name.
	  var start = _scanner.position;
	  while (_isNonSpace) {
	    _scanner.readChar();
	  }

	  // Slice the name straight out of the source text rather than copying it
	  // one character at a time through a buffer.
	  var name = _scanner.string.substring(start, _scanner.position);
	  if (name.isEmpty) {
	    throw new YamlException("Expected directive name.", _scanner.emptySpan);
	  } else if (!_isBlankOrEnd) {
	    throw new YamlException(
	        "Unexpected character in directive name.", _scanner.emptySpan);
	  }

	  return name;
	}

	855

	/// Scans the `major.minor` value of a version directive.
	///
	///     %YAML   1.2     # a comment \n
	///          ^^^^^^
	///
	/// [start] is the scanner state at the beginning of the whole directive; the
	/// returned token's span runs from there to the end of the minor version.
	Token _scanVersionDirectiveValue(LineScannerState start) {
	  // Skip the blanks that separate the directive name from its value.
	  while (_isBlank) {
	    _scanner.readChar();
	  }

	  final major = _scanVersionDirectiveNumber();
	  _scanner.expect('.');
	  final minor = _scanVersionDirectiveNumber();

	  final span = _scanner.spanFrom(start);
	  return new VersionDirectiveToken(span, major, minor);
	}

	871

	/// Scans one numeric component of a version directive.
	///
	///     %YAML   1.2     # a comment \n
	///             ^
	///     %YAML   1.2     # a comment \n
	///               ^
	///
	/// Throws a [YamlException] if the scanner is not positioned at a digit.
	int _scanVersionDirectiveNumber() {
	  final digits = new StringBuffer();
	  while (_isDigit) {
	    digits.writeCharCode(_scanner.readChar());
	  }

	  if (digits.isEmpty) {
	    throw new YamlException("Expected version number.", _scanner.emptySpan);
	  }

	  return int.parse(digits.toString());
	}

	891

	/// Scans the handle and prefix of a tag directive.
	///
	///     %TAG    !yaml!  tag:yaml.org,2002:  \n
	///         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
	///
	/// [start] is the scanner state at the beginning of the whole directive; the
	/// returned token's span runs from there to the end of the prefix.
	///
	/// Throws a [YamlException] if the handle is not followed by a blank, or if
	/// the prefix is not followed by a blank, a line break, or the end of input.
	Token _scanTagDirectiveValue(LineScannerState start) {
	  // Skip the blanks that separate the directive name from the handle.
	  while (_isBlank) {
	    _scanner.readChar();
	  }

	  final handle = _scanTagHandle(directive: true);

	  // At least one blank must separate the handle from the prefix.
	  if (!_isBlank) {
	    throw new YamlException("Expected whitespace.", _scanner.emptySpan);
	  }
	  do {
	    _scanner.readChar();
	  } while (_isBlank);

	  final prefix = _scanTagUri();
	  if (!_isBlankOrEnd) {
	    throw new YamlException("Expected whitespace.", _scanner.emptySpan);
	  }

	  final span = _scanner.spanFrom(start);
	  return new TagDirectiveToken(span, handle, prefix);
	}

	917

	/// Scans a [TokenType.ANCHOR] token, or a [TokenType.ALIAS] token when
	/// [anchor] is false.
	///
	/// Throws a [YamlException] if the name is empty or is followed by a
	/// character that may not be adjacent to an anchor.
	Token _scanAnchor({bool anchor: true}) {
	  final start = _scanner.state;

	  // Eat the indicator character.
	  _scanner.readChar();

	  // libyaml only allows word characters in anchor names, but the spec
	  // disagrees: http://yaml.org/spec/1.2/spec.html#ns-anchor-char.
	  final name = new StringBuffer();
	  while (_isAnchorChar) {
	    name.writeCharCode(_scanner.readChar());
	  }

	  // The name may be followed by whitespace, the end of the input, or one of
	  // the indicator characters below (in which case it anchors an empty
	  // scalar). Anything else is invalid next to an anchor: either an invalid
	  // text character or something like "&anchor[]".
	  final next = _scanner.peekChar();
	  final validFollower = _isBlankOrEnd ||
	      next == QUESTION ||
	      next == COLON ||
	      next == COMMA ||
	      next == RIGHT_SQUARE ||
	      next == RIGHT_CURLY ||
	      next == PERCENT ||
	      next == AT ||
	      next == GRAVE_ACCENT;
	  if (name.isEmpty || !validFollower) {
	    throw new YamlException("Expected alphanumeric character.",
	        _scanner.emptySpan);
	  }

	  final span = _scanner.spanFrom(start);
	  if (!anchor) return new AliasToken(span, name.toString());
	  return new AnchorToken(span, name.toString());
	}

	947

	/// Scans a [TokenType.TAG] token.
	///
	/// Three forms are recognised:
	///
	/// * `!<uri>`: the handle is the empty string and the suffix is the URI.
	/// * `!handle!suffix`: the handle includes both '!' characters.
	/// * `!suffix`: the handle is `'!'`. A bare `!` instead yields a null handle
	///   and the suffix `'!'`.
	Token _scanTag() {
	  final start = _scanner.state;
	  var handle;
	  var suffix;

	  // [peekChar] returns null for out-of-range offsets, so a lone '!' at the
	  // end of the input safely takes the first branch.
	  if (_scanner.peekChar(1) != LEFT_ANGLE) {
	    // The tag has either the '!suffix' or the '!handle!suffix' form.

	    // First, try to scan a handle.
	    handle = _scanTagHandle();

	    final explicitHandle =
	        handle.length > 1 && handle.startsWith('!') && handle.endsWith('!');
	    if (explicitHandle) {
	      suffix = _scanTagUri(flowSeparators: false);
	    } else {
	      // There was no explicit handle, so what was scanned so far is really
	      // the start of the suffix.
	      suffix = _scanTagUri(head: handle, flowSeparators: false);

	      if (suffix.isEmpty) {
	        // This is the special '!' tag.
	        handle = null;
	        suffix = '!';
	      } else {
	        handle = '!';
	      }
	    }
	  } else {
	    // The tag is in the canonical form. Eat '!<'.
	    _scanner.readChar();
	    _scanner.readChar();

	    handle = '';
	    suffix = _scanTagUri();

	    _scanner.expect('>');
	  }

	  // libyaml insists on whitespace after a tag, but example 7.2 indicates
	  // that it's not required: http://yaml.org/spec/1.2/spec.html#id2786720.

	  return new TagToken(_scanner.spanFrom(start), handle, suffix);
	}

	991

	/// Scans a tag handle and returns it, including its leading '!' and, if one
	/// is present, its trailing '!'.
	///
	/// When [directive] is true the handle belongs to a `%TAG` directive, where
	/// anything other than a bare `!` must end with '!'.
	String _scanTagHandle({bool directive: false}) {
	  _scanner.expect('!');

	  final handle = new StringBuffer('!');

	  // libyaml only allows word characters in tags, but the spec disagrees:
	  // http://yaml.org/spec/1.2/spec.html#ns-tag-char.
	  while (_isTagChar) {
	    handle.writeCharCode(_scanner.readChar());
	  }

	  if (_scanner.peekChar() == EXCLAMATION) {
	    handle.writeCharCode(_scanner.readChar());
	  } else if (directive && handle.toString() != '!') {
	    // It's either the '!' tag or not really a tag handle. If it's a %TAG
	    // directive, it's an error. If it's a tag token, it must be part of a
	    // URI.
	    _scanner.expect('!');
	  }

	  return handle.toString();
	}

	1015

	/// Scans a tag URI and returns it with percent-escapes decoded.
	///
	/// [head] is the initial portion of the tag that's already been scanned; its
	/// first character (the leading '!') is not included in the result.
	/// [flowSeparators] indicates whether the tag URI can contain the flow
	/// separators ',', '[', and ']'.
	String _scanTagUri({String head, bool flowSeparators: true}) {
	  final uri = new StringBuffer();

	  // Copy the head if needed.
	  //
	  // Note that we don't copy the leading '!' character.
	  if (head != null && head.length > 1) uri.write(head.substring(1));

	  // The set of characters that may appear in URI is as follows:
	  //
	  //     '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
	  //     '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
	  //     '%'.
	  //
	  // In a shorthand tag annotation, the flow separators ',', '[', and ']' are
	  // disallowed.
	  for (;;) {
	    final char = _scanner.peekChar();
	    final isFlowSeparator =
	        char == COMMA || char == LEFT_SQUARE || char == RIGHT_SQUARE;
	    if (!_isTagChar && !(flowSeparators && isFlowSeparator)) break;
	    uri.writeCharCode(_scanner.readChar());
	  }

	  // libyaml manually decodes the URL, but we don't have to do that.
	  return Uri.decodeFull(uri.toString());
	}

	1048

	/// Scans a block scalar.
	///
	/// The scanner must be positioned at the '|' or '>' indicator. The header
	/// may carry a chomping indicator ('+' or '-') and an indentation indicator
	/// (a digit other than '0'), in either order, followed by an optional
	/// comment.
	///
	/// Returns a scalar token whose style is [ScalarStyle.LITERAL] when
	/// [literal] is true and [ScalarStyle.FOLDED] otherwise.
	///
	/// Throws a [YamlException] if the indentation indicator is '0' or if the
	/// header is followed by anything other than a comment or a line break.
	Token _scanBlockScalar({bool literal: false}) {
	  var start = _scanner.state;

	  // Eat the indicator '|' or '>'.
	  _scanner.readChar();

	  // Check for a chomping indicator.
	  var chomping = _Chomping.CLIP;
	  var increment = 0;
	  var char = _scanner.peekChar();
	  if (char == PLUS || char == HYPHEN) {
	    chomping = char == PLUS ? _Chomping.KEEP : _Chomping.STRIP;
	    _scanner.readChar();

	    // Check for an indentation indicator.
	    if (_isDigit) {
	      // Check that the indentation is greater than 0. The comparison must be
	      // against the character '0', not the code unit 0.
	      if (_scanner.peekChar() == NUMBER_0) {
	        throw new YamlException(
	            "0 may not be used as an indentation indicator.",
	            _scanner.spanFrom(start));
	      }

	      increment = _scanner.readChar() - NUMBER_0;
	    }
	  } else if (_isDigit) {
	    // Do the same as above, but in the opposite order.
	    if (_scanner.peekChar() == NUMBER_0) {
	      throw new YamlException(
	          "0 may not be used as an indentation indicator.",
	          _scanner.spanFrom(start));
	    }

	    increment = _scanner.readChar() - NUMBER_0;

	    char = _scanner.peekChar();
	    if (char == PLUS || char == HYPHEN) {
	      chomping = char == PLUS ? _Chomping.KEEP : _Chomping.STRIP;
	      _scanner.readChar();
	    }
	  }

	  // Eat whitespace and comments to the end of the line.
	  while (_isBlank) {
	    _scanner.readChar();
	  }

	  if (_scanner.peekChar() == HASH) {
	    while (!_isBreakOrEnd) {
	      _scanner.readChar();
	    }
	  }

	  // Check if we're at the end of the line.
	  if (!_isBreakOrEnd) {
	    throw new YamlException("Expected comment or line break.",
	        _scanner.emptySpan);
	  }

	  // [_skipLine] does nothing unless the scanner is at a CR or LF.
	  _skipLine();

	  // With an explicit indentation indicator, the content indentation is that
	  // many columns beyond [_indent], or the indicator value itself when
	  // [_indent] is negative. Without one, it stays 0, which tells
	  // [_scanBlockScalarBreaks] to detect the indentation from the content.
	  var indent = 0;
	  if (increment != 0) {
	    indent = _indent >= 0 ? _indent + increment : increment;
	  }

	  // Scan the leading line breaks to determine the indentation level if
	  // needed.
	  var pair = _scanBlockScalarBreaks(indent);
	  indent = pair.first;
	  var trailingBreaks = pair.last;

	  // Scan the block scalar contents.
	  var buffer = new StringBuffer();
	  var leadingBreak = '';
	  var leadingBlank = false;
	  var trailingBlank = false;
	  while (_scanner.column == indent && !_scanner.isDone) {
	    // Check for a document indicator. libyaml doesn't do this, but the spec
	    // mandates it. See example 9.5:
	    // http://yaml.org/spec/1.2/spec.html#id2801606.
	    if (_scanner.column == 0 && _isBlankOrEndAt(3) &&
	        (_scanner.matches('---') || _scanner.matches('...'))) {
	      break;
	    }

	    // We are at the beginning of a non-empty line.

	    // Is there trailing whitespace?
	    trailingBlank = _isBlank;

	    // Check if we need to fold the leading line break.
	    if (!literal && leadingBreak.isNotEmpty && !leadingBlank &&
	        !trailingBlank) {
	      // Do we need to join the lines with a space?
	      if (trailingBreaks.isEmpty) buffer.writeCharCode(SP);
	    } else {
	      buffer.write(leadingBreak);
	    }
	    leadingBreak = '';

	    // Append the remaining line breaks.
	    buffer.write(trailingBreaks);

	    // Is there leading whitespace?
	    leadingBlank = _isBlank;

	    while (!_isBreakOrEnd) {
	      buffer.writeCharCode(_scanner.readChar());
	    }

	    // libyaml always reads a line here, but this breaks on block scalars at
	    // the end of the document that end without newlines. See example 8.1:
	    // http://yaml.org/spec/1.2/spec.html#id2793888.
	    if (!_scanner.isDone) leadingBreak = _readLine();

	    // Eat the following indentation and spaces.
	    var next = _scanBlockScalarBreaks(indent);
	    indent = next.first;
	    trailingBreaks = next.last;
	  }

	  // Chomp the tail.
	  if (chomping != _Chomping.STRIP) buffer.write(leadingBreak);
	  if (chomping == _Chomping.KEEP) buffer.write(trailingBreaks);

	  return new ScalarToken(_scanner.spanFrom(start), buffer.toString(),
	      literal ? ScalarStyle.LITERAL : ScalarStyle.FOLDED);
	}

	1184

	/// Scans indentation spaces and line breaks for a block scalar.
	///
	/// An [indent] of 0 means the indentation level is not yet known. It is then
	/// set to the deepest column reached on the lines scanned here, but never
	/// less than `_indent + 1`.
	///
	/// Returns the (possibly newly determined) indentation level paired with
	/// the text of the line breaks that were consumed.
	Pair<int, String> _scanBlockScalarBreaks(int indent) {
	  final breaks = new StringBuffer();
	  var deepestColumn = 0;

	  for (;;) {
	    // Eat spaces, up to the indentation level when it is already known.
	    while ((indent == 0 || _scanner.column < indent) &&
	        _scanner.peekChar() == SP) {
	      _scanner.readChar();
	    }

	    if (_scanner.column > deepestColumn) deepestColumn = _scanner.column;

	    // libyaml throws an error here if a tab character is detected, but the
	    // spec treats tabs like any other non-space character. See example 8.2:
	    // http://yaml.org/spec/1.2/spec.html#id2794311.

	    if (!_isBreak) break;
	    breaks.write(_readLine());
	  }

	  if (indent == 0) {
	    // libyaml forces indent to be at least 1 here, but that doesn't seem to
	    // be supported by the spec.
	    final minimum = _indent + 1;
	    indent = deepestColumn < minimum ? minimum : deepestColumn;
	  }

	  return new Pair(indent, breaks.toString());
	}

	1219

	/// Scans a quoted scalar.
	///
	/// The scanner must be positioned at the opening quote. The returned token
	/// has style [ScalarStyle.SINGLE_QUOTED] when [singleQuote] is true and
	/// [ScalarStyle.DOUBLE_QUOTED] otherwise. Backslash escapes are only
	/// interpreted in double-quoted scalars; in single-quoted scalars the only
	/// escape is a doubled single quote.
	///
	/// Throws a [YamlException] on an unterminated scalar, an unknown escape
	/// character, or a malformed or out-of-range numeric escape.
	Token _scanFlowScalar({bool singleQuote: false}) {
	  final start = _scanner.state;
	  final contents = new StringBuffer();
	  final quote = singleQuote ? SINGLE_QUOTE : DOUBLE_QUOTE;

	  // Eat the left quote.
	  _scanner.readChar();

	  while (true) {
	    // Check that there are no document indicators at the beginning of the
	    // line.
	    if (_scanner.column == 0 && _isBlankOrEndAt(3) &&
	        (_scanner.scan("---") || _scanner.scan("..."))) {
	      _scanner.error("Unexpected document indicator.");
	    }

	    if (_scanner.isDone) {
	      throw new YamlException("Unexpected end of file.", _scanner.emptySpan);
	    }

	    // Consume a run of non-blank characters.
	    var leadingBlanks = false;
	    while (!_isBlankOrEnd) {
	      final char = _scanner.peekChar();
	      if (singleQuote && char == SINGLE_QUOTE &&
	          _scanner.peekChar(1) == SINGLE_QUOTE) {
	        // An escaped single quote.
	        _scanner.readChar();
	        _scanner.readChar();
	        contents.writeCharCode(SINGLE_QUOTE);
	      } else if (char == quote) {
	        // The closing quote.
	        break;
	      } else if (singleQuote || char != BACKSLASH) {
	        // An ordinary character.
	        contents.writeCharCode(_scanner.readChar());
	      } else if (_isBreakAt(1)) {
	        // An escaped newline.
	        _scanner.readChar();
	        _skipLine();
	        leadingBlanks = true;
	        break;
	      } else {
	        final escapeStart = _scanner.state;

	        // An escape sequence. Single-character escapes are written
	        // immediately; numeric escapes only record how many hex digits
	        // follow.
	        var codeLength;
	        switch (_scanner.peekChar(1)) {
	          case NUMBER_0:
	            contents.writeCharCode(NULL);
	            break;
	          case LETTER_A:
	            contents.writeCharCode(BELL);
	            break;
	          case LETTER_B:
	            contents.writeCharCode(BACKSPACE);
	            break;
	          case LETTER_T:
	          case TAB:
	            // A backslash followed by a literal tab is also a tab escape.
	            contents.writeCharCode(TAB);
	            break;
	          case LETTER_N:
	            contents.writeCharCode(LF);
	            break;
	          case LETTER_V:
	            contents.writeCharCode(VERTICAL_TAB);
	            break;
	          case LETTER_F:
	            contents.writeCharCode(FORM_FEED);
	            break;
	          case LETTER_R:
	            contents.writeCharCode(CR);
	            break;
	          case LETTER_E:
	            contents.writeCharCode(ESCAPE);
	            break;
	          case SP:
	          case DOUBLE_QUOTE:
	          case SLASH:
	          case BACKSLASH:
	            // libyaml doesn't support an escaped forward slash, but it was
	            // added in YAML 1.2. See section 5.7:
	            // http://yaml.org/spec/1.2/spec.html#id2776092
	            contents.writeCharCode(_scanner.peekChar(1));
	            break;
	          case LETTER_CAP_N:
	            contents.writeCharCode(NEL);
	            break;
	          case UNDERSCORE:
	            contents.writeCharCode(NBSP);
	            break;
	          case LETTER_CAP_L:
	            contents.writeCharCode(LINE_SEPARATOR);
	            break;
	          case LETTER_CAP_P:
	            contents.writeCharCode(PARAGRAPH_SEPARATOR);
	            break;
	          case LETTER_X:
	            codeLength = 2;
	            break;
	          case LETTER_U:
	            codeLength = 4;
	            break;
	          case LETTER_CAP_U:
	            codeLength = 8;
	            break;
	          default:
	            throw new YamlException("Unknown escape character.",
	                _scanner.spanFrom(escapeStart));
	        }

	        // Eat the backslash and the escape character.
	        _scanner.readChar();
	        _scanner.readChar();

	        if (codeLength != null) {
	          // Accumulate the hex digits of a numeric escape.
	          var value = 0;
	          for (var i = 0; i < codeLength; i++) {
	            if (!_isHex) {
	              // Pull the offending character into the error span.
	              _scanner.readChar();
	              throw new YamlException(
	                  "Expected $codeLength-digit hexidecimal number.",
	                  _scanner.spanFrom(escapeStart));
	            }

	            value = (value << 4) + _asHex(_scanner.readChar());
	          }

	          // Reject surrogate halves and values beyond the Unicode range.
	          if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
	            throw new YamlException(
	                "Invalid Unicode character escape code.",
	                _scanner.spanFrom(escapeStart));
	          }

	          contents.writeCharCode(value);
	        }
	      }
	    }

	    // Check if we're at the end of a scalar.
	    if (_scanner.peekChar() == quote) break;

	    // Consume the blanks and line breaks that follow the run.
	    final whitespace = new StringBuffer();
	    final trailingBreaks = new StringBuffer();
	    var leadingBreak = '';
	    while (_isBlank || _isBreak) {
	      if (_isBlank) {
	        // Consume a space or a tab; it is only kept if no line break has
	        // been seen yet.
	        final blank = _scanner.readChar();
	        if (!leadingBlanks) whitespace.writeCharCode(blank);
	      } else if (!leadingBlanks) {
	        // The first line break discards the blanks collected before it.
	        whitespace.clear();
	        leadingBreak = _readLine();
	        leadingBlanks = true;
	      } else {
	        trailingBreaks.write(_readLine());
	      }
	    }

	    // Join the whitespace or fold line breaks.
	    if (!leadingBlanks) {
	      contents.write(whitespace);
	    } else if (leadingBreak.isNotEmpty && trailingBreaks.isEmpty) {
	      contents.writeCharCode(SP);
	    } else {
	      contents.write(trailingBreaks);
	    }
	  }

	  // Eat the right quote.
	  _scanner.readChar();

	  return new ScalarToken(_scanner.spanFrom(start), contents.toString(),
	      singleQuote ? ScalarStyle.SINGLE_QUOTED : ScalarStyle.DOUBLE_QUOTED);
	}

	1404

	/// Scans a plain (unquoted) scalar.
	///
	/// Scanning stops at a document indicator at the start of a line, at a
	/// comment, at a character that is neither plain nor whitespace, or, in the
	/// block context, at a line indented less than `_indent + 1`.
	///
	/// A single line break inside the scalar is folded into a space. When it is
	/// followed by further line breaks, the first one is dropped and each of the
	/// others contributes one newline.
	///
	/// Returns a scalar token with style [ScalarStyle.PLAIN].
	Token _scanPlainScalar() {
	  var start = _scanner.state;
	  var buffer = new StringBuffer();
	  var leadingBreak = '';
	  var trailingBreaks = '';
	  var whitespace = new StringBuffer();
	  var indent = _indent + 1;

	  while (true) {
	    // Check for a document indicator.
	    if (_scanner.column == 0 && _isBlankOrEndAt(3) &&
	        (_scanner.matches('---') || _scanner.matches('...'))) {
	      break;
	    }

	    // Check for a comment.
	    if (_scanner.peekChar() == HASH) break;

	    if (_isPlainChar) {
	      // Join the whitespace or fold line breaks.
	      if (leadingBreak.isNotEmpty) {
	        if (trailingBreaks.isEmpty) {
	          buffer.writeCharCode(SP);
	        } else {
	          buffer.write(trailingBreaks);
	        }
	        leadingBreak = '';
	        trailingBreaks = '';
	      } else {
	        buffer.write(whitespace);
	        whitespace.clear();
	      }
	    }

	    // libyaml's notion of valid identifiers differs substantially from YAML
	    // 1.2's. We use [_isPlainChar] instead of libyaml's character here.
	    while (_isPlainChar) {
	      buffer.writeCharCode(_scanner.readChar());
	    }

	    // Is it the end?
	    if (!_isBlank && !_isBreak) break;

	    while (_isBlank || _isBreak) {
	      if (_isBlank) {
	        // Check for a tab character messing up the indentation.
	        if (leadingBreak.isNotEmpty && _scanner.column < indent &&
	            _scanner.peekChar() == TAB) {
	          _scanner.error("Expected a space but found a tab.", length: 1);
	        }

	        if (leadingBreak.isEmpty) {
	          whitespace.writeCharCode(_scanner.readChar());
	        } else {
	          _scanner.readChar();
	        }
	      } else {
	        // Check if it's a first line break.
	        if (leadingBreak.isEmpty) {
	          leadingBreak = _readLine();
	          whitespace.clear();
	        } else {
	          // Accumulate every further break, as the quoted-scalar and
	          // block-scalar scanners do. Overwriting here would collapse
	          // several blank lines into a single newline.
	          trailingBreaks += _readLine();
	        }
	      }
	    }

	    // Check the indentation level.
	    if (_inBlockContext && _scanner.column < indent) break;
	  }

	  // Allow a simple key after a plain scalar with leading blanks.
	  if (leadingBreak.isNotEmpty) _simpleKeyAllowed = true;

	  return new ScalarToken(_scanner.spanFrom(start), buffer.toString(),
	      ScalarStyle.PLAIN);
	}

	1483

	/// Advances past the line break at the current position, if there is one.
	///
	/// A CR LF pair counts as a single line break. Does nothing when the scanner
	/// is not at a CR or LF.
	void _skipLine() {
	  final char = _scanner.peekChar();
	  if (char == LF) {
	    _scanner.readChar();
	  } else if (char == CR) {
	    _scanner.readChar();
	    if (_scanner.peekChar() == LF) _scanner.readChar();
	  }
	}

	1491

	/// Advances past the line break at the current position and returns it
	/// normalized to `"\n"`.
	///
	/// Throws a [YamlException] if the scanner is not at a CR or LF.
	String _readLine() {
	  final char = _scanner.peekChar();
	  final atCarriageReturn = char == CR;

	  // libyaml supports NEL, PS, and LS characters as line separators, but this
	  // is explicitly forbidden in section 5.4 of the YAML spec.
	  if (!atCarriageReturn && char != LF) {
	    throw new YamlException("Expected newline.", _scanner.emptySpan);
	  }

	  _scanner.readChar();

	  // CR LF | CR | LF -> LF
	  if (atCarriageReturn && _scanner.peekChar() == LF) _scanner.readChar();
	  return "\n";
	}

	1507

	/// Returns whether the character [offset] positions ahead of the scanner is
	/// a space or a tab.
	bool _isBlankAt(int offset) {
	  switch (_scanner.peekChar(offset)) {
	    case SP:
	    case TAB:
	      return true;
	    default:
	      return false;
	  }
	}

	1513

	/// Returns whether the character [offset] positions ahead of the scanner is
	/// a line break (CR or LF).
	bool _isBreakAt(int offset) {
	  // Libyaml considers NEL, LS, and PS to be line breaks as well, but that's
	  // contrary to the spec.
	  switch (_scanner.peekChar(offset)) {
	    case CR:
	    case LF:
	      return true;
	    default:
	      return false;
	  }
	}

	1521

	/// Returns whether the character [offset] positions ahead of the scanner is
	/// a space, a tab, or a line break, or lies past the end of the source.
	bool _isBlankOrEndAt(int offset) {
	  final char = _scanner.peekChar(offset);

	  // A null character means there is no character at that position.
	  if (char == null) return true;

	  switch (char) {
	    case SP:
	    case TAB:
	    case CR:
	    case LF:
	      return true;
	    default:
	      return false;
	  }
	}

	1529

	/// Returns whether the character [offset] positions ahead of the scanner is
	/// a plain character.
	///
	/// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c).
	bool _isPlainCharAt(int offset) {
	  final char = _scanner.peekChar(offset);

	  // A colon is plain only when a plain-safe character follows it.
	  if (char == COLON) return _isPlainSafeAt(offset + 1);

	  // A hash is plain unless a space or tab precedes it.
	  if (char == HASH) {
	    final before = _scanner.peekChar(offset - 1);
	    return !(before == SP || before == TAB);
	  }

	  return _isPlainSafeAt(offset);
	}

	1544

	/// Returns whether the character [offset] positions ahead of the scanner is
	/// a plain-safe character.
	///
	/// See http://yaml.org/spec/1.2/spec.html#ns-plain-safe(c).
	bool _isPlainSafeAt(int offset) {
	  final char = _scanner.peekChar(offset);

	  // Nothing past the end of the source is plain-safe.
	  if (char == null) return false;

	  // These characters are delimiters in a flow context and thus are only
	  // safe in a block context.
	  if (char == COMMA ||
	      char == LEFT_SQUARE ||
	      char == RIGHT_SQUARE ||
	      char == LEFT_CURLY ||
	      char == RIGHT_CURLY) {
	    return _inBlockContext;
	  }

	  // Whitespace, line breaks, and the byte-order mark are never plain-safe.
	  if (char == SP ||
	      char == TAB ||
	      char == LF ||
	      char == CR ||
	      char == BOM) {
	    return false;
	  }

	  // NEL is accepted even though it falls outside the ranges below.
	  if (char == NEL) return true;

	  return (char >= 0x00020 && char <= 0x00007E) ||
	      (char >= 0x000A0 && char <= 0x00D7FF) ||
	      (char >= 0x0E000 && char <= 0x00FFFD) ||
	      (char >= 0x10000 && char <= 0x10FFFF);
	}

	1575

	/// Returns the numeric value of the hexadecimal digit [char].
	///
	/// No validation is done here: any [char] at or below `NUMBER_9` is treated
	/// as a decimal digit, anything else at or below `LETTER_CAP_F` as an
	/// upper-case digit, and everything above that as a lower-case digit.
	int _asHex(int char) {
	  if (char <= NUMBER_9) return char - NUMBER_0;

	  final letterBase = char <= LETTER_CAP_F ? LETTER_CAP_A : LETTER_A;
	  return 10 + char - letterBase;
	}

	1582 }

	1583

	/// The recorded position of a token that might turn out to begin a simple
	/// key.
	class _SimpleKey {
	  /// The index of the token that begins the simple key.
	  ///
	  /// This counts every token emitted so far, so it is not an index into
	  /// [_tokens].
	  final int tokenNumber;

	  /// The source location at which the simple key begins.
	  ///
	  /// This is used for error reporting and for determining when a simple key
	  /// is no longer on the current line.
	  final SourceLocation location;

	  /// Whether this key must exist for the document to be scanned.
	  final bool required;

	  _SimpleKey(this.tokenNumber, this.location, {this.required});
	}

	1604

	/// The chomping indicators, which describe how the trailing whitespace of a
	/// block scalar is handled.
	///
	/// See http://yaml.org/spec/1.2/spec.html#id2794534.
	class _Chomping {
	  /// The name of this indicator, which is also its string representation.
	  final String name;

	  const _Chomping(this.name);

	  /// All trailing whitespace is discarded.
	  static const _Chomping STRIP = const _Chomping("STRIP");

	  /// A single trailing newline is retained.
	  static const _Chomping CLIP = const _Chomping("CLIP");

	  /// All trailing whitespace is preserved.
	  static const _Chomping KEEP = const _Chomping("KEEP");

	  @override
	  String toString() {
	    return name;
	  }
	}

OLD	NEW

« pkg/yaml/lib/src/parser.dart ('K') | « pkg/yaml/lib/src/parser.dart ('k') | pkg/yaml/lib/src/style.dart » ('j') | pkg/yaml/lib/src/token.dart » ('J')