Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 library yaml.scanner; | |
| 6 | |
| 7 import 'package:collection/collection.dart'; | |
| 8 import 'package:string_scanner/string_scanner.dart'; | |
| 9 import 'package:source_span/source_span.dart'; | |
| 10 | |
| 11 import 'style.dart'; | |
| 12 import 'token.dart'; | |
| 13 import 'utils.dart'; | |
| 14 import 'yaml_exception.dart'; | |
| 15 | |
| 16 /// A scanner that reads a string of Unicode characters and emits [Token]s. | |
| 17 /// | |
| 18 /// This is based on the libyaml scanner, available at | |
| 19 /// https://github.com/yaml/libyaml/blob/master/src/scanner.c. The license for | |
| 20 /// that is available in ../../libyaml-license.txt. | |
| 21 class Scanner { | |
| 22 static const TAB = 0x9; | |
| 23 static const LF = 0xA; | |
| 24 static const CR = 0xD; | |
| 25 static const SP = 0x20; | |
| 26 static const TILDE = 0x7E; | |
|
Bob Nystrom
2014/10/31 20:03:29
Move to after GRAVE_ACCENT?
nweiz
2014/11/04 22:19:37
Done.
| |
| 27 static const NEL = 0x85; | |
|
Bob Nystrom
2014/10/31 20:03:29
This one's a bit uncommon. How about moving either
nweiz
2014/11/04 22:19:38
Done.
| |
| 28 static const DOLLAR = 0x24; | |
| 29 static const LEFT_PAREN = 0x28; | |
| 30 static const RIGHT_PAREN = 0x29; | |
| 31 static const PLUS = 0x2B; | |
| 32 static const COMMA = 0x2C; | |
| 33 static const HYPHEN = 0x2D; | |
| 34 static const PERIOD = 0x2E; | |
| 35 static const QUESTION = 0x3F; | |
| 36 static const COLON = 0x3A; | |
| 37 static const SEMICOLON = 0x3B; | |
| 38 static const EQUALS = 0x3D; | |
| 39 static const LEFT_SQUARE = 0x5B; | |
| 40 static const RIGHT_SQUARE = 0x5D; | |
| 41 static const LEFT_CURLY = 0x7B; | |
| 42 static const RIGHT_CURLY = 0x7D; | |
| 43 static const HASH = 0x23; | |
| 44 static const AMPERSAND = 0x26; | |
| 45 static const ASTERISK = 0x2A; | |
| 46 static const EXCLAMATION = 0x21; | |
| 47 static const VERTICAL_BAR = 0x7C; | |
| 48 static const LEFT_ANGLE = 0x3C; | |
| 49 static const RIGHT_ANGLE = 0x3E; | |
| 50 static const SINGLE_QUOTE = 0x27; | |
| 51 static const DOUBLE_QUOTE = 0x22; | |
| 52 static const PERCENT = 0x25; | |
| 53 static const AT = 0x40; | |
| 54 static const GRAVE_ACCENT = 0x60; | |
| 55 | |
| 56 static const NULL = 0x0; | |
| 57 static const BELL = 0x7; | |
| 58 static const BACKSPACE = 0x8; | |
| 59 static const VERTICAL_TAB = 0xB; | |
| 60 static const FORM_FEED = 0xC; | |
| 61 static const ESCAPE = 0x1B; | |
| 62 static const SLASH = 0x2F; | |
| 63 static const BACKSLASH = 0x5C; | |
| 64 static const UNDERSCORE = 0x5F; | |
| 65 static const NBSP = 0xA0; | |
| 66 static const LINE_SEPARATOR = 0x2028; | |
| 67 static const PARAGRAPH_SEPARATOR = 0x2029; | |
| 68 static const BOM = 0xFEFF; | |
| 69 | |
| 70 static const NUMBER_0 = 0x30; | |
| 71 static const NUMBER_9 = 0x39; | |
| 72 | |
| 73 static const LETTER_A = 0x61; | |
| 74 static const LETTER_B = 0x62; | |
| 75 static const LETTER_E = 0x65; | |
| 76 static const LETTER_F = 0x66; | |
| 77 static const LETTER_N = 0x6E; | |
| 78 static const LETTER_R = 0x72; | |
| 79 static const LETTER_T = 0x74; | |
| 80 static const LETTER_U = 0x75; | |
| 81 static const LETTER_V = 0x76; | |
| 82 static const LETTER_X = 0x78; | |
| 83 static const LETTER_Z = 0x7A; | |
| 84 | |
| 85 static const LETTER_CAP_A = 0x41; | |
| 86 static const LETTER_CAP_F = 0x46; | |
| 87 static const LETTER_CAP_L = 0x4C; | |
| 88 static const LETTER_CAP_N = 0x4E; | |
| 89 static const LETTER_CAP_P = 0x50; | |
| 90 static const LETTER_CAP_U = 0x55; | |
| 91 static const LETTER_CAP_X = 0x58; | |
| 92 static const LETTER_CAP_Z = 0x5A; | |
| 93 | |
| 94 /// The underlying [SpanScanner] used to read characters from the source text. | |
| 95 /// | |
| 96 /// This is also used to track line and column information and to generate | |
| 97 /// [SourceSpan]s. | |
| 98 final SpanScanner _scanner; | |
| 99 | |
| 100 /// Whether this scanner has produced a [TokenType.STREAM_START] token | |
| 101 /// indicating the beginning of the YAML stream. | |
| 102 var _streamStartProduced = false; | |
| 103 | |
| 104 /// Whether this scanner has produced a [TokenType.STREAM_END] token | |
| 105 /// indicating the end of the YAML stream. | |
| 106 var _streamEndProduced = false; | |
| 107 | |
| 108 /// How many levels deep the scanner is in flow nesting. | |
| 109 var _flowLevel = 0; | |
|
Bob Nystrom
2014/10/31 20:03:28
Can this be inferred from _simpleKeys.length?
nweiz
2014/11/04 22:19:37
Yes, good idea.
| |
| 110 | |
| 111 /// The queue of tokens yet to be emitted. | |
| 112 /// | |
| 113 /// These are queued up in advance so that [TokenType.KEY] tokens can be | |
| 114 /// inserted once the scanner determines that a series of tokens represents a | |
| 115 /// mapping key. | |
| 116 final _tokens = new QueueList<Token>(); | |
| 117 | |
| 118 /// The number of tokens that have been emitted. | |
| 119 /// | |
| 120 /// This doesn't count tokens in [_tokens]. | |
| 121 var _tokensParsed = 0; | |
|
Bob Nystrom
2014/10/31 20:03:28
"Parsed" -> "Scanned"?
nweiz
2014/11/04 22:19:37
Done.
| |
| 122 | |
| 123 /// Whether the next token in [_tokens] is ready to be returned. | |
| 124 /// | |
| 125 /// It might not be ready if there may still be a [TokenType.KEY] inserted | |
| 126 /// before it. | |
| 127 var _tokenAvailable = false; | |
| 128 | |
| 129 /// The stack of indent levels for the current nested block contexts. | |
| 130 final _indents = new List<int>(); | |
|
Bob Nystrom
2014/10/31 20:03:29
<int>[]
nweiz
2014/11/04 22:19:37
Done.
| |
| 131 | |
| 132 /// The current indent level. | |
| 133 var _indent = -1; | |
|
Bob Nystrom
2014/10/31 20:03:27
Document what -1 means (or make a constant).
Does
nweiz
2014/11/04 22:19:38
Done.
| |
| 134 | |
| 135 /// Whether a simple key is allowed in this context. | |
| 136 /// | |
| 137 /// A simple key refers to any mapping key that doesn't have an explicit "?". | |
| 138 var _simpleKeyAllowed = true; | |
| 139 | |
| 140 /// The stack of potential simple keys for each level of flow nesting. | |
| 141 /// | |
| 142 /// Entries in this list may be `null`, indicating that there is no valid | |
| 143 /// simple key for the associated level of nesting. | |
| 144 /// | |
| 145 /// When a ":" is parsed and there's a simple key available, a [TokenType.KEY] | |
| 146 /// token is inserted in [_tokens] before that key's token. This allows the | |
| 147 /// parser to tell that the key is intended to be a mapping key. | |
| 148 final _simpleKeys = <_SimpleKey>[null]; | |
|
Bob Nystrom
2014/10/31 20:03:28
Why isn't this initially empty?
nweiz
2014/11/04 22:19:37
Because there is an initial flow level that could
| |
| 149 | |
| 150 /// Whether the scanner's currently positioned in a block-level structure (as | |
| 151 /// opposed to flow-level). | |
| 152 bool get _inBlockContext => _flowLevel == 0; | |
| 153 | |
| 154 /// Whether the current character is a line break or the end of the source. | |
| 155 bool get _isBreakOrEnd => _scanner.isDone || _isBreak; | |
| 156 | |
| 157 /// Whether the current character is a line break. | |
| 158 bool get _isBreak => _isBreakAt(0); | |
| 159 | |
| 160 /// Whether the current character is whitespace or the end of the source. | |
| 161 bool get _isBlankOrEnd => _isBlankOrEndAt(0); | |
| 162 | |
| 163 /// Whether the current character is whitespace. | |
| 164 bool get _isBlank => _isBlankAt(0); | |
| 165 | |
| 166 /// Whether the current character is a valid tag name character. | |
| 167 /// | |
| 168 /// See http://yaml.org/spec/1.2/spec.html#ns-tag-name. | |
| 169 bool get _isTagChar { | |
| 170 var char = _scanner.peekChar(); | |
| 171 if (char == null) return false; | |
| 172 return (char >= NUMBER_0 && char <= NUMBER_9) || | |
| 173 (char >= LETTER_A && char <= LETTER_Z) || | |
| 174 (char >= LETTER_CAP_A && char <= LETTER_CAP_Z) || | |
| 175 char == HYPHEN || char == SEMICOLON || char == SLASH || | |
| 176 char == COLON || char == AT || char == AMPERSAND || | |
| 177 char == EQUALS || char == PLUS || char == DOLLAR || | |
| 178 char == PERIOD || char == TILDE || char == QUESTION || | |
| 179 char == ASTERISK || char == SINGLE_QUOTE || char == LEFT_PAREN || | |
| 180 char == RIGHT_PAREN || char == PERCENT; | |
|
Bob Nystrom
2014/10/31 20:03:28
It may be quicker to look this up in a map or even
nweiz
2014/11/04 22:19:37
Done.
| |
| 181 } | |
| 182 | |
| 183 /// Whether the current character is a valid anchor name character. | |
| 184 /// | |
| 185 /// See http://yaml.org/spec/1.2/spec.html#ns-anchor-name. | |
| 186 bool get _isAnchorChar { | |
| 187 if (!_isNonSpace) return false; | |
| 188 | |
| 189 var char = _scanner.peekChar(); | |
| 190 return char != COMMA && char != LEFT_SQUARE && char != RIGHT_SQUARE && | |
| 191 char != LEFT_CURLY && char != RIGHT_CURLY; | |
| 192 } | |
| 193 | |
| 194 /// Whether the character at the current position is a decimal digit. | |
| 195 bool get _isDigit { | |
| 196 var char = _scanner.peekChar(); | |
| 197 return char != null && (char >= NUMBER_0 && char <= NUMBER_9); | |
| 198 } | |
| 199 | |
| 200 /// Whether the character at the current position is a hexadecimal | |
| 201 /// digit. | |
| 202 bool get _isHex { | |
| 203 var char = _scanner.peekChar(); | |
| 204 return char != null && | |
| 205 ((char >= NUMBER_0 && char <= NUMBER_9) || | |
| 206 (char >= LETTER_A && char <= LETTER_F) || | |
| 207 (char >= LETTER_CAP_A && char <= LETTER_CAP_F)); | |
| 208 } | |
| 209 | |
| 210 /// Whether the character at the current position is a plain character. | |
| 211 /// | |
| 212 /// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c). | |
| 213 bool get _isPlainChar => _isPlainCharAt(0); | |
| 214 | |
| 215 /// Whether the character at the current position is a printable character | |
| 216 /// other than a line break or byte-order mark. | |
| 217 /// | |
| 218 /// See http://yaml.org/spec/1.2/spec.html#nb-char. | |
| 219 bool get _isNonBreak { | |
| 220 var char = _scanner.peekChar(); | |
| 221 switch (char) { | |
| 222 case LF: | |
| 223 case CR: | |
| 224 case BOM: | |
| 225 return false; | |
| 226 case TAB: | |
| 227 case NEL: | |
| 228 return true; | |
| 229 default: | |
| 230 return char != null && | |
| 231 ((char >= 0x00020 && char <= 0x00007E) || | |
|
Bob Nystrom
2014/10/31 20:03:28
Nit: +2 more before "(".
nweiz
2014/11/04 22:19:37
Done.
| |
| 232 (char >= 0x000A0 && char <= 0x00D7FF) || | |
| 233 (char >= 0x0E000 && char <= 0x00FFFD) || | |
| 234 (char >= 0x10000 && char <= 0x10FFFF)); | |
| 235 } | |
| 236 } | |
| 237 | |
| 238 /// Whether the character at the current position is a printable character | |
| 239 /// other than whitespace. | |
| 240 /// | |
| 241 /// See http://yaml.org/spec/1.2/spec.html#ns-char. | |
| 242 bool get _isNonSpace { | |
| 243 var char = _scanner.peekChar(); | |
| 244 return char != null && char != LF && char != CR && char != BOM && char != SP && | |
|
Bob Nystrom
2014/10/31 20:03:27
Long line.
nweiz
2014/11/04 22:19:37
Done.
| |
| 245 char != SP && | |
| 246 (char == NEL || | |
| 247 (char >= 0x00020 && char <= 0x00007E) || | |
| 248 (char >= 0x000A0 && char <= 0x00D7FF) || | |
| 249 (char >= 0x0E000 && char <= 0x00FFFD) || | |
| 250 (char >= 0x10000 && char <= 0x10FFFF)); | |
|
Bob Nystrom
2014/10/31 20:03:28
This expression is pretty huge. How about using a
nweiz
2014/11/04 22:19:36
Done.
| |
| 251 } | |
| 252 | |
| 253 /// Creates a scanner that scans [source]. | |
| 254 /// | |
| 255 /// [sourceUrl] can be a String or a [Uri]. | |
| 256 Scanner(String source, {sourceUrl}) | |
| 257 : _scanner = new SpanScanner(source, sourceUrl: sourceUrl); | |
| 258 | |
| 259 /// Consumes and returns the next token. | |
| 260 Token scan() { | |
| 261 if (_streamEndProduced) throw new StateError("Out of tokens."); | |
| 262 if (!_tokenAvailable) _fetchMoreTokens(); | |
| 263 | |
| 264 var token = _tokens.removeFirst(); | |
| 265 _tokenAvailable = false; | |
| 266 _tokensParsed++; | |
| 267 _streamEndProduced = token is Token && | |
| 268 token.type == TokenType.STREAM_END; | |
| 269 return token; | |
| 270 } | |
| 271 | |
| 272 /// Returns the next token without consuming it. | |
| 273 Token peek() { | |
| 274 if (_streamEndProduced) return null; | |
| 275 if (!_tokenAvailable) _fetchMoreTokens(); | |
| 276 return _tokens.first; | |
| 277 } | |
| 278 | |
| 279 /// Ensures that [_tokens] contains at least one token which can be returned. | |
| 280 void _fetchMoreTokens() { | |
| 281 while (true) { | |
| 282 if (_tokens.isNotEmpty) { | |
| 283 _staleSimpleKeys(); | |
| 284 if (!_simpleKeys.any((key) => | |
|
Bob Nystrom
2014/10/31 20:03:28
Document this.
nweiz
2014/11/04 22:19:38
Done.
| |
| 285 key != null && key.tokenNumber == _tokensParsed)) { | |
| 286 break; | |
| 287 } | |
| 288 } | |
| 289 | |
| 290 _fetchNextToken(); | |
| 291 } | |
| 292 _tokenAvailable = true; | |
| 293 } | |
| 294 | |
| 295 /// The dispatcher for token fetchers. | |
| 296 void _fetchNextToken() { | |
| 297 if (!_streamStartProduced) { | |
| 298 _fetchStreamStart(); | |
| 299 return; | |
| 300 } | |
| 301 | |
| 302 _scanToNextToken(); | |
| 303 _staleSimpleKeys(); | |
| 304 _unrollIndent(_scanner.column); | |
| 305 | |
| 306 if (_scanner.isDone) { | |
| 307 _fetchStreamEnd(); | |
| 308 return; | |
| 309 } | |
| 310 | |
| 311 if (_scanner.column == 0) { | |
| 312 if (_scanner.peekChar() == PERCENT) { | |
| 313 _fetchDirective(); | |
| 314 return; | |
| 315 } else if (_isBlankOrEndAt(3)) { | |
|
Bob Nystrom
2014/10/31 20:03:28
Ditch the else.
nweiz
2014/11/04 22:19:37
Done.
| |
| 316 if (_scanner.matches('---')) { | |
| 317 _fetchDocumentIndicator(TokenType.DOCUMENT_START); | |
| 318 return; | |
| 319 } else if (_scanner.matches('...')) { | |
|
Bob Nystrom
2014/10/31 20:03:28
Here too.
nweiz
2014/11/04 22:19:36
Done.
| |
| 320 _fetchDocumentIndicator(TokenType.DOCUMENT_END); | |
| 321 return; | |
| 322 } | |
| 323 } | |
| 324 } | |
| 325 | |
| 326 switch (_scanner.peekChar()) { | |
| 327 case LEFT_SQUARE: | |
| 328 _fetchFlowCollectionStart(TokenType.FLOW_SEQUENCE_START); | |
| 329 return; | |
|
Bob Nystrom
2014/10/31 20:03:29
Is there a reason to prefer return over break thro
nweiz
2014/11/04 22:19:37
It allows the reader to avoid checking the end of
| |
| 330 case LEFT_CURLY: | |
| 331 _fetchFlowCollectionStart(TokenType.FLOW_MAPPING_START); | |
| 332 return; | |
| 333 case RIGHT_SQUARE: | |
| 334 _fetchFlowCollectionEnd(TokenType.FLOW_SEQUENCE_END); | |
| 335 return; | |
| 336 case RIGHT_CURLY: | |
| 337 _fetchFlowCollectionEnd(TokenType.FLOW_MAPPING_END); | |
| 338 return; | |
| 339 case COMMA: | |
| 340 _fetchFlowEntry(); | |
| 341 return; | |
| 342 case ASTERISK: | |
| 343 _fetchAnchor(anchor: false); | |
| 344 return; | |
| 345 case AMPERSAND: | |
| 346 _fetchAnchor(anchor: true); | |
| 347 return; | |
| 348 case EXCLAMATION: | |
| 349 _fetchTag(); | |
| 350 return; | |
| 351 case SINGLE_QUOTE: | |
| 352 _fetchFlowScalar(singleQuote: true); | |
| 353 return; | |
| 354 case DOUBLE_QUOTE: | |
| 355 _fetchFlowScalar(singleQuote: false); | |
| 356 return; | |
| 357 case VERTICAL_BAR: | |
| 358 if (!_inBlockContext) _invalidScalarCharacter(); | |
| 359 _fetchBlockScalar(literal: true); | |
| 360 return; | |
| 361 case RIGHT_ANGLE: | |
| 362 if (!_inBlockContext) _invalidScalarCharacter(); | |
| 363 _fetchBlockScalar(literal: false); | |
| 364 return; | |
| 365 case PERCENT: | |
| 366 case AT: | |
| 367 case GRAVE_ACCENT: | |
| 368 _invalidScalarCharacter(); | |
| 369 return; | |
| 370 | |
| 371 // These characters may sometimes begin plain scalars. | |
| 372 case HYPHEN: | |
| 373 if (_isPlainCharAt(1)) { | |
| 374 _fetchPlainScalar(); | |
| 375 } else { | |
| 376 _fetchBlockEntry(); | |
| 377 } | |
| 378 return; | |
| 379 case QUESTION: | |
| 380 if (_isPlainCharAt(1)) { | |
| 381 _fetchPlainScalar(); | |
| 382 } else { | |
| 383 _fetchKey(); | |
| 384 } | |
| 385 return; | |
| 386 case COLON: | |
| 387 if (!_inBlockContext && _tokens.isNotEmpty) { | |
| 388 // If a colon follows a "JSON-like" value (an explicit map or list, or | |
| 389 // a quoted string) it isn't required to have whitespace after it | |
| 390 // since it unambiguously describes a map. | |
| 391 var token = _tokens.last; | |
| 392 if (token.type == TokenType.FLOW_SEQUENCE_END || | |
| 393 token.type == TokenType.FLOW_MAPPING_END || | |
| 394 (token.type == TokenType.SCALAR && token.style.isQuoted)) { | |
| 395 _fetchValue(); | |
| 396 return; | |
| 397 } | |
| 398 } | |
| 399 | |
| 400 if (_isPlainCharAt(1)) { | |
| 401 _fetchPlainScalar(); | |
| 402 } else { | |
| 403 _fetchValue(); | |
| 404 } | |
| 405 return; | |
| 406 default: | |
| 407 if (!_isNonBreak) _invalidScalarCharacter(); | |
| 408 | |
| 409 _fetchPlainScalar(); | |
| 410 return; | |
| 411 } | |
| 412 | |
| 413 throw 'Inaccessible'; | |
| 414 } | |
| 415 | |
| 416 /// Throws an error about a disallowed character. | |
| 417 void _invalidScalarCharacter() => | |
| 418 _scanner.error("Unexpected character.", length: 1); | |
| 419 | |
| 420 /// Checks the list of potential simple keys and removes the positions that | |
| 421 /// cannot contain simple keys anymore. | |
| 422 void _staleSimpleKeys() { | |
| 423 for (var i = 0; i < _simpleKeys.length; i++) { | |
| 424 var key = _simpleKeys[i]; | |
| 425 if (key == null) continue; | |
| 426 | |
| 427 // libyaml requires that all simple keys be a single line and no longer | |
| 428 // than 1024 characters. However, in section 7.4.2 of the spec | |
| 429 // (http://yaml.org/spec/1.2/spec.html#id2790832), these restrictions are | |
|
Bob Nystrom
2014/10/31 20:03:27
"restrictions are"
nweiz
2014/11/04 22:19:38
Done.
| |
| 430 // only applied when the curly braces are omitted. It's difficult to | |
| 431 // retain enough context to know which keys need to have the restriction | |
| 432 // placed on them, so for now we go the other direction and allow | |
| 433 // everything but multiline simple keys in a block context. | |
| 434 if (!_inBlockContext) continue; | |
| 435 | |
| 436 if (key.location.line == _scanner.line) continue; | |
| 437 | |
| 438 if (key.required) { | |
| 439 throw new YamlException("Expected ':'.", _scanner.emptySpan); | |
| 440 } | |
| 441 | |
| 442 _simpleKeys[i] = null; | |
| 443 } | |
| 444 } | |
| 445 | |
| 446 /// Checks if a simple key may start at the current position and saves it if | |
| 447 /// so. | |
| 448 void _saveSimpleKey() { | |
| 449 // A simple key is required at the current position if the scanner is in the | |
| 450 // block context and the current column coincides with the indentation | |
| 451 // level. | |
| 452 var required = _inBlockContext && _indent == _scanner.column; | |
| 453 | |
| 454 // A simple key is required only when it is the first token in the current | |
| 455 // line. Therefore it is always allowed. But we add a check anyway. | |
| 456 assert(_simpleKeyAllowed || !required); | |
| 457 | |
| 458 if (!_simpleKeyAllowed) return; | |
| 459 | |
| 460 // If the current position may start a simple key, save it. | |
| 461 _removeSimpleKey(); | |
| 462 _simpleKeys[_simpleKeys.length - 1] = new _SimpleKey( | |
| 463 _tokensParsed + _tokens.length, | |
| 464 _scanner.location, | |
| 465 required: required); | |
| 466 } | |
| 467 | |
| 468 /// Removes a potential simple key at the current flow level. | |
| 469 void _removeSimpleKey() { | |
| 470 var key = _simpleKeys.last; | |
| 471 if (key != null && key.required) { | |
| 472 throw new YamlException("Could not find expected ':' for simple key.", | |
| 473 key.location.pointSpan()); | |
| 474 } | |
| 475 | |
| 476 _simpleKeys[_simpleKeys.length - 1] = null; | |
| 477 } | |
| 478 | |
| 479 /// Increases the flow level and resizes the simple key list. | |
| 480 void _increaseFlowLevel() { | |
| 481 _simpleKeys.add(null); | |
| 482 _flowLevel++; | |
| 483 } | |
| 484 | |
| 485 /// Decreases the flow level. | |
| 486 void _decreaseFlowLevel() { | |
| 487 if (_inBlockContext) return; | |
| 488 _simpleKeys.removeLast(); | |
| 489 _flowLevel--; | |
| 490 } | |
| 491 | |
| 492 /// Pushes the current indentation level to the stack and sets the new level if | |
|
Bob Nystrom
2014/10/31 20:03:28
Long line.
nweiz
2014/11/04 22:19:36
Done.
| |
| 493 /// [column] is greater than [_indent]. | |
| 494 /// | |
| 495 /// If it is, appends or inserts the specified token into [_tokens]. If | |
|
Bob Nystrom
2014/10/31 20:03:27
"it is"?
nweiz
2014/11/04 22:19:37
Done.
| |
| 496 /// [tokenNumber] is provided, the corresponding token will be replaced; | |
| 497 /// otherwise, the token will be added at the end. | |
| 498 void _rollIndent(int column, TokenType type, SourceLocation location, | |
| 499 {int tokenNumber}) { | |
| 500 if (!_inBlockContext) return; | |
| 501 if (_indent != -1 && _indent >= column) return; | |
| 502 | |
| 503 // Push the current indentation level to the stack and set the new | |
| 504 // indentation level. | |
| 505 _indents.add(_indent); | |
| 506 _indent = column; | |
| 507 | |
| 508 // Create a token and insert it into the queue. | |
| 509 var token = new Token(type, location.pointSpan()); | |
| 510 if (tokenNumber == null) { | |
| 511 _tokens.add(token); | |
| 512 } else { | |
| 513 _tokens.insert(tokenNumber - _tokensParsed, token); | |
| 514 } | |
| 515 } | |
| 516 | |
| 517 /// Pops indentation levels from [_indents] until the current level becomes | |
| 518 /// less than or equal to [column]. | |
| 519 /// | |
| 520 /// For each indentation level, appends a [TokenType.BLOCK_END] token. | |
| 521 void _unrollIndent(int column) { | |
| 522 if (!_inBlockContext) return; | |
| 523 | |
| 524 while (_indent > column) { | |
| 525 _tokens.add(new Token(TokenType.BLOCK_END, _scanner.emptySpan)); | |
| 526 _indent = _indents.removeLast(); | |
| 527 } | |
| 528 } | |
| 529 | |
| 530 /// Pops indentation levels from [_indents] until the current level resets to | |
| 531 /// -1. | |
| 532 /// | |
| 533 /// For each indentation level, appends a [TokenType.BLOCK_END] token. | |
| 534 void _resetIndent() => _unrollIndent(-1); | |
| 535 | |
| 536 /// Produces a [TokenType.STREAM_START] token. | |
| 537 void _fetchStreamStart() { | |
| 538 // Much of libyaml's initialization logic here is done in variable | |
| 539 // initializers instead. | |
| 540 _streamStartProduced = true; | |
| 541 _tokens.add(new Token(TokenType.STREAM_START, _scanner.emptySpan)); | |
| 542 } | |
| 543 | |
| 544 /// Produces a [TokenType.STREAM_END] token. | |
| 545 void _fetchStreamEnd() { | |
| 546 _resetIndent(); | |
| 547 _removeSimpleKey(); | |
| 548 _simpleKeyAllowed = false; | |
| 549 _tokens.add(new Token(TokenType.STREAM_END, _scanner.emptySpan)); | |
| 550 } | |
| 551 | |
| 552 /// Produces a [TokenType.VERSION_DIRECTIVE] or [TokenType.TAG_DIRECTIVE] | |
| 553 /// token. | |
| 554 void _fetchDirective() { | |
| 555 _resetIndent(); | |
| 556 _removeSimpleKey(); | |
| 557 _simpleKeyAllowed = false; | |
| 558 var directive = _scanDirective(); | |
| 559 if (directive != null) _tokens.add(directive); | |
| 560 } | |
| 561 | |
| 562 /// Produces a [TokenType.DOCUMENT_START] or [TokenType.DOCUMENT_END] token. | |
| 563 void _fetchDocumentIndicator(TokenType type) { | |
| 564 _resetIndent(); | |
| 565 _removeSimpleKey(); | |
| 566 _simpleKeyAllowed = false; | |
|
Bob Nystrom
2014/10/31 20:03:28
Hoist these three lines into a _resetState() metho
nweiz
2014/11/04 22:19:36
I'd rather have the visual similarity with the met
| |
| 567 | |
| 568 // Consume the indicator token. | |
| 569 var start = _scanner.state; | |
| 570 _scanner.readChar(); | |
| 571 _scanner.readChar(); | |
| 572 _scanner.readChar(); | |
| 573 | |
| 574 _tokens.add(new Token(type, _scanner.spanFrom(start))); | |
| 575 } | |
| 576 | |
| 577 /// Produces a [TokenType.FLOW_SEQUENCE_START] or | |
| 578 /// [TokenType.FLOW_MAPPING_START] token. | |
| 579 void _fetchFlowCollectionStart(TokenType type) { | |
| 580 _saveSimpleKey(); | |
| 581 _increaseFlowLevel(); | |
| 582 _simpleKeyAllowed = true; | |
| 583 _addCharToken(type); | |
| 584 } | |
| 585 | |
| 586 /// Produces a [TokenType.FLOW_SEQUENCE_END] or [TokenType.FLOW_MAPPING_END] | |
| 587 /// token. | |
| 588 void _fetchFlowCollectionEnd(TokenType type) { | |
| 589 _removeSimpleKey(); | |
| 590 _decreaseFlowLevel(); | |
| 591 _simpleKeyAllowed = false; | |
| 592 _addCharToken(type); | |
| 593 } | |
| 594 | |
| 595 /// Produces a [TokenType.FLOW_ENTRY] token. | |
| 596 void _fetchFlowEntry() { | |
| 597 _removeSimpleKey(); | |
| 598 _simpleKeyAllowed = true; | |
| 599 _addCharToken(TokenType.FLOW_ENTRY); | |
| 600 } | |
| 601 | |
| 602 /// Produces a [TokenType.BLOCK_ENTRY] token. | |
| 603 void _fetchBlockEntry() { | |
| 604 if (_inBlockContext) { | |
| 605 if (!_simpleKeyAllowed) { | |
| 606 throw new YamlException( | |
| 607 "Block sequence entries are not allowed in this context.", | |
|
Bob Nystrom
2014/10/31 20:03:29
Would be good to describe the context instead of j
nweiz
2014/11/04 22:19:37
That's pretty tough... we'd have to track the reas
| |
| 608 _scanner.emptySpan); | |
| 609 } | |
| 610 | |
| 611 _rollIndent( | |
| 612 _scanner.column, | |
| 613 TokenType.BLOCK_SEQUENCE_START, | |
| 614 _scanner.emptySpan.start); | |
| 615 } else { | |
| 616 // It is an error for the '-' indicator to occur in the flow context, but | |
| 617 // we let the Parser detect and report it because it's able to point to | |
| 618 // the context. | |
| 619 } | |
| 620 | |
| 621 _removeSimpleKey(); | |
| 622 _simpleKeyAllowed = true; | |
| 623 _addCharToken(TokenType.BLOCK_ENTRY); | |
| 624 } | |
| 625 | |
| 626 /// Produces the [TokenType.KEY] token. | |
| 627 void _fetchKey() { | |
| 628 if (_inBlockContext) { | |
| 629 if (!_simpleKeyAllowed) { | |
| 630 throw new YamlException("Mapping keys are not allowed in this context.", | |
|
Bob Nystrom
2014/10/31 20:03:28
Ditto.
| |
| 631 _scanner.emptySpan); | |
| 632 } | |
| 633 | |
| 634 _rollIndent( | |
| 635 _scanner.column, | |
| 636 TokenType.BLOCK_MAPPING_START, | |
| 637 _scanner.emptySpan.start); | |
| 638 } | |
| 639 | |
| 640 // Simple keys are allowed after `?` in a block context. | |
| 641 _simpleKeyAllowed = _inBlockContext; | |
| 642 _addCharToken(TokenType.KEY); | |
| 643 } | |
| 644 | |
| 645 /// Produces the [TokenType.VALUE] token. | |
| 646 void _fetchValue() { | |
| 647 var simpleKey = _simpleKeys.last; | |
| 648 if (simpleKey != null) { | |
| 649 // Add a [TokenType.KEY] directive before the first token of the simple | |
| 650 // key so the parser knows that it's part of a key/value pair. | |
| 651 _tokens.insert(simpleKey.tokenNumber - _tokensParsed, | |
| 652 new Token(TokenType.KEY, simpleKey.location.pointSpan())); | |
| 653 | |
| 654 // In the block context, we may need to add the | |
| 655 // [TokenType.BLOCK_MAPPING_START] token. | |
| 656 _rollIndent( | |
| 657 simpleKey.location.column, | |
| 658 TokenType.BLOCK_MAPPING_START, | |
| 659 simpleKey.location, | |
| 660 tokenNumber: simpleKey.tokenNumber); | |
| 661 | |
| 662 // Remove the simple key. | |
| 663 _simpleKeys[_simpleKeys.length - 1] = null; | |
| 664 | |
| 665 // A simple key cannot follow another simple key. | |
| 666 _simpleKeyAllowed = false; | |
| 667 } else if (_inBlockContext) { | |
| 668 // If we're here, we've found the ':' indicator following a complex key. | |
| 669 | |
| 670 if (!_simpleKeyAllowed) { | |
| 671 throw new YamlException( | |
| 672 "Mapping values are not allowed in this context.", | |
| 673 _scanner.emptySpan); | |
| 674 } | |
| 675 | |
| 676 _rollIndent( | |
| 677 _scanner.column, | |
| 678 TokenType.BLOCK_MAPPING_START, | |
| 679 _scanner.location); | |
| 680 _simpleKeyAllowed = true; | |
| 681 } else if (_simpleKeyAllowed) { | |
| 682 // If we're here, we've found the ':' indicator with an empty key. This | |
| 683 // behavior differs from libyaml, which disallows empty implicit keys. | |
| 684 _simpleKeyAllowed = false; | |
| 685 _addCharToken(TokenType.KEY); | |
| 686 } | |
| 687 | |
| 688 _addCharToken(TokenType.VALUE); | |
| 689 } | |
| 690 | |
| 691 /// Adds a token with [type] to [_tokens]. | |
| 692 /// | |
| 693 /// The span of the new token is the current character. | |
| 694 void _addCharToken(TokenType type) { | |
| 695 var start = _scanner.state; | |
| 696 _scanner.readChar(); | |
| 697 _tokens.add(new Token(type, _scanner.spanFrom(start))); | |
| 698 } | |
| 699 | |
| 700 /// Produces a [TokenType.ALIAS] or [TokenType.ANCHOR] token. | |
| 701 void _fetchAnchor({bool anchor: true}) { | |
| 702 _saveSimpleKey(); | |
| 703 _simpleKeyAllowed = false; | |
| 704 _tokens.add(_scanAnchor(anchor: anchor)); | |
| 705 } | |
| 706 | |
| 707 /// Produces a [TokenType.TAG] token. | |
| 708 void _fetchTag() { | |
| 709 _saveSimpleKey(); | |
| 710 _simpleKeyAllowed = false; | |
| 711 _tokens.add(_scanTag()); | |
| 712 } | |
| 713 | |
| 714 /// Produces a [TokenType.SCALAR] token with style [ScalarStyle.LITERAL] or | |
| 715 /// [ScalarStyle.FOLDED]. | |
| 716 void _fetchBlockScalar({bool literal: false}) { | |
| 717 _removeSimpleKey(); | |
| 718 _simpleKeyAllowed = true; | |
| 719 _tokens.add(_scanBlockScalar(literal: literal)); | |
| 720 } | |
| 721 | |
| 722 /// Produces a [TokenType.SCALAR] token with style [ScalarStyle.SINGLE_QUOTED] | |
| 723 /// or [ScalarStyle.DOUBLE_QUOTED]. | |
| 724 void _fetchFlowScalar({bool singleQuote: false}) { | |
| 725 _saveSimpleKey(); | |
| 726 _simpleKeyAllowed = false; | |
| 727 _tokens.add(_scanFlowScalar(singleQuote: singleQuote)); | |
| 728 } | |
| 729 | |
| 730 /// Produces a [TokenType.SCALAR] token with style [ScalarStyle.PLAIN]. | |
| 731 void _fetchPlainScalar() { | |
| 732 _saveSimpleKey(); | |
| 733 _simpleKeyAllowed = false; | |
| 734 _tokens.add(_scanPlainScalar()); | |
| 735 } | |
| 736 | |
| 737 /// Eats whitespace and comments until the next token is found. | |
| 738 void _scanToNextToken() { | |
| 739 var afterLineBreak = false; | |
| 740 while (true) { | |
| 741 // Allow the BOM to start a line. | |
| 742 if (_scanner.column == 0) _scanner.scan("\uFEFF"); | |
| 743 | |
| 744 // Eat whitespace. | |
| 745 // | |
| 746 // libyaml disallows tabs after "-", "?", or ":", but the spec allows | |
| 747 // them. See section 6.2: http://yaml.org/spec/1.2/spec.html#id2778241. | |
| 748 while (_scanner.peekChar() == SP || | |
| 749 ((!_inBlockContext || !afterLineBreak) && | |
| 750 _scanner.peekChar() == TAB)) { | |
| 751 _scanner.readChar(); | |
| 752 } | |
| 753 | |
| 754 if (_scanner.peekChar() == TAB) { | |
| 755 _scanner.error("Tab characters are not allowed as indentation.", | |
| 756 length: 1); | |
| 757 } | |
| 758 | |
| 759 // Eat a comment until a line break. | |
| 760 if (_scanner.peekChar() == HASH) { | |
| 761 while (!_isBreakOrEnd) { | |
| 762 _scanner.readChar(); | |
| 763 } | |
| 764 } | |
| 765 | |
| 766 // If we're at a line break, eat it. | |
| 767 if (_isBreak) { | |
| 768 _skipLine(); | |
| 769 | |
| 770 // In the block context, a new line may start a simple key. | |
| 771 if (_inBlockContext) _simpleKeyAllowed = true; | |
| 772 afterLineBreak = true; | |
| 773 } else { | |
| 774 // Otherwise we've found a token. | |
| 775 break; | |
| 776 } | |
| 777 } | |
| 778 } | |
| 779 | |
| 780 /// Scans a [TokenType.VERSION_DIRECTIVE] or [TokenType.TAG_DIRECTIVE] token. | |
| 781 /// | |
| 782 /// %YAML 1.2 # a comment \n | |
| 783 /// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| 784 /// %TAG !yaml! tag:yaml.org,2002: \n | |
| 785 /// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| 786 Token _scanDirective() { | |
| 787 var start = _scanner.state; | |
| 788 | |
| 789 // Eat '%'. | |
| 790 _scanner.readChar(); | |
| 791 | |
| 792 var token; | |
| 793 var name = _scanDirectiveName(); | |
| 794 if (name == "YAML") { | |
| 795 token = _scanVersionDirectiveValue(start); | |
| 796 } else if (name == "TAG") { | |
| 797 token = _scanTagDirectiveValue(start); | |
| 798 } else { | |
| 799 warn("Warning: unknown directive.", _scanner.spanFrom(start)); | |
|
Bob Nystrom
2014/10/31 20:03:27
I don't think the parser should output directly to
nweiz
2014/11/04 22:19:37
Done. I wish there were a more standard way to do
| |
| 800 | |
| 801 // libyaml doesn't support unknown directives, but the spec says to ignore | |
| 802 // them and warn: http://yaml.org/spec/1.2/spec.html#id2781147. | |
| 803 while (!_isBreakOrEnd) { | |
| 804 _scanner.readChar(); | |
| 805 } | |
| 806 | |
| 807 return null; | |
| 808 } | |
| 809 | |
| 810 // Eat the rest of the line, including any comments. | |
| 811 while (_isBlank) { | |
| 812 _scanner.readChar(); | |
| 813 } | |
|
Bob Nystrom
2014/10/31 20:03:28
Make a _skipBlanks() method for this since you do
nweiz
2014/11/04 22:19:37
Done.
| |
| 814 | |
| 815 if (_scanner.peekChar() == HASH) { | |
| 816 while (!_isBreakOrEnd) { | |
| 817 _scanner.readChar(); | |
| 818 } | |
| 819 } | |
|
Bob Nystrom
2014/10/31 20:03:28
Probably this too.
nweiz
2014/11/04 22:19:36
Done.
| |
| 820 | |
| 821 if (!_isBreakOrEnd) { | |
| 822 throw new YamlException( | |
| 823 "Expected comment or line break after directive.", | |
| 824 _scanner.spanFrom(start)); | |
| 825 } | |
| 826 | |
| 827 if (_isBreak) _skipLine(); | |
|
Bob Nystrom
2014/10/31 20:03:27
Do you need to check _isBreak here? Doesn't _skipL
nweiz
2014/11/04 22:19:36
Done.
| |
| 828 return token; | |
| 829 } | |
| 830 | |
| 831 /// Scans a directive name, throwing a [YamlException] if it is empty or if [_isBlankOrEnd] is false right after it. | |
| 832 /// | |
| 833 /// %YAML 1.2 # a comment \n | |
| 834 /// ^^^^ | |
| 835 /// %TAG !yaml! tag:yaml.org,2002: \n | |
| 836 /// ^^^ | |
| 837 String _scanDirectiveName() { | |
| 838 var buffer = new StringBuffer(); | |
| 839 // libyaml only allows word characters in directive names, but the spec | |
| 840 // disagrees: http://yaml.org/spec/1.2/spec.html#ns-directive-name. | |
| 841 while (_isNonSpace) { | |
| 842 buffer.writeCharCode(_scanner.readChar()); | |

Bob Nystrom
2014/10/31 20:03:29
This seems inefficient. Can you just get a substri
nweiz
2014/11/04 22:19:36
Done.
| |
| 843 } | |
| 844 | |
| 845 var name = buffer.toString(); | |
| 846 if (name.isEmpty) { | |
| 847 throw new YamlException("Expected directive name.", _scanner.emptySpan); | |
| 848 } else if (!_isBlankOrEnd) { | |

Bob Nystrom
2014/10/31 20:03:28
What about:
%YAML#Comment.
I'd expect this to be
nweiz
2014/11/04 22:19:36
I don't think that's a likely enough error to warr
| |
| 849 throw new YamlException( | |
| 850 "Unexpected character in directive name.", _scanner.emptySpan); | |
| 851 } | |
| 852 | |
| 853 return name; | |
| 854 } | |
| 855 | |
| 856 /// Scans the `major.minor` value of a version directive and returns a [VersionDirectiveToken] whose span begins at [start]. | |
| 857 /// | |
| 858 /// %YAML 1.2 # a comment \n | |
| 859 /// ^^^^^^ | |
| 860 Token _scanVersionDirectiveValue(LineScannerState start) { | |
| 861 while (_isBlank) { | |
| 862 _scanner.readChar(); | |
| 863 } | |
| 864 | |
| 865 var major = _scanVersionDirectiveNumber(); | |
| 866 _scanner.expect('.'); | |
| 867 var minor = _scanVersionDirectiveNumber(); | |
| 868 | |
| 869 return new VersionDirectiveToken(_scanner.spanFrom(start), major, minor); | |
| 870 } | |
| 871 | |
| 872 /// Scans one run of digits in a version directive and returns it as an int, throwing a [YamlException] if there is no digit. | |
| 873 /// | |
| 874 /// %YAML 1.2 # a comment \n | |
| 875 /// ^ | |
| 876 /// %YAML 1.2 # a comment \n | |
| 877 /// ^ | |
| 878 int _scanVersionDirectiveNumber() { | |
| 879 var buffer = new StringBuffer(); | |
| 880 while (_isDigit) { | |
| 881 buffer.writeCharCode(_scanner.readChar()); | |
| 882 } | |
| 883 | |
| 884 var number = buffer.toString(); | |
| 885 if (number.isEmpty) { | |
| 886 throw new YamlException("Expected version number.", _scanner.emptySpan); | |
| 887 } | |
| 888 | |
| 889 return int.parse(number); | |
| 890 } | |
| 891 | |
| 892 /// Scans the value of a tag directive (a tag handle, whitespace, then a prefix URI) and returns a [TagDirectiveToken] whose span begins at [start]. | |
| 893 /// | |
| 894 /// %TAG !yaml! tag:yaml.org,2002: \n | |
| 895 /// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
| 896 Token _scanTagDirectiveValue(LineScannerState start) { | |
| 897 while (_isBlank) { | |
| 898 _scanner.readChar(); | |
| 899 } | |
| 900 | |
| 901 var handle = _scanTagHandle(directive: true); | |
| 902 if (!_isBlank) { | |
| 903 throw new YamlException("Expected whitespace.", _scanner.emptySpan); | |
| 904 } | |
| 905 | |
| 906 while (_isBlank) { | |
| 907 _scanner.readChar(); | |
| 908 } | |
| 909 | |
| 910 var prefix = _scanTagUri(); | |
| 911 if (!_isBlankOrEnd) { | |
| 912 throw new YamlException("Expected whitespace.", _scanner.emptySpan); | |
| 913 } | |
| 914 | |
| 915 return new TagDirectiveToken(_scanner.spanFrom(start), handle, prefix); | |
| 916 } | |
| 917 | |
| 918 /// Scans a [TokenType.ANCHOR] token, returning an [AnchorToken] if [anchor] is true and an [AliasToken] otherwise. | |
| 919 Token _scanAnchor({bool anchor: true}) { | |
| 920 var start = _scanner.state; | |
| 921 | |
| 922 // Eat the indicator character. | |
| 923 _scanner.readChar(); | |
| 924 | |
| 925 var buffer = new StringBuffer(); | |
| 926 // libyaml only allows word characters in anchor names, but the spec | |
| 927 // disagrees: http://yaml.org/spec/1.2/spec.html#ns-anchor-char. | |
| 928 while (_isAnchorChar) { | |
| 929 buffer.writeCharCode(_scanner.readChar()); | |
| 930 } | |
| 931 | |
| 932 var next = _scanner.peekChar(); | |
| 933 if (buffer.length == 0 || | |
| 934 (!_isBlankOrEnd && next != QUESTION && next != COLON && | |
| 935 next != COMMA && next != RIGHT_SQUARE && next != RIGHT_CURLY && | |
| 936 next != PERCENT && next != AT && next != GRAVE_ACCENT)) { | |

Bob Nystrom
2014/10/31 20:03:29
What are these specific character tests for?
nweiz
2014/11/04 22:19:37
They check whether the anchor is followed by some
| |
| 937 throw new YamlException("Expected alphanumeric character.", | |
| 938 _scanner.emptySpan); | |
| 939 } | |
| 940 | |
| 941 if (anchor) { | |
| 942 return new AnchorToken(_scanner.spanFrom(start), buffer.toString()); | |
| 943 } else { | |
| 944 return new AliasToken(_scanner.spanFrom(start), buffer.toString()); | |
| 945 } | |
| 946 } | |
| 947 | |
| 948 /// Scans a [TokenType.TAG] token in either the `!<uri>` form or the `!suffix` / `!handle!suffix` forms. | |
| 949 Token _scanTag() { | |
| 950 var handle; | |
| 951 var suffix; | |
| 952 var start = _scanner.state; | |
| 953 | |
| 954 // Check if the tag is in the canonical form, i.e. it starts with '!<'. | |
| 955 if (_scanner.peekChar(1) == LEFT_ANGLE) { | |

Bob Nystrom
2014/10/31 20:03:29
Does this fail on "!" (a bang by itself)?
nweiz
2014/11/04 22:19:36
No; [peekChar] returns null for out-of-range indic
| |
| 956 // Eat '!<'. | |
| 957 _scanner.readChar(); | |
| 958 _scanner.readChar(); | |
| 959 | |
| 960 handle = ''; | |
| 961 suffix = _scanTagUri(); | |
| 962 | |
| 963 _scanner.expect('>'); | |
| 964 } else { | |
| 965 // The tag has either the '!suffix' or the '!handle!suffix' form. | |
| 966 | |
| 967 // First, try to scan a handle. | |
| 968 handle = _scanTagHandle(); | |
| 969 | |
| 970 if (handle.length > 1 && handle.startsWith('!') && handle.endsWith('!')) { | |
| 971 suffix = _scanTagUri(flowSeparators: false); | |
| 972 } else { | |
| 973 suffix = _scanTagUri(head: handle, flowSeparators: false); | |
| 974 | |
| 975 // There was no explicit handle. | |
| 976 if (suffix.isEmpty) { | |
| 977 // This is the special '!' tag. | |
| 978 handle = null; | |
| 979 suffix = '!'; | |
| 980 } else { | |
| 981 handle = '!'; | |
| 982 } | |
| 983 } | |
| 984 } | |
| 985 | |
| 986 // libyaml insists on whitespace after a tag, but example 7.2 indicates | |
| 987 // that it's not required: http://yaml.org/spec/1.2/spec.html#id2786720. | |
| 988 | |
| 989 return new TagToken(_scanner.spanFrom(start), handle, suffix); | |
| 990 } | |
| 991 | |
| 992 /// Scans a tag handle and returns the text consumed, including the leading '!'; if [directive] is true, a handle longer than '!' must also end with '!'. | |
| 993 String _scanTagHandle({bool directive: false}) { | |
| 994 _scanner.expect('!'); | |
| 995 | |
| 996 var buffer = new StringBuffer('!'); | |
| 997 | |
| 998 // libyaml only allows word characters in tags, but the spec disagrees: | |
| 999 // http://yaml.org/spec/1.2/spec.html#ns-tag-char. | |
| 1000 while (_isTagChar) { | |
| 1001 buffer.writeCharCode(_scanner.readChar()); | |
| 1002 } | |
| 1003 | |
| 1004 if (_scanner.peekChar() == EXCLAMATION) { | |
| 1005 buffer.writeCharCode(_scanner.readChar()); | |
| 1006 } else { | |
| 1007 // It's either the '!' tag or not really a tag handle. If it's a %TAG | |
| 1008 // directive, it's an error. If it's a tag token, it must be part of a | |
| 1009 // URI. | |
| 1010 if (directive && buffer.toString() != '!') _scanner.expect('!'); | |
| 1011 } | |
| 1012 | |
| 1013 return buffer.toString(); | |
| 1014 } | |
| 1015 | |
| 1016 /// Scans a tag URI and returns it after decoding with [Uri.decodeFull]. | |
| 1017 /// | |
| 1018 /// [head] is the initial portion of the tag that's already been scanned. | |
| 1019 /// [flowSeparators] indicates whether the tag URI can contain flow | |
| 1020 /// separators. | |
| 1021 String _scanTagUri({String head, bool flowSeparators: true}) { | |
| 1022 var length = head == null ? 0 : head.length; | |
| 1023 var buffer = new StringBuffer(); | |
| 1024 | |
| 1025 // Copy the head if needed. | |
| 1026 // | |
| 1027 // Note that we don't copy the leading '!' character. | |
| 1028 if (length > 1) buffer.write(head.substring(1)); | |
| 1029 | |
| 1030 // The set of characters that may appear in URI is as follows: | |
| 1031 // | |
| 1032 // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', | |
| 1033 // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', | |
| 1034 // '%'. | |
| 1035 // | |
| 1036 // In a shorthand tag annotation, the flow separators ',', '[', and ']' are | |
| 1037 // disallowed. | |
| 1038 var char = _scanner.peekChar(); | |
| 1039 while (_isTagChar || (flowSeparators && | |
| 1040 (char == COMMA || char == LEFT_SQUARE || char == RIGHT_SQUARE))) { | |
| 1041 buffer.writeCharCode(_scanner.readChar()); | |
| 1042 char = _scanner.peekChar(); | |
| 1043 } | |
| 1044 | |
| 1045 // libyaml manually decodes the URL, but we don't have to do that. | |
| 1046 return Uri.decodeFull(buffer.toString()); | |
| 1047 } | |
| 1048 | |
| 1049 /// Scans a block scalar, producing a literal-style token if [literal] is true and a folded-style token otherwise. | |
| 1050 Token _scanBlockScalar({bool literal: false}) { | |
| 1051 var start = _scanner.state; | |
| 1052 | |
| 1053 // Eat the indicator '|' or '>'. | |
| 1054 _scanner.readChar(); | |
| 1055 | |
| 1056 // Check for a chomping indicator. | |
| 1057 var chomping = _Chomping.CLIP; | |
| 1058 var increment = 0; | |
| 1059 var char = _scanner.peekChar(); | |
| 1060 if (char == PLUS || char == HYPHEN) { | |
| 1061 chomping = char == PLUS ? _Chomping.KEEP : _Chomping.STRIP; | |
| 1062 _scanner.readChar(); | |
| 1063 | |
| 1064 // Check for an indentation indicator. | |
| 1065 if (_isDigit) { | |
| 1066 // Check that the indentation indicator isn't the digit '0' (compare character codes, not the number 0). | |
| 1067 if (_scanner.peekChar() == NUMBER_0) { | |

Bob Nystrom
2014/10/31 20:03:29
NUMBER_0?
nweiz
2014/11/04 22:19:38
Done.
| |
| 1068 throw new YamlException( | |
| 1069 "0 may not be used as an indentation indicator.", | |
| 1070 _scanner.spanFrom(start)); | |
| 1071 } | |
| 1072 | |
| 1073 increment = _scanner.readChar() - NUMBER_0; | |
| 1074 } | |
| 1075 } else if (_isDigit) { | |
| 1076 // Do the same as above, but in the opposite order. | |
| 1077 if (_scanner.peekChar() == NUMBER_0) { | |

Bob Nystrom
2014/10/31 20:03:29
Ditto.
nweiz
2014/11/04 22:19:36
Done.
| |
| 1078 throw new YamlException( | |
| 1079 "0 may not be used as an indentation indicator.", | |
| 1080 _scanner.spanFrom(start)); | |
| 1081 } | |
| 1082 | |
| 1083 increment = _scanner.readChar() - NUMBER_0; | |
| 1084 | |
| 1085 char = _scanner.peekChar(); | |
| 1086 if (char == PLUS || char == HYPHEN) { | |
| 1087 chomping = char == PLUS ? _Chomping.KEEP : _Chomping.STRIP; | |
| 1088 _scanner.readChar(); | |
| 1089 } | |
| 1090 } | |
| 1091 | |
| 1092 // Eat whitespace and comments to the end of the line. | |
| 1093 while (_isBlank) { | |
| 1094 _scanner.readChar(); | |
| 1095 } | |
| 1096 | |
| 1097 if (_scanner.peekChar() == HASH) { | |
| 1098 while (!_isBreakOrEnd) { | |
| 1099 _scanner.readChar(); | |
| 1100 } | |
| 1101 } | |
| 1102 | |
| 1103 // Check if we're at the end of the line. | |
| 1104 if (!_isBreakOrEnd) { | |
| 1105 throw new YamlException("Expected comment or line break.", | |
| 1106 _scanner.emptySpan); | |
| 1107 } | |
| 1108 | |
| 1109 if (_isBreak) _skipLine(); | |
| 1110 | |
| 1111 var indent = 0; | |

Bob Nystrom
2014/10/31 20:03:28
Document this little block.
nweiz
2014/11/04 22:19:37
Done.
| |
| 1112 if (increment != 0) { | |
| 1113 indent = _indent >= 0 ? _indent + increment : increment; | |
| 1114 } | |
| 1115 | |
| 1116 // Scan the leading line breaks to determine the indentation level if | |
| 1117 // needed. | |
| 1118 var pair = _scanBlockScalarBreaks(indent); | |
| 1119 indent = pair.first; | |
| 1120 var trailingBreaks = pair.last; | |
| 1121 | |
| 1122 // Scan the block scalar contents. | |
| 1123 var buffer = new StringBuffer(); | |
| 1124 var leadingBreak = ''; | |
| 1125 var leadingBlank = false; | |
| 1126 var trailingBlank = false; | |
| 1127 while (_scanner.column == indent && !_scanner.isDone) { | |
| 1128 // Check for a document indicator. libyaml doesn't do this, but the spec | |
| 1129 // mandates it. See example 9.5: | |
| 1130 // http://yaml.org/spec/1.2/spec.html#id2801606. | |
| 1131 if (_scanner.column == 0 && _isBlankOrEndAt(3) && | |
| 1132 (_scanner.matches('---') || _scanner.matches('...'))) { | |
| 1133 break; | |
| 1134 } | |
| 1135 | |
| 1136 // We are at the beginning of a non-empty line. | |
| 1137 | |
| 1138 // Is there trailing whitespace? | |
| 1139 trailingBlank = _isBlank; | |
| 1140 | |
| 1141 // Check if we need to fold the leading line break. | |
| 1142 if (!literal && leadingBreak.isNotEmpty && !leadingBlank && | |
| 1143 !trailingBlank) { | |
| 1144 // Do we need to join the lines with a space? | |
| 1145 if (trailingBreaks.isEmpty) buffer.writeCharCode(SP); | |
| 1146 leadingBreak = ''; | |

Bob Nystrom
2014/10/31 20:03:29
Move this after the if.
nweiz
2014/11/04 22:19:38
Done.
| |
| 1147 } else { | |
| 1148 buffer.write(leadingBreak); | |
| 1149 leadingBreak = ''; | |
| 1150 } | |
| 1151 | |
| 1152 // Append the remaining line breaks. | |
| 1153 buffer.write(trailingBreaks); | |
| 1154 | |
| 1155 // Is there leading whitespace? | |
| 1156 leadingBlank = _isBlank; | |
| 1157 | |
| 1158 while (!_isBreakOrEnd) { | |
| 1159 buffer.writeCharCode(_scanner.readChar()); | |
| 1160 } | |
| 1161 | |
| 1162 // libyaml always reads a line here, but this breaks on block scalars at | |
| 1163 // the end of the document that end without newlines. See example 8.1: | |
| 1164 // http://yaml.org/spec/1.2/spec.html#id2793888. | |
| 1165 if (!_scanner.isDone) leadingBreak = _readLine(); | |
| 1166 | |
| 1167 // Eat the following indentation and spaces. | |
| 1168 var pair = _scanBlockScalarBreaks(indent); | |
| 1169 indent = pair.first; | |
| 1170 trailingBreaks = pair.last; | |
| 1171 } | |
| 1172 | |
| 1173 // Chomp the tail: STRIP drops everything, CLIP keeps only the last line's break, KEEP also keeps the trailing breaks. | |
| 1174 if (chomping != _Chomping.STRIP) { | |

Bob Nystrom
2014/10/31 20:03:28
Nit, but maybe make these single-line ifs?
nweiz
2014/11/04 22:19:37
Done.
| |
| 1175 buffer.write(leadingBreak); | |
| 1176 } | |
| 1177 if (chomping == _Chomping.KEEP) { | |
| 1178 buffer.write(trailingBreaks); | |
| 1179 } | |
| 1180 | |
| 1181 return new ScalarToken(_scanner.spanFrom(start), buffer.toString(), | |
| 1182 literal ? ScalarStyle.LITERAL : ScalarStyle.FOLDED); | |
| 1183 } | |
| 1184 | |
| 1185 /// Scans indentation spaces and line breaks for a block scalar. | |
| 1186 /// | |
| 1187 /// Determines the indentation level if needed (that is, if [indent] is 0). Returns the new indentation | |
| 1188 /// level and the text of the line breaks. | |
| 1189 Pair<int, String> _scanBlockScalarBreaks(int indent) { | |
| 1190 var maxIndent = 0; | |
| 1191 var breaks = new StringBuffer(); | |
| 1192 | |
| 1193 while (true) { | |
| 1194 while ((indent == 0 || _scanner.column < indent) && | |
| 1195 _scanner.peekChar() == SP) { | |
| 1196 _scanner.readChar(); | |
| 1197 } | |
| 1198 | |
| 1199 if (_scanner.column > maxIndent) maxIndent = _scanner.column; | |
| 1200 | |
| 1201 // libyaml throws an error here if a tab character is detected, but the | |
| 1202 // spec treats tabs like any other non-space character. See example 8.2: | |
| 1203 // http://yaml.org/spec/1.2/spec.html#id2794311. | |
| 1204 | |
| 1205 if (!_isBreak) break; | |
| 1206 breaks.write(_readLine()); | |
| 1207 } | |
| 1208 | |
| 1209 if (indent == 0) { | |
| 1210 indent = maxIndent; | |
| 1211 if (indent < _indent + 1) indent = _indent + 1; | |
| 1212 | |
| 1213 // libyaml forces indent to be at least 1 here, but that doesn't seem to | |
| 1214 // be supported by the spec. | |
| 1215 } | |
| 1216 | |
| 1217 return new Pair(indent, breaks.toString()); | |
| 1218 } | |
| 1219 | |
| 1220 /// Scans a single-quoted scalar if [singleQuote] is true, or a double-quoted scalar otherwise. | |
| 1221 Token _scanFlowScalar({bool singleQuote: false}) { | |
| 1222 var start = _scanner.state; | |
| 1223 var buffer = new StringBuffer(); | |
| 1224 | |
| 1225 // Eat the left quote. | |
| 1226 _scanner.readChar(); | |
| 1227 | |
| 1228 while (true) { | |
| 1229 // Check that there are no document indicators at the beginning of the | |
| 1230 // line. | |
| 1231 if (_scanner.column == 0 && _isBlankOrEndAt(3) && | |
| 1232 (_scanner.scan("---") || _scanner.scan("..."))) { | |
| 1233 _scanner.error("Unexpected document indicator."); | |
| 1234 } | |

Bob Nystrom
2014/10/31 20:03:28
Hoist this out into a function?
nweiz
2014/11/04 22:19:36
Done.
| |
| 1235 | |
| 1236 if (_scanner.isDone) { | |
| 1237 throw new YamlException("Unexpected end of file.", _scanner.emptySpan); | |
| 1238 } | |
| 1239 | |
| 1240 var leadingBlanks = false; | |
| 1241 while (!_isBlankOrEnd) { | |
| 1242 var char = _scanner.peekChar(); | |
| 1243 if (singleQuote && char == SINGLE_QUOTE && | |
| 1244 _scanner.peekChar(1) == SINGLE_QUOTE) { | |
| 1245 // An escaped single quote. | |
| 1246 _scanner.readChar(); | |
| 1247 _scanner.readChar(); | |
| 1248 buffer.writeCharCode(SINGLE_QUOTE); | |
| 1249 } else if (char == (singleQuote ? SINGLE_QUOTE : DOUBLE_QUOTE)) { | |
| 1250 // The closing quote. | |
| 1251 break; | |
| 1252 } else if (!singleQuote && char == BACKSLASH && _isBreakAt(1)) { | |
| 1253 // An escaped newline. | |
| 1254 _scanner.readChar(); | |
| 1255 _skipLine(); | |
| 1256 leadingBlanks = true; | |
| 1257 break; | |
| 1258 } else if (!singleQuote && char == BACKSLASH) { | |
| 1259 var escapeStart = _scanner.state; | |
| 1260 | |
| 1261 // An escape sequence. [codeLength] stays null unless hex digits follow the escape character. | |
| 1262 var codeLength = null; | |
| 1263 switch (_scanner.peekChar(1)) { | |
| 1264 case NUMBER_0: | |
| 1265 buffer.writeCharCode(NULL); | |
| 1266 break; | |
| 1267 case LETTER_A: | |
| 1268 buffer.writeCharCode(BELL); | |
| 1269 break; | |
| 1270 case LETTER_B: | |
| 1271 buffer.writeCharCode(BACKSPACE); | |
| 1272 break; | |
| 1273 case LETTER_T: | |
| 1274 case TAB: | |

Bob Nystrom
2014/10/31 20:03:29
Oh, YAML. You so crazy.
| |
| 1275 buffer.writeCharCode(TAB); | |
| 1276 break; | |
| 1277 case LETTER_N: | |
| 1278 buffer.writeCharCode(LF); | |
| 1279 break; | |
| 1280 case LETTER_V: | |
| 1281 buffer.writeCharCode(VERTICAL_TAB); | |
| 1282 break; | |
| 1283 case LETTER_F: | |
| 1284 buffer.writeCharCode(FORM_FEED); | |
| 1285 break; | |
| 1286 case LETTER_R: | |
| 1287 buffer.writeCharCode(CR); | |
| 1288 break; | |
| 1289 case LETTER_E: | |
| 1290 buffer.writeCharCode(ESCAPE); | |
| 1291 break; | |
| 1292 case SP: | |
| 1293 case DOUBLE_QUOTE: | |
| 1294 case SLASH: | |
| 1295 case BACKSLASH: | |
| 1296 // libyaml doesn't support an escaped forward slash, but it was | |
| 1297 // added in YAML 1.2. See section 5.7: | |
| 1298 // http://yaml.org/spec/1.2/spec.html#id2776092 | |
| 1299 buffer.writeCharCode(_scanner.peekChar(1)); | |
| 1300 break; | |
| 1301 case LETTER_CAP_N: | |
| 1302 buffer.writeCharCode(NEL); | |
| 1303 break; | |
| 1304 case UNDERSCORE: | |
| 1305 buffer.writeCharCode(NBSP); | |
| 1306 break; | |
| 1307 case LETTER_CAP_L: | |
| 1308 buffer.writeCharCode(LINE_SEPARATOR); | |
| 1309 break; | |
| 1310 case LETTER_CAP_P: | |
| 1311 buffer.writeCharCode(PARAGRAPH_SEPARATOR); | |
| 1312 break; | |
| 1313 case LETTER_X: | |
| 1314 codeLength = 2; | |
| 1315 break; | |
| 1316 case LETTER_U: | |
| 1317 codeLength = 4; | |
| 1318 break; | |
| 1319 case LETTER_CAP_U: | |
| 1320 codeLength = 8; | |
| 1321 break; | |
| 1322 default: | |
| 1323 throw new YamlException("Unknown escape character.", | |
| 1324 _scanner.spanFrom(escapeStart)); | |
| 1325 } | |
| 1326 | |
| 1327 _scanner.readChar(); | |
| 1328 _scanner.readChar(); | |
| 1329 | |
| 1330 if (codeLength != null) { | |
| 1331 var value = 0; | |
| 1332 for (var i = 0; i < codeLength; i++) { | |
| 1333 if (!_isHex) { | |
| 1334 _scanner.readChar(); | |
| 1335 throw new YamlException( | |
| 1336 "Expected $codeLength-digit hexidecimal number.", | |
| 1337 _scanner.spanFrom(escapeStart)); | |
| 1338 } | |
| 1339 | |
| 1340 value = (value << 4) + _asHex(_scanner.readChar()); | |
| 1341 } | |
| 1342 | |
| 1343 // Check the value (no surrogates, nothing above U+10FFFF) and write the character. | |
| 1344 if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { | |
| 1345 throw new YamlException( | |
| 1346 "Invalid Unicode character escape code.", | |
| 1347 _scanner.spanFrom(escapeStart)); | |
| 1348 } | |
| 1349 | |
| 1350 buffer.writeCharCode(value); | |
| 1351 } | |
| 1352 } else { | |
| 1353 buffer.writeCharCode(_scanner.readChar()); | |
| 1354 } | |
| 1355 } | |
| 1356 | |
| 1357 // Check if we're at the end of a scalar. | |
| 1358 if (_scanner.peekChar() == (singleQuote ? SINGLE_QUOTE : DOUBLE_QUOTE)) { | |
| 1359 break; | |
| 1360 } | |
| 1361 | |
| 1362 var whitespace = new StringBuffer(); | |
| 1363 var leadingBreak = ''; | |
| 1364 var trailingBreaks = new StringBuffer(); | |
| 1365 while (_isBlank || _isBreak) { | |
| 1366 if (_isBlank) { | |
| 1367 // Consume a space or a tab. | |
| 1368 if (!leadingBlanks) { | |
| 1369 whitespace.writeCharCode(_scanner.readChar()); | |
| 1370 } else { | |
| 1371 _scanner.readChar(); | |
| 1372 } | |
| 1373 } else { | |
| 1374 // Check if it's a first line break. | |
| 1375 if (!leadingBlanks) { | |
| 1376 whitespace.clear(); | |
| 1377 leadingBreak = _readLine(); | |
| 1378 leadingBlanks = true; | |
| 1379 } else { | |
| 1380 trailingBreaks.write(_readLine()); | |
| 1381 } | |
| 1382 } | |
| 1383 } | |
| 1384 | |
| 1385 // Join the whitespace or fold line breaks. | |
| 1386 if (leadingBlanks) { | |
| 1387 if (leadingBreak.isNotEmpty && trailingBreaks.isEmpty) { | |
| 1388 buffer.writeCharCode(SP); | |
| 1389 } else { | |
| 1390 buffer.write(trailingBreaks); | |
| 1391 } | |
| 1392 } else { | |
| 1393 buffer.write(whitespace); | |
| 1394 whitespace.clear(); | |
| 1395 } | |
| 1396 } | |
| 1397 | |
| 1398 // Eat the right quote. | |
| 1399 _scanner.readChar(); | |
| 1400 | |
| 1401 return new ScalarToken(_scanner.spanFrom(start), buffer.toString(), | |
| 1402 singleQuote ? ScalarStyle.SINGLE_QUOTED : ScalarStyle.DOUBLE_QUOTED); | |
| 1403 } | |
| 1404 | |
| 1405 /// Scans a plain (unquoted) scalar, folding the line breaks between its lines. | |
| 1406 Token _scanPlainScalar() { | |
| 1407 var start = _scanner.state; | |
| 1408 var buffer = new StringBuffer(); | |
| 1409 var leadingBreak = ''; | |
| 1410 var trailingBreaks = ''; | |
| 1411 var whitespace = new StringBuffer(); | |
| 1412 var indent = _indent + 1; | |
| 1413 | |
| 1414 while (true) { | |
| 1415 // Check for a document indicator. | |
| 1416 if (_scanner.column == 0 && _isBlankOrEndAt(3) && | |
| 1417 (_scanner.matches('---') || _scanner.matches('...'))) { | |
| 1418 break; | |
| 1419 } | |
| 1420 | |
| 1421 // Check for a comment. | |
| 1422 if (_scanner.peekChar() == HASH) break; | |
| 1423 | |
| 1424 if (_isPlainChar) { | |
| 1425 // Join the whitespace or fold line breaks. | |
| 1426 if (leadingBreak.isNotEmpty) { | |
| 1427 if (trailingBreaks.isEmpty) { | |
| 1428 buffer.writeCharCode(SP); | |
| 1429 } else { | |
| 1430 buffer.write(trailingBreaks); | |
| 1431 } | |
| 1432 leadingBreak = ''; | |
| 1433 trailingBreaks = ''; | |
| 1434 } else { | |
| 1435 buffer.write(whitespace); | |
| 1436 whitespace.clear(); | |
| 1437 } | |
| 1438 } | |
| 1439 | |
| 1440 // libyaml's notion of valid identifiers differs substantially from YAML | |
| 1441 // 1.2's. We use [_isPlainChar] instead of libyaml's character here. | |
| 1442 while (_isPlainChar) { | |
| 1443 buffer.writeCharCode(_scanner.readChar()); | |
| 1444 } | |
| 1445 | |
| 1446 // Is it the end? | |
| 1447 if (!_isBlank && !_isBreak) break; | |
| 1448 | |
| 1449 while (_isBlank || _isBreak) { | |
| 1450 if (_isBlank) { | |
| 1451 // Check for a tab character messing up the indentation. | |
| 1452 if (leadingBreak.isNotEmpty && _scanner.column < indent && | |
| 1453 _scanner.peekChar() == TAB) { | |
| 1454 _scanner.error("Expected a space but found a tab.", length: 1); | |
| 1455 } | |
| 1456 | |
| 1457 if (leadingBreak.isEmpty) { | |
| 1458 whitespace.writeCharCode(_scanner.readChar()); | |
| 1459 } else { | |
| 1460 _scanner.readChar(); | |
| 1461 } | |
| 1462 } else { | |
| 1463 // The first line break is folded; every later one is accumulated so that no blank line is lost. | |
| 1464 if (leadingBreak.isEmpty) { | |
| 1465 leadingBreak = _readLine(); | |
| 1466 whitespace.clear(); | |
| 1467 } else { | |
| 1468 trailingBreaks += _readLine(); | |
| 1469 } | |
| 1470 } | |
| 1471 } | |
| 1472 | |
| 1473 // Check the indentation level. | |
| 1474 if (_inBlockContext && _scanner.column < indent) break; | |
| 1475 } | |
| 1476 | |
| 1477 // Allow a simple key after a plain scalar with leading blanks. | |
| 1478 if (leadingBreak.isNotEmpty) _simpleKeyAllowed = true; | |
| 1479 | |
| 1480 return new ScalarToken(_scanner.spanFrom(start), buffer.toString(), | |
| 1481 ScalarStyle.PLAIN); | |
| 1482 } | |
| 1483 | |
| 1484 /// Moves past the current line break (CR, LF, or CR LF), if there is one; otherwise does nothing. | |
| 1485 void _skipLine() { | |
| 1486 var char = _scanner.peekChar(); | |
| 1487 if (char != CR && char != LF) return; | |
| 1488 _scanner.readChar(); | |
| 1489 if (char == CR && _scanner.peekChar() == LF) _scanner.readChar(); | |
| 1490 } | |
| 1491 | |
| 1492 /// Moves past the current line break and returns "\n", throwing a [YamlException] if the scanner isn't at a CR or LF. | |
| 1493 String _readLine() { | |
| 1494 var char = _scanner.peekChar(); | |
| 1495 | |
| 1496 // libyaml supports NEL, PS, and LS characters as line separators, but this | |
| 1497 // is explicitly forbidden in section 5.4 of the YAML spec. | |
| 1498 if (char != CR && char != LF) { | |
| 1499 throw new YamlException("Expected newline.", _scanner.emptySpan); | |
| 1500 } | |
| 1501 | |
| 1502 _scanner.readChar(); | |
| 1503 // CR LF | CR | LF -> LF | |
| 1504 if (char == CR && _scanner.peekChar() == LF) _scanner.readChar(); | |
| 1505 return "\n"; | |
| 1506 } | |
| 1507 | |
| 1508 /// Returns whether the character at [offset] is a space or a tab. | |
| 1509 bool _isBlankAt(int offset) { | |
| 1510 var char = _scanner.peekChar(offset); | |
| 1511 return char == SP || char == TAB; | |
| 1512 } | |
| 1513 | |
| 1514 /// Returns whether the character at [offset] is a line break (CR or LF). | |
| 1515 bool _isBreakAt(int offset) { | |
| 1516 // libyaml considers NEL, LS, and PS to be line breaks as well, but that's | |
| 1517 // contrary to the spec. | |
| 1518 var char = _scanner.peekChar(offset); | |
| 1519 return char == CR || char == LF; | |
| 1520 } | |
| 1521 | |
| 1522 /// Returns whether the character at [offset] is a space, a tab, or a line | |
| 1523 /// break (CR or LF), or is past the end of the source. | |
| 1524 bool _isBlankOrEndAt(int offset) { | |
| 1525 var char = _scanner.peekChar(offset); | |
| 1526 return char == null || char == SP || char == TAB || char == CR || | |
| 1527 char == LF; | |
| 1528 } | |
| 1529 | |
| 1530 /// Returns whether the character at [offset] is a plain character: a ':' counts only if the next character is plain-safe, and a '#' only if the previous character isn't a space or tab. | |
| 1531 /// | |
| 1532 /// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c). | |
| 1533 bool _isPlainCharAt(int offset) { | |
| 1534 switch (_scanner.peekChar(offset)) { | |
| 1535 case COLON: | |
| 1536 return _isPlainSafeAt(offset + 1); | |
| 1537 case HASH: | |
| 1538 var previous = _scanner.peekChar(offset - 1); | |
| 1539 return previous != SP && previous != TAB; | |
| 1540 default: | |
| 1541 return _isPlainSafeAt(offset); | |
| 1542 } | |
| 1543 } | |
| 1544 | |
| 1545 /// Returns whether the character at [offset] is a plain-safe character; returns false past the end of the source. | |
| 1546 /// | |
| 1547 /// See http://yaml.org/spec/1.2/spec.html#ns-plain-safe(c). | |
| 1548 bool _isPlainSafeAt(int offset) { | |
| 1549 var char = _scanner.peekChar(offset); | |
| 1550 switch (char) { | |
| 1551 case COMMA: | |
| 1552 case LEFT_SQUARE: | |
| 1553 case RIGHT_SQUARE: | |
| 1554 case LEFT_CURLY: | |
| 1555 case RIGHT_CURLY: | |
| 1556 // These characters are delimiters in a flow context and thus are only | |
| 1557 // safe in a block context. | |
| 1558 return _inBlockContext; | |
| 1559 case SP: | |
| 1560 case TAB: | |
| 1561 case LF: | |
| 1562 case CR: | |
| 1563 case BOM: | |
| 1564 return false; | |
| 1565 case NEL: | |
| 1566 return true; | |
| 1567 default: | |
| 1568 return char != null && | |
| 1569 ((char >= 0x00020 && char <= 0x00007E) || | |
| 1570 (char >= 0x000A0 && char <= 0x00D7FF) || | |
| 1571 (char >= 0x0E000 && char <= 0x00FFFD) || | |
| 1572 (char >= 0x10000 && char <= 0x10FFFF)); | |
| 1573 } | |
| 1574 } | |
| 1575 | |
| 1576 /// Returns the hexadecimal value of [char], which is assumed (not checked) to be the code of a hex digit: 0-9, A-F, or a-f. | |
| 1577 int _asHex(int char) { | |
| 1578 if (char <= NUMBER_9) return char - NUMBER_0; | |
| 1579 if (char <= LETTER_CAP_F) return 10 + char - LETTER_CAP_A; | |
| 1580 return 10 + char - LETTER_A; | |
| 1581 } | |
| 1582 } | |
| 1583 | |
| 1584 /// An immutable record of the location of a potential simple key. | |
| 1585 class _SimpleKey { | |
| 1586 /// The index of the token that begins the simple key. | |
| 1587 /// | |
| 1588 /// This is the index relative to all tokens emitted, rather than relative to | |
| 1589 /// [_tokens]. | |
| 1590 final int tokenNumber; | |
| 1591 | |
| 1592 /// The source location of the beginning of the simple key. | |
| 1593 /// | |
| 1594 /// This is used for error reporting and for determining when a simple key is | |
| 1595 /// no longer on the current line. | |
| 1596 final SourceLocation location; | |
| 1597 | |
| 1598 /// Whether this key must exist for the document to be scanned; null if the constructor's `required` argument is omitted. | |
| 1599 final bool required; | |
| 1600 | |
| 1601 _SimpleKey(this.tokenNumber, this.location, {bool required}) | |
| 1602 : required = required; | |
| 1603 } | |
| 1604 | |
| 1605 /// An enum of chomping indicators that describe how to handle trailing | |
| 1606 /// line breaks for a block scalar. | |
| 1607 /// | |
| 1608 /// See http://yaml.org/spec/1.2/spec.html#id2794534. | |
| 1609 class _Chomping { | |
| 1610 /// All trailing line breaks are discarded. | |
| 1611 static const STRIP = const _Chomping("STRIP"); | |
| 1612 | |
| 1613 /// A single trailing newline is retained. | |
| 1614 static const CLIP = const _Chomping("CLIP"); | |
| 1615 | |
| 1616 /// All trailing line breaks are preserved. | |
| 1617 static const KEEP = const _Chomping("KEEP"); | |
| 1618 | |
| 1619 final String name; | |
| 1620 | |
| 1621 const _Chomping(this.name); | |
| 1622 | |
| 1623 String toString() => name; | |
| 1624 } | |
| OLD | NEW |