yaml/lib/src/scanner.dart - Issue 1400473008: Roll Observatory packages and add a roll script

Side by Side Diff: yaml/lib/src/scanner.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.

4

5 library yaml.scanner;

6

7 import 'package:collection/collection.dart';

8 import 'package:string_scanner/string_scanner.dart';

9 import 'package:source_span/source_span.dart';

10

11 import 'style.dart';

12 import 'token.dart';

13 import 'utils.dart';

14 import 'yaml_exception.dart';

15

16 /// A scanner that reads a string of Unicode characters and emits [Token]s.

17 ///

18 /// This is based on the libyaml scanner, available at

19 /// https://github.com/yaml/libyaml/blob/master/src/scanner.c. The license for

20 /// that is available in ../../libyaml-license.txt.

21 class Scanner {

// Code units of ASCII whitespace and punctuation characters.
static const int TAB = 0x9;
static const int LF = 0xA;
static const int CR = 0xD;
static const int SP = 0x20;
static const int DOLLAR = 0x24;
static const int LEFT_PAREN = 0x28;
static const int RIGHT_PAREN = 0x29;
static const int PLUS = 0x2B;
static const int COMMA = 0x2C;
static const int HYPHEN = 0x2D;
static const int PERIOD = 0x2E;
static const int QUESTION = 0x3F;
static const int COLON = 0x3A;
static const int SEMICOLON = 0x3B;
static const int EQUALS = 0x3D;
static const int LEFT_SQUARE = 0x5B;
static const int RIGHT_SQUARE = 0x5D;
static const int LEFT_CURLY = 0x7B;
static const int RIGHT_CURLY = 0x7D;
static const int HASH = 0x23;
static const int AMPERSAND = 0x26;
static const int ASTERISK = 0x2A;
static const int EXCLAMATION = 0x21;
static const int VERTICAL_BAR = 0x7C;
static const int LEFT_ANGLE = 0x3C;
static const int RIGHT_ANGLE = 0x3E;
static const int SINGLE_QUOTE = 0x27;
static const int DOUBLE_QUOTE = 0x22;
static const int PERCENT = 0x25;
static const int AT = 0x40;
static const int GRAVE_ACCENT = 0x60;
static const int TILDE = 0x7E;

// Control characters, remaining punctuation, and non-ASCII separators.
static const int NULL = 0x0;
static const int BELL = 0x7;
static const int BACKSPACE = 0x8;
static const int VERTICAL_TAB = 0xB;
static const int FORM_FEED = 0xC;
static const int ESCAPE = 0x1B;
static const int SLASH = 0x2F;
static const int BACKSLASH = 0x5C;
static const int UNDERSCORE = 0x5F;
static const int NEL = 0x85;
static const int NBSP = 0xA0;
static const int LINE_SEPARATOR = 0x2028;
static const int PARAGRAPH_SEPARATOR = 0x2029;
static const int BOM = 0xFEFF;

// Bounds of the decimal digit range.
static const int NUMBER_0 = 0x30;
static const int NUMBER_9 = 0x39;

// Lower-case letters referenced by the scanner.
static const int LETTER_A = 0x61;
static const int LETTER_B = 0x62;
static const int LETTER_E = 0x65;
static const int LETTER_F = 0x66;
static const int LETTER_N = 0x6E;
static const int LETTER_R = 0x72;
static const int LETTER_T = 0x74;
static const int LETTER_U = 0x75;
static const int LETTER_V = 0x76;
static const int LETTER_X = 0x78;
static const int LETTER_Z = 0x7A;

// Upper-case letters referenced by the scanner.
static const int LETTER_CAP_A = 0x41;
static const int LETTER_CAP_F = 0x46;
static const int LETTER_CAP_L = 0x4C;
static const int LETTER_CAP_N = 0x4E;
static const int LETTER_CAP_P = 0x50;
static const int LETTER_CAP_U = 0x55;
static const int LETTER_CAP_X = 0x58;
static const int LETTER_CAP_Z = 0x5A;

93

/// The [SpanScanner] that reads characters from the source text.
///
/// It also supplies the line and column information and the [SourceSpan]s
/// attached to tokens.
final SpanScanner _scanner;

/// Whether the [TokenType.STREAM_START] token that opens the YAML stream has
/// been produced.
bool _streamStartProduced = false;

/// Whether the [TokenType.STREAM_END] token that closes the YAML stream has
/// been produced.
bool _streamEndProduced = false;

/// Tokens that have been scanned but not yet emitted.
///
/// Tokens are buffered so that a [TokenType.KEY] token can still be inserted
/// in front of them once the scanner learns that they form a mapping key.
final QueueList<Token> _tokens = new QueueList<Token>();

/// The number of tokens already emitted.
///
/// Tokens still waiting in [_tokens] are not counted.
int _tokensParsed = 0;

/// Whether the token at the head of [_tokens] may be returned.
///
/// It is not ready while a [TokenType.KEY] might still be inserted before it.
bool _tokenAvailable = false;

/// The indentation levels of the currently open nested block contexts.
///
/// The YAML spec specifies that the initial indentation level is -1 spaces.
final List<int> _indents = <int>[-1];

/// Whether a simple key may start at the current position.
///
/// A simple key is any mapping key that doesn't have an explicit "?".
bool _simpleKeyAllowed = true;

/// The candidate simple key for each level of flow nesting.
///
/// A `null` entry means that the corresponding level has no valid simple key.
///
/// When a ":" is parsed while a candidate is available, a [TokenType.KEY]
/// token is inserted into [_tokens] before the candidate's token, which tells
/// the parser that the candidate is a mapping key.
final List<_SimpleKey> _simpleKeys = <_SimpleKey>[null];

145

/// The indentation level on top of the [_indents] stack.
int get _indent {
  return _indents.last;
}

148

/// Whether the scanner is in a block-level structure rather than a flow-level
/// one.
///
/// This holds while [_simpleKeys] has exactly one entry, i.e. no flow
/// collection is open.
bool get _inBlockContext {
  return _simpleKeys.length == 1;
}

152

/// Whether the scanner is at a line break or at the end of the source.
bool get _isBreakOrEnd {
  if (_scanner.isDone) return true;
  return _isBreak;
}

155

/// Whether the character at the current position is a line break.
bool get _isBreak {
  return _isBreakAt(0);
}

158

/// Whether the scanner is at whitespace or at the end of the source.
bool get _isBlankOrEnd {
  return _isBlankOrEndAt(0);
}

161

/// Whether the character at the current position is whitespace.
bool get _isBlank {
  return _isBlankAt(0);
}

164

/// Whether the current character is a valid tag name character.
///
/// Accepts ASCII letters, decimal digits and the punctuation listed below.
/// The underscore is included: it is part of the URI character set that tag
/// names are drawn from, and without it a tag such as `!foo_bar` would be cut
/// off at the `_`.
///
/// Returns `false` at the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-tag-char.
bool get _isTagChar {
  var char = _scanner.peekChar();
  if (char == null) return false;
  switch (char) {
    case HYPHEN:
    case UNDERSCORE:
    case SEMICOLON:
    case SLASH:
    case COLON:
    case AT:
    case AMPERSAND:
    case EQUALS:
    case PLUS:
    case DOLLAR:
    case PERIOD:
    case TILDE:
    case QUESTION:
    case ASTERISK:
    case SINGLE_QUOTE:
    case LEFT_PAREN:
    case RIGHT_PAREN:
    case PERCENT:
      return true;
    default:
      return (char >= NUMBER_0 && char <= NUMBER_9) ||
          (char >= LETTER_A && char <= LETTER_Z) ||
          (char >= LETTER_CAP_A && char <= LETTER_CAP_Z);
  }
}

196

/// Whether the current character may appear in an anchor name.
///
/// That is any character accepted by [_isNonSpace] other than the flow
/// indicators `,`, `[`, `]`, `{` and `}`.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-anchor-name.
bool get _isAnchorChar {
  if (!_isNonSpace) return false;

  var char = _scanner.peekChar();
  var isFlowIndicator = char == COMMA ||
      char == LEFT_SQUARE ||
      char == RIGHT_SQUARE ||
      char == LEFT_CURLY ||
      char == RIGHT_CURLY;
  return !isFlowIndicator;
}

214

/// Whether the character at the current position is a decimal digit.
///
/// Returns `false` at the end of the source.
bool get _isDigit {
  var char = _scanner.peekChar();
  if (char == null) return false;
  return NUMBER_0 <= char && char <= NUMBER_9;
}

220

/// Whether the character at the current position is a hexadecimal digit.
///
/// Both lower-case and upper-case letters `a` through `f` are accepted.
/// Returns `false` at the end of the source.
bool get _isHex {
  var char = _scanner.peekChar();
  if (char == null) return false;

  var isDecimal = NUMBER_0 <= char && char <= NUMBER_9;
  var isLowerHex = LETTER_A <= char && char <= LETTER_F;
  var isUpperHex = LETTER_CAP_A <= char && char <= LETTER_CAP_F;
  return isDecimal || isLowerHex || isUpperHex;
}

230

/// Whether the character at the current position is a plain character.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c).
bool get _isPlainChar {
  return _isPlainCharAt(0);
}

235

/// Whether the character at the current position is a printable character
/// other than a line break or byte-order mark.
///
/// Returns `false` at the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#nb-char.
bool get _isNonBreak {
  var char = _scanner.peekChar();
  if (char == null) return false;

  // The byte-order mark lies inside one of the printable ranges below, so the
  // exclusions have to be tested first.
  if (char == LF || char == CR || char == BOM) return false;
  if (char == TAB || char == NEL) return true;

  if (char >= 0x00020 && char <= 0x00007E) return true;
  if (char >= 0x000A0 && char <= 0x00D7FF) return true;
  if (char >= 0x0E000 && char <= 0x00FFFD) return true;
  return char >= 0x10000 && char <= 0x10FFFF;
}

258

/// Whether the character at the current position is a printable character
/// other than whitespace.
///
/// Line feeds, carriage returns, the byte-order mark and spaces are rejected
/// explicitly; a tab is rejected because it lies outside every printable
/// range tested here. Returns `false` at the end of the source.
///
/// See http://yaml.org/spec/1.2/spec.html#ns-char.
bool get _isNonSpace {
  var char = _scanner.peekChar();
  if (char == null) return false;

  // The byte-order mark lies inside one of the printable ranges below, so the
  // exclusions have to be tested first.
  if (char == LF || char == CR || char == BOM || char == SP) return false;
  if (char == NEL) return true;

  if (char >= 0x00020 && char <= 0x00007E) return true;
  if (char >= 0x000A0 && char <= 0x00D7FF) return true;
  if (char >= 0x0E000 && char <= 0x00FFFD) return true;
  return char >= 0x10000 && char <= 0x10FFFF;
}

281

/// Whether the current position begins a document indicator (`---` or `...`).
///
/// An indicator must start in column 0 and be followed by whitespace or the
/// end of the source.
///
/// If so, this sets the scanner's last match to that indicator.
bool get _isDocumentIndicator {
  if (_scanner.column != 0) return false;
  if (!_isBlankOrEndAt(3)) return false;
  return _scanner.matches('---') || _scanner.matches('...');
}

290

/// Creates a scanner that tokenizes [source].
///
/// [sourceUrl] identifies where [source] came from and is forwarded to the
/// underlying [SpanScanner]; it can be a String or a [Uri].
Scanner(String source, {sourceUrl})
    : _scanner = new SpanScanner.eager(source, sourceUrl: sourceUrl);

296

/// Consumes and returns the next token.
///
/// Throws a [StateError] if the [TokenType.STREAM_END] token has already been
/// returned.
Token scan() {
  if (_streamEndProduced) throw new StateError("Out of tokens.");
  if (!_tokenAvailable) _fetchMoreTokens();

  var next = _tokens.removeFirst();
  _tokensParsed++;

  // The new head of the queue may still turn out to be a simple key, so it
  // has to be re-validated before it is handed out.
  _tokenAvailable = false;
  _streamEndProduced = next is Token && next.type == TokenType.STREAM_END;
  return next;
}

309

/// Discards the next token and returns the one following it without
/// consuming that one.
Token advance() {
  scan();
  var following = peek();
  return following;
}

315

/// Returns the next token without consuming it.
///
/// Returns `null` once the [TokenType.STREAM_END] token has been consumed.
Token peek() {
  if (_streamEndProduced) return null;

  if (!_tokenAvailable) {
    _fetchMoreTokens();
  }
  return _tokens.first;
}

322

/// Ensures that [_tokens] contains at least one token which can be returned.
void _fetchMoreTokens() {
  // Whether another token has to be fetched before the head of the queue can
  // be handed out.
  bool mustKeepFetching() {
    if (_tokens.isEmpty) return true;

    _staleSimpleKeys();

    // If the token at the head of the queue could be a simple key, more
    // tokens have to be scanned to find out whether it is one. Otherwise the
    // key's value might be emitted before its `KEY` token.
    for (var key in _simpleKeys) {
      if (key != null && key.tokenNumber == _tokensParsed) return true;
    }
    return false;
  }

  while (mustKeepFetching()) {
    _fetchNextToken();
  }
  _tokenAvailable = true;
}

342

/// The dispatcher for token fetchers.
///
/// Produces the stream-start token on the first call. On later calls it skips
/// to the next token, drops stale simple keys, closes block contexts that
/// are indented deeper than the current column, and then picks the fetcher
/// that matches the character at the current position.
///
/// Reports a scanner error for a character that cannot start a token.
void _fetchNextToken() {
  if (!_streamStartProduced) {
    _fetchStreamStart();
    return;
  }

  _scanToNextToken();
  _staleSimpleKeys();
  _unrollIndent(_scanner.column);

  if (_scanner.isDone) {
    _fetchStreamEnd();
    return;
  }

  // Directives and document indicators are only recognized in column 0.
  if (_scanner.column == 0) {
    if (_scanner.peekChar() == PERCENT) {
      _fetchDirective();
      return;
    }

    if (_isBlankOrEndAt(3)) {
      if (_scanner.matches('---')) {
        _fetchDocumentIndicator(TokenType.DOCUMENT_START);
        return;
      }

      if (_scanner.matches('...')) {
        _fetchDocumentIndicator(TokenType.DOCUMENT_END);
        return;
      }
    }
  }

  // Every case below returns, so nothing may follow this switch.
  switch (_scanner.peekChar()) {
    case LEFT_SQUARE:
      _fetchFlowCollectionStart(TokenType.FLOW_SEQUENCE_START);
      return;
    case LEFT_CURLY:
      _fetchFlowCollectionStart(TokenType.FLOW_MAPPING_START);
      return;
    case RIGHT_SQUARE:
      _fetchFlowCollectionEnd(TokenType.FLOW_SEQUENCE_END);
      return;
    case RIGHT_CURLY:
      _fetchFlowCollectionEnd(TokenType.FLOW_MAPPING_END);
      return;
    case COMMA:
      _fetchFlowEntry();
      return;
    case ASTERISK:
      _fetchAnchor(anchor: false);
      return;
    case AMPERSAND:
      _fetchAnchor(anchor: true);
      return;
    case EXCLAMATION:
      _fetchTag();
      return;
    case SINGLE_QUOTE:
      _fetchFlowScalar(singleQuote: true);
      return;
    case DOUBLE_QUOTE:
      _fetchFlowScalar(singleQuote: false);
      return;
    case VERTICAL_BAR:
      if (!_inBlockContext) _invalidScalarCharacter();
      _fetchBlockScalar(literal: true);
      return;
    case RIGHT_ANGLE:
      if (!_inBlockContext) _invalidScalarCharacter();
      _fetchBlockScalar(literal: false);
      return;
    case PERCENT:
    case AT:
    case GRAVE_ACCENT:
      _invalidScalarCharacter();
      return;

    // These characters may sometimes begin plain scalars.
    case HYPHEN:
      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchBlockEntry();
      }
      return;
    case QUESTION:
      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchKey();
      }
      return;
    case COLON:
      if (!_inBlockContext && _tokens.isNotEmpty) {
        // If a colon follows a "JSON-like" value (an explicit map or list, or
        // a quoted string) it isn't required to have whitespace after it
        // since it unambiguously describes a map.
        var token = _tokens.last;
        if (token.type == TokenType.FLOW_SEQUENCE_END ||
            token.type == TokenType.FLOW_MAPPING_END ||
            (token.type == TokenType.SCALAR && token.style.isQuoted)) {
          _fetchValue();
          return;
        }
      }

      if (_isPlainCharAt(1)) {
        _fetchPlainScalar();
      } else {
        _fetchValue();
      }
      return;
    default:
      if (!_isNonBreak) _invalidScalarCharacter();

      _fetchPlainScalar();
      return;
  }
}

467

/// Reports the character at the current position as disallowed.
void _invalidScalarCharacter() {
  _scanner.error("Unexpected character.", length: 1);
}

471

/// Discards the potential simple keys that can no longer become keys.
///
/// Throws a [YamlException] if a discarded key was required.
void _staleSimpleKeys() {
  // libyaml requires that all simple keys be a single line and no longer
  // than 1024 characters. However, in section 7.4.2 of the spec
  // (http://yaml.org/spec/1.2/spec.html#id2790832), these restrictions are
  // only applied when the curly braces are omitted. It's difficult to
  // retain enough context to know which keys need to have the restriction
  // placed on them, so for now we go the other direction and allow
  // everything but multiline simple keys in a block context.
  //
  // The context cannot change while the keys are being checked, so nothing
  // is ever discarded outside of a block context.
  if (!_inBlockContext) return;

  for (var i = 0; i < _simpleKeys.length; i++) {
    var candidate = _simpleKeys[i];
    if (candidate == null || candidate.line == _scanner.line) continue;

    if (candidate.required) {
      throw new YamlException("Expected ':'.", _scanner.emptySpan);
    }

    _simpleKeys[i] = null;
  }
}

497

/// Records the current position as a potential simple key, if one may start
/// here.
void _saveSimpleKey() {
  // In the block context, a key that starts in the column of the current
  // indentation level is required rather than merely possible.
  var mustBeKey = _inBlockContext && _indent == _scanner.column;

  // A simple key is required only when it is the first token in the current
  // line. Therefore it is always allowed. But we add a check anyway.
  assert(_simpleKeyAllowed || !mustBeKey);

  if (!_simpleKeyAllowed) return;

  // Replace whatever candidate the current flow level had before.
  _removeSimpleKey();
  var candidate = new _SimpleKey(
      _tokensParsed + _tokens.length,
      _scanner.line,
      _scanner.column,
      _scanner.location,
      required: mustBeKey);
  _simpleKeys[_simpleKeys.length - 1] = candidate;
}

521

/// Clears the potential simple key of the current flow level.
///
/// Throws a [YamlException] if that key was required.
void _removeSimpleKey() {
  var current = _simpleKeys.last;
  var isRequired = current != null && current.required;
  if (isRequired) {
    throw new YamlException("Could not find expected ':' for simple key.",
        current.location.pointSpan());
  }

  _simpleKeys[_simpleKeys.length - 1] = null;
}

532

/// Enters a new flow level by pushing an empty slot onto [_simpleKeys].
void _increaseFlowLevel() => _simpleKeys.add(null);

537

/// Leaves the current flow level.
///
/// Does nothing when the scanner is already in the block context.
void _decreaseFlowLevel() {
  if (!_inBlockContext) {
    _simpleKeys.removeLast();
  }
}

543

/// Opens a new indentation level at [column] if [column] is greater than
/// [_indent], and queues a token of [type] to mark it.
///
/// The token gets a point span at [location]. If [tokenNumber] is provided,
/// the token is inserted into [_tokens] at the slot belonging to that token
/// number, ahead of the token currently there; otherwise it is appended.
///
/// Does nothing outside of the block context.
void _rollIndent(int column, TokenType type, SourceLocation location,
    {int tokenNumber}) {
  if (!_inBlockContext) return;

  var alreadyDeepEnough = _indent != -1 && _indent >= column;
  if (alreadyDeepEnough) return;

  // Make [column] the new current indentation level.
  _indents.add(column);

  var marker = new Token(type, location.pointSpan());
  if (tokenNumber != null) {
    // Token numbers count emitted tokens too, so convert to a queue index.
    _tokens.insert(tokenNumber - _tokensParsed, marker);
  } else {
    _tokens.add(marker);
  }
}

567

/// Closes every indentation level in [_indents] that is deeper than
/// [column].
///
/// A [TokenType.BLOCK_END] token is appended for each level that is closed.
/// Does nothing outside of the block context.
void _unrollIndent(int column) {
  if (_inBlockContext) {
    while (column < _indent) {
      _tokens.add(new Token(TokenType.BLOCK_END, _scanner.emptySpan));
      _indents.removeLast();
    }
  }
}

580

/// Closes every open indentation level, returning [_indent] to -1.
///
/// A [TokenType.BLOCK_END] token is appended for each level that is closed.
void _resetIndent() {
  _unrollIndent(-1);
}

586

/// Produces a [TokenType.STREAM_START] token.
void _fetchStreamStart() {
  // Much of libyaml's initialization logic here is done in variable
  // initializers instead.
  var streamStart = new Token(TokenType.STREAM_START, _scanner.emptySpan);
  _streamStartProduced = true;
  _tokens.add(streamStart);
}

594

/// Produces a [TokenType.STREAM_END] token.
///
/// All open block contexts are closed and the pending simple key is dropped
/// first.
void _fetchStreamEnd() {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  var streamEnd = new Token(TokenType.STREAM_END, _scanner.emptySpan);
  _tokens.add(streamEnd);
}

602

/// Produces a [TokenType.VERSION_DIRECTIVE] or [TokenType.TAG_DIRECTIVE]
/// token.
///
/// Nothing is queued when [_scanDirective] returns `null`.
void _fetchDirective() {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  var directive = _scanDirective();
  if (directive == null) return;
  _tokens.add(directive);
}

612

/// Produces a [TokenType.DOCUMENT_START] or [TokenType.DOCUMENT_END] token.
///
/// The token spans the three characters of the indicator.
void _fetchDocumentIndicator(TokenType type) {
  _resetIndent();
  _removeSimpleKey();
  _simpleKeyAllowed = false;

  // Consume the three indicator characters.
  var start = _scanner.state;
  for (var i = 0; i < 3; i++) {
    _scanner.readChar();
  }

  _tokens.add(new Token(type, _scanner.spanFrom(start)));
}

627

/// Produces a [TokenType.FLOW_SEQUENCE_START] or
/// [TokenType.FLOW_MAPPING_START] token.
void _fetchFlowCollectionStart(TokenType type) {
  // The collection itself may be a simple key, so record it before entering
  // the new flow level.
  _saveSimpleKey();
  _increaseFlowLevel();

  // The first entry of the collection may be a simple key.
  _simpleKeyAllowed = true;
  _addCharToken(type);
}

636

/// Produces a [TokenType.FLOW_SEQUENCE_END] or [TokenType.FLOW_MAPPING_END]
/// token.
void _fetchFlowCollectionEnd(TokenType type) {
  // Drop the candidate key of the level being closed, then leave that level.
  _removeSimpleKey();
  _decreaseFlowLevel();

  _simpleKeyAllowed = false;
  _addCharToken(type);
}

645

/// Produces a [TokenType.FLOW_ENTRY] token.
void _fetchFlowEntry() {
  _removeSimpleKey();

  // The entry after the separator may be a simple key.
  _simpleKeyAllowed = true;
  _addCharToken(TokenType.FLOW_ENTRY);
}

652

/// Produces a [TokenType.BLOCK_ENTRY] token.
///
/// In the block context this may first open a block sequence. Throws a
/// [YamlException] if a block sequence entry is not allowed at the current
/// position.
void _fetchBlockEntry() {
  // It is an error for the '-' indicator to occur in the flow context, but
  // we let the Parser detect and report it because it's able to point to
  // the context. Only the block context is checked here.
  if (_inBlockContext) {
    if (!_simpleKeyAllowed) {
      throw new YamlException(
          "Block sequence entries are not allowed here.",
          _scanner.emptySpan);
    }

    _rollIndent(_scanner.column, TokenType.BLOCK_SEQUENCE_START,
        _scanner.location);
  }

  _removeSimpleKey();
  _simpleKeyAllowed = true;
  _addCharToken(TokenType.BLOCK_ENTRY);
}

676

/// Produces the [TokenType.KEY] token for an explicit `?` indicator.
///
/// In the block context this may first open a block mapping. Throws a
/// [YamlException] if a mapping key is not allowed at the current position.
void _fetchKey() {
  var inBlock = _inBlockContext;
  if (inBlock) {
    if (!_simpleKeyAllowed) {
      throw new YamlException("Mapping keys are not allowed here.",
          _scanner.emptySpan);
    }

    _rollIndent(_scanner.column, TokenType.BLOCK_MAPPING_START,
        _scanner.location);
  }

  // Simple keys are allowed after `?` in a block context.
  _simpleKeyAllowed = inBlock;
  _addCharToken(TokenType.KEY);
}

695

/// Produces the [TokenType.VALUE] token for a `:` indicator.
///
/// Three situations are handled before the `:` itself is consumed:
///
/// * A simple key is pending: a [TokenType.KEY] token is inserted before the
///   key's first token and, in the block context, a block mapping may be
///   opened at the key's column.
/// * No simple key is pending in the block context: the `:` follows a
///   complex key, and a block mapping may be opened at the current column.
/// * No simple key is pending in the flow context but one would be allowed:
///   the key is empty, so a zero-width [TokenType.KEY] token is queued.
///
/// Throws a [YamlException] if a mapping value is not allowed at the current
/// position.
void _fetchValue() {
  var simpleKey = _simpleKeys.last;
  if (simpleKey != null) {
    // Add a [TokenType.KEY] directive before the first token of the simple
    // key so the parser knows that it's part of a key/value pair.
    _tokens.insert(simpleKey.tokenNumber - _tokensParsed,
        new Token(TokenType.KEY, simpleKey.location.pointSpan()));

    // In the block context, we may need to add the
    // [TokenType.BLOCK_MAPPING_START] token.
    _rollIndent(
        simpleKey.column,
        TokenType.BLOCK_MAPPING_START,
        simpleKey.location,
        tokenNumber: simpleKey.tokenNumber);

    // Remove the simple key.
    _simpleKeys[_simpleKeys.length - 1] = null;

    // A simple key cannot follow another simple key.
    _simpleKeyAllowed = false;
  } else if (_inBlockContext) {
    if (!_simpleKeyAllowed) {
      throw new YamlException(
          "Mapping values are not allowed here. Did you miss a colon "
          "earlier?",
          _scanner.emptySpan);
    }

    // If we're here, we've found the ':' indicator following a complex key.

    _rollIndent(
        _scanner.column,
        TokenType.BLOCK_MAPPING_START,
        _scanner.location);
    _simpleKeyAllowed = true;
  } else if (_simpleKeyAllowed) {
    // If we're here, we've found the ':' indicator with an empty key. This
    // behavior differs from libyaml, which disallows empty implicit keys.
    _simpleKeyAllowed = false;

    // The empty key has no text of its own, so its token must not consume
    // anything. [_addCharToken] always reads a character; using it here as
    // well as below would swallow the character that follows the ':'.
    _tokens.add(new Token(TokenType.KEY, _scanner.emptySpan));
  }

  // Consume the ':' itself.
  _addCharToken(TokenType.VALUE);
}

742

/// Consumes the current character and appends a token of [type] that spans
/// it to [_tokens].
void _addCharToken(TokenType type) {
  var before = _scanner.state;
  _scanner.readChar();

  var span = _scanner.spanFrom(before);
  _tokens.add(new Token(type, span));
}

751

/// Produces a [TokenType.ANCHOR] token if [anchor] is `true`, or a
/// [TokenType.ALIAS] token otherwise.
void _fetchAnchor({bool anchor: true}) {
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var token = _scanAnchor(anchor: anchor);
  _tokens.add(token);
}

758

/// Produces a [TokenType.TAG] token.
void _fetchTag() {
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var token = _scanTag();
  _tokens.add(token);
}

765

/// Produces a [TokenType.SCALAR] token with style [ScalarStyle.LITERAL] if
/// [literal] is `true`, or [ScalarStyle.FOLDED] otherwise.
void _fetchBlockScalar({bool literal: false}) {
  _removeSimpleKey();

  // A simple key may follow a block scalar.
  _simpleKeyAllowed = true;

  var token = _scanBlockScalar(literal: literal);
  _tokens.add(token);
}

773

/// Produces a [TokenType.SCALAR] token with style
/// [ScalarStyle.SINGLE_QUOTED] if [singleQuote] is `true`, or
/// [ScalarStyle.DOUBLE_QUOTED] otherwise.
void _fetchFlowScalar({bool singleQuote: false}) {
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var token = _scanFlowScalar(singleQuote: singleQuote);
  _tokens.add(token);
}

781

/// Produces a [TokenType.SCALAR] token with style [ScalarStyle.PLAIN].
void _fetchPlainScalar() {
  _saveSimpleKey();
  _simpleKeyAllowed = false;

  var token = _scanPlainScalar();
  _tokens.add(token);
}

788

/// Skips whitespace, comments and line breaks until the start of the next
/// token.
///
/// Reports a scanner error for a tab used as indentation in the block
/// context.
void _scanToNextToken() {
  var sawLineBreak = false;
  for (;;) {
    // Allow the BOM to start a line.
    if (_scanner.column == 0) _scanner.scan("\uFEFF");

    // Eat whitespace.
    //
    // libyaml disallows tabs after "-", "?", or ":", but the spec allows
    // them. See section 6.2: http://yaml.org/spec/1.2/spec.html#id2778241.
    //
    // Tabs are skipped everywhere except at the start of a line in the block
    // context, where they would act as indentation.
    var tabsAllowed = !_inBlockContext || !sawLineBreak;
    var char = _scanner.peekChar();
    while (char == SP || (tabsAllowed && char == TAB)) {
      _scanner.readChar();
      char = _scanner.peekChar();
    }

    if (char == TAB) {
      _scanner.error("Tab characters are not allowed as indentation.",
          length: 1);
    }

    // Eat a comment until a line break.
    _skipComment();

    // Anything other than a line break is the start of a token.
    if (!_isBreak) return;

    _skipLine();

    // In the block context, a new line may start a simple key.
    if (_inBlockContext) _simpleKeyAllowed = true;
    sawLineBreak = true;
  }
}

827

/// Scans a [TokenType.VERSION_DIRECTIVE] or [TokenType.TAG_DIRECTIVE] token.
///
///     %YAML    1.2    # a comment \n
///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
///     %TAG    !yaml!  tag:yaml.org,2002:  \n
///     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
///
/// Returns `null` for a directive with any other name, after warning about
/// it and skipping the rest of its line.
///
/// Throws a [YamlException] if a known directive is followed by anything but
/// a comment or a line break.
Token _scanDirective() {
  var start = _scanner.state;

  // Eat '%'.
  _scanner.readChar();

  var name = _scanDirectiveName();
  if (name != "YAML" && name != "TAG") {
    warn("Warning: unknown directive.", _scanner.spanFrom(start));

    // libyaml doesn't support unknown directives, but the spec says to ignore
    // them and warn: http://yaml.org/spec/1.2/spec.html#id2781147.
    while (!_isBreakOrEnd) {
      _scanner.readChar();
    }

    return null;
  }

  var token = name == "YAML"
      ? _scanVersionDirectiveValue(start)
      : _scanTagDirectiveValue(start);

  // Eat the rest of the line, including any comments.
  _skipBlanks();
  _skipComment();

  if (!_isBreakOrEnd) {
    throw new YamlException(
        "Expected comment or line break after directive.",
        _scanner.spanFrom(start));
  }

  _skipLine();
  return token;
}

871

/// Scans a directive name.
///
///     %YAML   1.2     # a comment \n
///      ^^^^
///     %TAG    !yaml!  tag:yaml.org,2002:  \n
///      ^^^
///
/// Throws a [YamlException] if the name is empty or is not followed by
/// whitespace or the end of the source.
String _scanDirectiveName() {
  // libyaml only allows word characters in directive names, but the spec
  // disagrees: http://yaml.org/spec/1.2/spec.html#ns-directive-name.
  var nameStart = _scanner.position;
  while (_isNonSpace) {
    _scanner.readChar();
  }
  var name = _scanner.substring(nameStart);

  if (name.isEmpty) {
    throw new YamlException("Expected directive name.", _scanner.emptySpan);
  }
  if (!_isBlankOrEnd) {
    throw new YamlException(
        "Unexpected character in directive name.", _scanner.emptySpan);
  }

  return name;
}

896

/// Scans the value of a version directive.
///
///     %YAML   1.2     # a comment \n
///          ^^^^^^
///
/// [start] is the scanner state at the beginning of the directive; the
/// returned token's span runs from there to the end of the version.
Token _scanVersionDirectiveValue(LineScannerState start) {
  _skipBlanks();

  var major = _scanVersionDirectiveNumber();
  _scanner.expect('.');
  var minor = _scanVersionDirectiveNumber();

  var span = _scanner.spanFrom(start);
  return new VersionDirectiveToken(span, major, minor);
}

910

/// Scans one number of a version directive.
///
///     %YAML   1.2     # a comment \n
///             ^
///     %YAML   1.2     # a comment \n
///               ^
///
/// Throws a [YamlException] if there is no digit at the current position.
int _scanVersionDirectiveNumber() {
  var digitsStart = _scanner.position;
  while (_isDigit) {
    _scanner.readChar();
  }

  var digits = _scanner.substring(digitsStart);
  if (digits.isNotEmpty) return int.parse(digits);

  throw new YamlException("Expected version number.", _scanner.emptySpan);
}

930

/// Scans the value of a tag directive.
///
///     %TAG    !yaml!  tag:yaml.org,2002:  \n
///         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
///
/// [start] is the scanner state at the beginning of the directive; the
/// returned token's span runs from there to the end of the prefix.
///
/// Throws a [YamlException] if the handle or the prefix is not followed by
/// whitespace (or, for the prefix, the end of the source).
Token _scanTagDirectiveValue(LineScannerState start) {
  _skipBlanks();

  var handle = _scanTagHandle(directive: true);
  if (!_isBlank) {
    throw new YamlException("Expected whitespace.", _scanner.emptySpan);
  }
  _skipBlanks();

  var prefix = _scanTagUri();
  if (!_isBlankOrEnd) {
    throw new YamlException("Expected whitespace.", _scanner.emptySpan);
  }

  var span = _scanner.spanFrom(start);
  return new TagDirectiveToken(span, handle, prefix);
}

952

/// Scans a [TokenType.ANCHOR] token if [anchor] is `true`, or a
/// [TokenType.ALIAS] token otherwise.
///
/// Throws a [YamlException] if the name is empty or is followed by a
/// character that may not come after an anchor or alias.
Token _scanAnchor({bool anchor: true}) {
  var start = _scanner.state;

  // Eat the indicator character.
  _scanner.readChar();

  // libyaml only allows word characters in anchor names, but the spec
  // disagrees: http://yaml.org/spec/1.2/spec.html#ns-anchor-char.
  var nameStart = _scanner.position;
  while (_isAnchorChar) {
    _scanner.readChar();
  }
  var name = _scanner.substring(nameStart);

  // The name has to be followed by whitespace, the end of the source, or one
  // of the indicator characters listed here.
  var next = _scanner.peekChar();
  if (name.isEmpty ||
      !(_isBlankOrEnd ||
          next == QUESTION ||
          next == COLON ||
          next == COMMA ||
          next == RIGHT_SQUARE ||
          next == RIGHT_CURLY ||
          next == PERCENT ||
          next == AT ||
          next == GRAVE_ACCENT)) {
    throw new YamlException("Expected alphanumeric character.",
        _scanner.emptySpan);
  }

  var span = _scanner.spanFrom(start);
  if (anchor) return new AnchorToken(span, name);
  return new AliasToken(span, name);
}

983

/// Scans a [TokenType.TAG] token.
///
/// Three forms are recognized: the verbatim form `!<uri>`, the shorthand
/// forms `!suffix` and `!handle!suffix`, and the lone `!` tag.
Token _scanTag() {
  var handle;
  var suffix;
  var start = _scanner.state;

  var isVerbatim = _scanner.peekChar(1) == LEFT_ANGLE;
  if (isVerbatim) {
    // Eat '!<'.
    _scanner.readChar();
    _scanner.readChar();

    handle = '';
    suffix = _scanTagUri();

    _scanner.expect('>');
  } else {
    // The tag has either the '!suffix' or the '!handle!suffix' form, so try
    // to scan a handle first.
    handle = _scanTagHandle();

    var hasExplicitHandle =
        handle.length > 1 && handle.startsWith('!') && handle.endsWith('!');
    if (hasExplicitHandle) {
      suffix = _scanTagUri(flowSeparators: false);
    } else {
      // There was no explicit handle: what was scanned is the beginning of
      // the suffix.
      suffix = _scanTagUri(head: handle, flowSeparators: false);

      if (suffix.isNotEmpty) {
        handle = '!';
      } else {
        // This is the special '!' tag.
        handle = null;
        suffix = '!';
      }
    }
  }

  // libyaml insists on whitespace after a tag, but example 7.2 indicates
  // that it's not required: http://yaml.org/spec/1.2/spec.html#id2786720.

  return new TagToken(_scanner.spanFrom(start), handle, suffix);
}

1027

/// Scans a tag handle and returns its text, including the leading `!`.
///
/// A trailing `!` is consumed and included when present. When [directive] is
/// true (scanning a `%TAG` directive) and the handle is longer than a bare
/// `!`, the trailing `!` is mandatory and its absence is reported through
/// `_scanner.expect`.
String _scanTagHandle({bool directive: false}) {
  _scanner.expect('!');

  // The spec permits more than libyaml's word characters in tags:
  // http://yaml.org/spec/1.2/spec.html#ns-tag-char.
  var bodyStart = _scanner.position;
  while (_isTagChar) {
    _scanner.readChar();
  }
  var handle = '!' + _scanner.substring(bodyStart);

  if (_scanner.peekChar() == EXCLAMATION) {
    handle += new String.fromCharCode(_scanner.readChar());
    return handle;
  }

  // Either this is the bare '!' tag or it is not really a handle. In a %TAG
  // directive that is an error; in a tag token the text scanned so far must
  // be the beginning of a URI.
  if (directive && handle != '!') _scanner.expect('!');

  return handle;
}

1053

/// Scans a tag URI starting at the current position and returns it
/// percent-decoded.
///
/// [head] is the initial portion of the tag that the caller has already
/// scanned. [flowSeparators] indicates whether the URI may contain the flow
/// separators `,`, `[` and `]`; they are disallowed in shorthand tags.
///
/// NOTE(review): [head] does not contribute to the returned string. This
/// method previously copied it into a buffer that was never read; that dead
/// code has been removed without changing behaviour. libyaml prepends the
/// head (minus its leading '!') to the result. Doing the same here would
/// change the suffix reported for `!suffix` tags, so confirm the impact on
/// downstream consumers before altering it.
String _scanTagUri({String head, bool flowSeparators: true}) {
  // The set of characters that may appear in a URI is:
  //
  //   '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
  //   '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
  //   '%'.
  //
  // In a shorthand tag annotation, the flow separators ',', '[', and ']' are
  // disallowed.
  var start = _scanner.position;
  var char = _scanner.peekChar();
  while (_isTagChar ||
      (flowSeparators &&
          (char == COMMA || char == LEFT_SQUARE || char == RIGHT_SQUARE))) {
    _scanner.readChar();
    char = _scanner.peekChar();
  }

  // libyaml decodes the URL by hand; the core library does it for us.
  return Uri.decodeFull(_scanner.substring(start));
}

1087

/// Scans a block scalar and returns it as a [ScalarToken].
///
/// The header may carry a chomping indicator (`+` or `-`) and a single-digit
/// indentation indicator, in either order. When [literal] is true line breaks
/// are kept as written and the token has [ScalarStyle.LITERAL]; otherwise
/// eligible line breaks are folded and the token has [ScalarStyle.FOLDED].
///
/// Throws a [YamlException] if the indentation indicator is `0` or if
/// anything other than blanks and a comment follows the header.
Token _scanBlockScalar({bool literal: false}) {
  var begin = _scanner.state;

  // Eat the indicator '|' or '>'.
  _scanner.readChar();

  // Parse the header: chomping indicator and indentation indicator, in
  // either order.
  var chomping = _Chomping.CLIP;
  var increment = 0;
  var indicator = _scanner.peekChar();
  if (indicator == PLUS || indicator == HYPHEN) {
    chomping = indicator == PLUS ? _Chomping.KEEP : _Chomping.STRIP;
    _scanner.readChar();

    // An indentation indicator may follow.
    if (_isDigit) {
      // It must be greater than 0.
      if (_scanner.peekChar() == NUMBER_0) {
        throw new YamlException(
            "0 may not be used as an indentation indicator.",
            _scanner.spanFrom(begin));
      }

      increment = _scanner.readChar() - NUMBER_0;
    }
  } else if (_isDigit) {
    // Same as above with the two indicators in the opposite order.
    if (_scanner.peekChar() == NUMBER_0) {
      throw new YamlException(
          "0 may not be used as an indentation indicator.",
          _scanner.spanFrom(begin));
    }

    increment = _scanner.readChar() - NUMBER_0;

    indicator = _scanner.peekChar();
    if (indicator == PLUS || indicator == HYPHEN) {
      chomping = indicator == PLUS ? _Chomping.KEEP : _Chomping.STRIP;
      _scanner.readChar();
    }
  }

  // Only blanks and a comment may follow the header on this line.
  _skipBlanks();
  _skipComment();

  if (!_isBreakOrEnd) {
    throw new YamlException("Expected comment or line break.",
        _scanner.emptySpan);
  }

  _skipLine();

  // With an explicit indentation indicator, the content indentation is the
  // current indentation plus that increment. An indent of 0 means "not yet
  // known" and is resolved by [_scanBlockScalarBreaks].
  var indent = 0;
  if (increment != 0) {
    indent = _indent >= 0 ? _indent + increment : increment;
  }

  // Consume leading line breaks, determining the indentation if needed.
  var initial = _scanBlockScalarBreaks(indent);
  indent = initial.first;
  var trailingBreaks = initial.last;

  // Scan the content, one line per iteration.
  var buffer = new StringBuffer();
  var leadingBreak = '';
  var leadingBlank = false;
  var end = _scanner.state;
  while (_scanner.column == indent && !_scanner.isDone) {
    // A document indicator ends the scalar. libyaml doesn't check this, but
    // the spec mandates it. See example 9.5:
    // http://yaml.org/spec/1.2/spec.html#id2801606.
    if (_isDocumentIndicator) break;

    // We are at the beginning of a non-empty line. Does it start with a
    // blank?
    var trailingBlank = _isBlank;

    // Fold the pending line break if folding applies, otherwise emit it.
    if (!literal &&
        leadingBreak.isNotEmpty &&
        !leadingBlank &&
        !trailingBlank) {
      // Join the lines with a space unless blank lines separate them.
      if (trailingBreaks.isEmpty) buffer.writeCharCode(SP);
    } else {
      buffer.write(leadingBreak);
    }
    leadingBreak = '';

    // Emit the breaks of any intervening blank lines.
    buffer.write(trailingBreaks);

    // Remember whether this line starts with a blank for the next iteration.
    leadingBlank = _isBlank;

    // Copy the rest of the line.
    var lineStart = _scanner.position;
    while (!_isBreakOrEnd) {
      _scanner.readChar();
    }
    buffer.write(_scanner.substring(lineStart));
    end = _scanner.state;

    // libyaml always reads a line break here, which fails for a block scalar
    // that ends the document without a newline. See example 8.1:
    // http://yaml.org/spec/1.2/spec.html#id2793888.
    if (!_scanner.isDone) leadingBreak = _readLine();

    // Eat the following indentation and blank lines.
    var following = _scanBlockScalarBreaks(indent);
    indent = following.first;
    trailingBreaks = following.last;
  }

  // Chomp the tail.
  if (chomping != _Chomping.STRIP) buffer.write(leadingBreak);
  if (chomping == _Chomping.KEEP) buffer.write(trailingBreaks);

  var style = literal ? ScalarStyle.LITERAL : ScalarStyle.FOLDED;
  return new ScalarToken(
      _scanner.spanFrom(begin, end), buffer.toString(), style);
}

1215

/// Scans indentation spaces and line breaks for a block scalar.
///
/// If [indent] is 0 the indentation level is not yet known and is determined
/// here from the widest run of leading spaces seen, with a lower bound of one
/// more than the current indentation. Returns a pair of the (possibly newly
/// determined) indentation level and the text of the line breaks consumed.
Pair<int, String> _scanBlockScalarBreaks(int indent) {
  var widest = 0;
  var consumedBreaks = new StringBuffer();

  for (;;) {
    // Eat indentation spaces: all of them while the indentation is unknown,
    // otherwise only up to the known indentation column.
    while ((indent == 0 || _scanner.column < indent) &&
        _scanner.peekChar() == SP) {
      _scanner.readChar();
    }

    if (_scanner.column > widest) widest = _scanner.column;

    // libyaml reports an error if it finds a tab here, but the spec treats
    // tabs like any other non-space character. See example 8.2:
    // http://yaml.org/spec/1.2/spec.html#id2794311.

    if (!_isBreak) break;
    consumedBreaks.write(_readLine());
  }

  if (indent == 0) {
    // libyaml additionally forces the indent to be at least 1, which the spec
    // does not seem to support.
    var minimum = _indent + 1;
    indent = widest < minimum ? minimum : widest;
  }

  return new Pair(indent, consumedBreaks.toString());
}

1250

/// Scans a quoted flow scalar and returns it as a [ScalarToken].
///
/// When [singleQuote] is true the scalar is delimited by single quotes and
/// `''` is the only escape; otherwise it is delimited by double quotes and
/// backslash escapes are interpreted. Line breaks inside the scalar are
/// folded: a single break becomes a space, and breaks of additional blank
/// lines are kept.
///
/// Throws a [YamlException] on end of input before the closing quote, on an
/// unknown escape character, on a malformed hexadecimal escape, or on an
/// escape that denotes a surrogate or a value above 0x10FFFF.
Token _scanFlowScalar({bool singleQuote: false}) {
  var start = _scanner.state;
  var buffer = new StringBuffer();
  var quote = singleQuote ? SINGLE_QUOTE : DOUBLE_QUOTE;

  // Eat the left quote.
  _scanner.readChar();

  while (true) {
    // Check that there are no document indicators at the beginning of the
    // line.
    if (_isDocumentIndicator) {
      _scanner.error("Unexpected document indicator.");
    }

    if (_scanner.isDone) {
      throw new YamlException("Unexpected end of file.", _scanner.emptySpan);
    }

    // Consume non-blank characters.
    var leadingBlanks = false;
    while (!_isBlankOrEnd) {
      var char = _scanner.peekChar();
      if (singleQuote &&
          char == SINGLE_QUOTE &&
          _scanner.peekChar(1) == SINGLE_QUOTE) {
        // An escaped single quote.
        _scanner.readChar();
        _scanner.readChar();
        buffer.writeCharCode(SINGLE_QUOTE);
      } else if (char == quote) {
        // The closing quote.
        break;
      } else if (!singleQuote && char == BACKSLASH && _isBreakAt(1)) {
        // An escaped newline.
        _scanner.readChar();
        _skipLine();
        leadingBlanks = true;
        break;
      } else if (!singleQuote && char == BACKSLASH) {
        var escapeStart = _scanner.state;

        // An escape sequence. Simple escapes are written immediately; for
        // numeric escapes only the number of hex digits is recorded here.
        var codeLength;
        switch (_scanner.peekChar(1)) {
          case NUMBER_0:
            buffer.writeCharCode(NULL);
            break;
          case LETTER_A:
            buffer.writeCharCode(BELL);
            break;
          case LETTER_B:
            buffer.writeCharCode(BACKSPACE);
            break;
          case LETTER_T:
          case TAB:
            buffer.writeCharCode(TAB);
            break;
          case LETTER_N:
            buffer.writeCharCode(LF);
            break;
          case LETTER_V:
            buffer.writeCharCode(VERTICAL_TAB);
            break;
          case LETTER_F:
            buffer.writeCharCode(FORM_FEED);
            break;
          case LETTER_R:
            buffer.writeCharCode(CR);
            break;
          case LETTER_E:
            buffer.writeCharCode(ESCAPE);
            break;
          case SP:
          case DOUBLE_QUOTE:
          case SLASH:
          case BACKSLASH:
            // libyaml doesn't support an escaped forward slash, but it was
            // added in YAML 1.2. See section 5.7:
            // http://yaml.org/spec/1.2/spec.html#id2776092
            buffer.writeCharCode(_scanner.peekChar(1));
            break;
          case LETTER_CAP_N:
            buffer.writeCharCode(NEL);
            break;
          case UNDERSCORE:
            buffer.writeCharCode(NBSP);
            break;
          case LETTER_CAP_L:
            buffer.writeCharCode(LINE_SEPARATOR);
            break;
          case LETTER_CAP_P:
            buffer.writeCharCode(PARAGRAPH_SEPARATOR);
            break;
          case LETTER_X:
            codeLength = 2;
            break;
          case LETTER_U:
            codeLength = 4;
            break;
          case LETTER_CAP_U:
            codeLength = 8;
            break;
          default:
            throw new YamlException("Unknown escape character.",
                _scanner.spanFrom(escapeStart));
        }

        // Consume the backslash and the escape character.
        _scanner.readChar();
        _scanner.readChar();

        if (codeLength != null) {
          // Read exactly [codeLength] hex digits into [value].
          var value = 0;
          for (var i = 0; i < codeLength; i++) {
            if (!_isHex) {
              // Include the offending character in the reported span.
              _scanner.readChar();
              throw new YamlException(
                  "Expected $codeLength-digit hexadecimal number.",
                  _scanner.spanFrom(escapeStart));
            }

            value = (value << 4) + _asHex(_scanner.readChar());
          }

          // Reject surrogates and values beyond the Unicode range.
          if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
            throw new YamlException(
                "Invalid Unicode character escape code.",
                _scanner.spanFrom(escapeStart));
          }

          buffer.writeCharCode(value);
        }
      } else {
        buffer.writeCharCode(_scanner.readChar());
      }
    }

    // Check if we're at the end of a scalar.
    if (_scanner.peekChar() == quote) {
      break;
    }

    // Consume blanks and line breaks, remembering enough to fold them.
    var whitespace = new StringBuffer();
    var leadingBreak = '';
    var trailingBreaks = new StringBuffer();
    while (_isBlank || _isBreak) {
      if (_isBlank) {
        // Consume a space or a tab. Blanks after a line break are dropped.
        if (!leadingBlanks) {
          whitespace.writeCharCode(_scanner.readChar());
        } else {
          _scanner.readChar();
        }
      } else {
        // Check if it's a first line break.
        if (!leadingBlanks) {
          whitespace.clear();
          leadingBreak = _readLine();
          leadingBlanks = true;
        } else {
          trailingBreaks.write(_readLine());
        }
      }
    }

    // Join the whitespace or fold line breaks.
    if (leadingBlanks) {
      if (leadingBreak.isNotEmpty && trailingBreaks.isEmpty) {
        buffer.writeCharCode(SP);
      } else {
        buffer.write(trailingBreaks);
      }
    } else {
      buffer.write(whitespace);
      whitespace.clear();
    }
  }

  // Eat the right quote.
  _scanner.readChar();

  return new ScalarToken(_scanner.spanFrom(start), buffer.toString(),
      singleQuote ? ScalarStyle.SINGLE_QUOTED : ScalarStyle.DOUBLE_QUOTED);
}

1434

/// Scans a plain (unquoted) scalar and returns it as a [ScalarToken] with
/// [ScalarStyle.PLAIN].
///
/// Scanning stops at a document indicator, at a `#` at the current position,
/// at a character that is not a plain character, blank or line break, or, in
/// block context, when a continuation line is indented less than the current
/// indentation plus one. Line breaks inside the scalar are folded: a single
/// break becomes a space, and the breaks of additional blank lines are kept.
///
/// The token's span ends after the last plain character consumed, so
/// trailing blanks and breaks are not part of it.
Token _scanPlainScalar() {
  var start = _scanner.state;
  var end = _scanner.state;
  var buffer = new StringBuffer();
  var leadingBreak = '';
  var trailingBreaks = '';
  var whitespace = new StringBuffer();
  var indent = _indent + 1;

  while (true) {
    // Check for a document indicator.
    if (_isDocumentIndicator) break;

    // Check for a comment.
    if (_scanner.peekChar() == HASH) break;

    if (_isPlainChar) {
      // Join the whitespace or fold line breaks.
      if (leadingBreak.isNotEmpty) {
        if (trailingBreaks.isEmpty) {
          buffer.writeCharCode(SP);
        } else {
          buffer.write(trailingBreaks);
        }
        leadingBreak = '';
        trailingBreaks = '';
      } else {
        buffer.write(whitespace);
        whitespace.clear();
      }
    }

    // libyaml's notion of valid identifiers differs substantially from YAML
    // 1.2's. We use [_isPlainChar] instead of libyaml's character here.
    var startPosition = _scanner.position;
    while (_isPlainChar) {
      _scanner.readChar();
    }
    buffer.write(_scanner.substring(startPosition));
    end = _scanner.state;

    // Is it the end?
    if (!_isBlank && !_isBreak) break;

    while (_isBlank || _isBreak) {
      if (_isBlank) {
        // Check for a tab character messing up the indentation.
        if (leadingBreak.isNotEmpty &&
            _scanner.column < indent &&
            _scanner.peekChar() == TAB) {
          _scanner.error("Expected a space but found a tab.", length: 1);
        }

        // Blanks before the first line break may be content; blanks after
        // it are indentation and are dropped.
        if (leadingBreak.isEmpty) {
          whitespace.writeCharCode(_scanner.readChar());
        } else {
          _scanner.readChar();
        }
      } else {
        // Check if it's a first line break.
        if (leadingBreak.isEmpty) {
          leadingBreak = _readLine();
          whitespace.clear();
        } else {
          // Accumulate every further break. Overwriting here would collapse
          // several consecutive blank lines into a single newline.
          trailingBreaks += _readLine();
        }
      }
    }

    // Check the indentation level.
    if (_inBlockContext && _scanner.column < indent) break;
  }

  // Allow a simple key after a plain scalar with leading blanks.
  if (leadingBreak.isNotEmpty) _simpleKeyAllowed = true;

  return new ScalarToken(_scanner.spanFrom(start, end), buffer.toString(),
      ScalarStyle.PLAIN);
}

1514

/// Moves past the line break at the current position, if there is one.
///
/// A CR immediately followed by an LF is consumed as a single break. Does
/// nothing when the current character is neither CR nor LF.
void _skipLine() {
  var first = _scanner.peekChar();
  if (first == LF) {
    _scanner.readChar();
  } else if (first == CR) {
    _scanner.readChar();
    if (_scanner.peekChar() == LF) _scanner.readChar();
  }
}

1522

/// Moves past the line break at the current position and returns `"\n"`.
///
/// CR LF, a lone CR and a lone LF are all normalized to a single LF. Throws a
/// [YamlException] if the current character is neither CR nor LF.
String _readLine() {
  var first = _scanner.peekChar();

  // libyaml also accepts NEL, PS, and LS as line separators, but section 5.4
  // of the YAML spec explicitly forbids that.
  if (first == LF) {
    _scanner.readChar();
  } else if (first == CR) {
    _scanner.readChar();
    // CR LF counts as one break.
    if (_scanner.peekChar() == LF) _scanner.readChar();
  } else {
    throw new YamlException("Expected newline.", _scanner.emptySpan);
  }

  return "\n";
}

1538

/// Returns whether the character at [offset] is a space or a tab.
bool _isBlankAt(int offset) {
  var peeked = _scanner.peekChar(offset);
  if (peeked == SP) return true;
  return peeked == TAB;
}

1544

/// Returns whether the character at [offset] is a line break (CR or LF).
///
/// libyaml also treats NEL, LS, and PS as line breaks, but that is contrary
/// to the spec.
bool _isBreakAt(int offset) {
  var peeked = _scanner.peekChar(offset);
  if (peeked == CR) return true;
  return peeked == LF;
}

1552

/// Returns whether the character at [offset] is a space, a tab, a line break
/// (CR or LF), or lies past the end of the source.
bool _isBlankOrEndAt(int offset) {
  var peeked = _scanner.peekChar(offset);

  // `peekChar` yields null when there is no character at that offset.
  if (peeked == null) return true;
  if (peeked == SP || peeked == TAB) return true;
  return peeked == CR || peeked == LF;
}

1560

/// Returns whether the character at [offset] is a plain character.
///
/// A `:` counts only if the character after it is plain-safe, and a `#`
/// counts only if the character before it is neither a space nor a tab. Every
/// other character is judged by [_isPlainSafeAt].
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-char(c).
bool _isPlainCharAt(int offset) {
  var peeked = _scanner.peekChar(offset);

  if (peeked == COLON) return _isPlainSafeAt(offset + 1);

  if (peeked == HASH) {
    var preceding = _scanner.peekChar(offset - 1);
    return preceding != SP && preceding != TAB;
  }

  return _isPlainSafeAt(offset);
}

1575

/// Returns whether the character at [offset] is a plain-safe character.
///
/// Returns false when there is no character at [offset].
///
/// See http://yaml.org/spec/1.2/spec.html#ns-plain-safe(c).
bool _isPlainSafeAt(int offset) {
  var peeked = _scanner.peekChar(offset);
  if (peeked == null) return false;

  // These characters are delimiters in a flow context and thus are only safe
  // in a block context.
  if (peeked == COMMA ||
      peeked == LEFT_SQUARE ||
      peeked == RIGHT_SQUARE ||
      peeked == LEFT_CURLY ||
      peeked == RIGHT_CURLY) {
    return _inBlockContext;
  }

  // Whitespace, line breaks and the byte order mark are never plain-safe.
  if (peeked == SP ||
      peeked == TAB ||
      peeked == LF ||
      peeked == CR ||
      peeked == BOM) {
    return false;
  }

  // NEL is accepted explicitly; it lies outside the ranges checked below.
  if (peeked == NEL) return true;

  return (peeked >= 0x00020 && peeked <= 0x00007E) ||
      (peeked >= 0x000A0 && peeked <= 0x00D7FF) ||
      (peeked >= 0x0E000 && peeked <= 0x00FFFD) ||
      (peeked >= 0x10000 && peeked <= 0x10FFFF);
}

1606

/// Returns the hexadecimal value of [char].
///
/// [char] is expected to be the code unit of a hexadecimal digit; no
/// validation is performed here.
int _asHex(int char) {
  // Decimal digits map directly onto 0-9.
  if (char <= NUMBER_9) return char - NUMBER_0;

  // Letters map onto 10-15, measured from 'A' or 'a' depending on case.
  var letterBase = char <= LETTER_CAP_F ? LETTER_CAP_A : LETTER_A;
  return 10 + char - letterBase;
}

1613

/// Advances the scanner past any run of blank characters at the current
/// position.
void _skipBlanks() {
  while (true) {
    if (!_isBlank) return;
    _scanner.readChar();
  }
}

1620

/// Advances the scanner past a comment, if one starts at the current
/// position.
///
/// The comment runs from the `#` up to, but not including, the next line
/// break or the end of the input.
void _skipComment() {
  if (_scanner.peekChar() == HASH) {
    while (!_isBreakOrEnd) {
      _scanner.readChar();
    }
  }
}

1628 }

1629

/// The recorded position of a token that may turn out to start a simple key.
class _SimpleKey {
  /// The index of the token that begins the simple key.
  ///
  /// The index counts all tokens emitted so far; it is not an index into
  /// [_tokens].
  final int tokenNumber;

  /// The source location at which the simple key begins.
  ///
  /// Used for error reporting and for deciding when a simple key is no longer
  /// on the current line.
  final SourceLocation location;

  /// The line on which the key appears.
  ///
  /// Stored separately because reading it from [location] requires a binary
  /// search, whereas this is O(1).
  final int line;

  /// The column on which the key appears.
  ///
  /// Stored separately because reading it from [location] requires a binary
  /// search, whereas this is O(1).
  final int column;

  /// Whether this key must exist for the document to be scanned.
  final bool required;

  _SimpleKey(this.tokenNumber, this.line, this.column, this.location,
      {this.required});
}

1663

/// The chomping indicators, which describe how trailing whitespace of a block
/// scalar is handled.
///
/// See http://yaml.org/spec/1.2/spec.html#id2794534.
class _Chomping {
  /// Discards all trailing whitespace.
  static const _Chomping STRIP = const _Chomping("STRIP");

  /// Retains a single trailing newline.
  static const _Chomping CLIP = const _Chomping("CLIP");

  /// Preserves all trailing whitespace.
  static const _Chomping KEEP = const _Chomping("KEEP");

  /// The name of this indicator, as returned by [toString].
  final String name;

  const _Chomping(this.name);

  @override
  String toString() {
    return name;
  }
}

OLD	NEW

« no previous file with comments | « yaml/lib/src/parser.dart ('k') | yaml/lib/src/style.dart » ('j') | no next file with comments »