pkg/csslib/lib/src/tokenizer_base.dart - Issue 268623002: [html5lib] implement querySelector/querySelectorAll

Side by Side Diff: pkg/csslib/lib/src/tokenizer_base.dart

Issue 268623002: [html5lib] implement querySelector/querySelectorAll (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 6 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4 // Generated by scripts/tokenizer_gen.py.	4 // Generated by scripts/tokenizer_gen.py.

5	5

6 part of csslib.parser;	6 part of csslib.parser;

7	7

8 /** Tokenizer state to support look ahead for Less' nested selectors. */	8 /** Tokenizer state to support look ahead for Less' nested selectors. */

9 class TokenizerState {	9 class TokenizerState {

10 final int index;	10 final int index;

11 final int startIndex;	11 final int startIndex;

12 final bool selectorExpression;	12 final bool inSelectorExpression;

	13 final bool inSelector;

13	14

14 TokenizerState(TokenizerBase base) :	15 TokenizerState(TokenizerBase base) :

15 this.index = base._index,	16 index = base._index,

16 this.startIndex = base._startIndex,	17 startIndex = base._startIndex,

17 this.selectorExpression = base.selectorExpression;	18 inSelectorExpression = base.inSelectorExpression,

	19 inSelector = base.inSelector;

18 }	20 }

19	21

20 /**	22 /**

21 * The base class for our tokenizer. The hand coded parts are in this file, with	23 * The base class for our tokenizer. The hand coded parts are in this file, with

22 * the generated parts in the subclass Tokenizer.	24 * the generated parts in the subclass Tokenizer.

23 */	25 */

24 abstract class TokenizerBase {	26 abstract class TokenizerBase {

25 final SourceFile _file;	27 final SourceFile _file;

26 final bool _skipWhitespace;

27 final String _text;	28 final String _text;

28	29

	30 bool _skipWhitespace;

	31

29 /**	32 /**

30 * Changes tokenization when in a pseudo function expression. If true then	33 * Changes tokenization when in a pseudo function expression. If true then

31 * minus signs are handled as operators instead of identifiers.	34 * minus signs are handled as operators instead of identifiers.

32 */	35 */

33 bool selectorExpression = false;	36 bool inSelectorExpression = false;

	37

	38 /**

	39 * Changes tokenization when in selectors. If true, it prevents identifiers

	40 * from being treated as units. Treating them as units would break things

	41 * like ":lang(fr)" or the HTML (unknown) tag name "px", legal in a selector.

	42 */

	43 // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be

	44 // processed as a "fraction" unit token, preventing it from working in

	45 // places where an identifier is expected. This was breaking selectors like:

	46 // :lang(fr)

	47 // The assumption that "fr" always means fraction (and similar issue with

	48 // other units) doesn't seem valid. We probably should defer this

	49 // analysis until we reach places in the parser where units are expected.

	50 // I'm not sure this is tokenizing as described in the specs:

	51 // http://dev.w3.org/csswg/css-syntax/

	52 // http://dev.w3.org/csswg/selectors4/

	53 bool inSelector = false;

34	54

35 int _index;	55 int _index;

36 int _startIndex;	56 int _startIndex;

37	57

38 static const String _CDATA_START = '<![CDATA[';	58 static const String _CDATA_START = '<![CDATA[';

39 static const String _CDATA_END = ']]>';	59 static const String _CDATA_END = ']]>';

40	60

41 TokenizerBase(this._file, this._text, this._skipWhitespace,	61 TokenizerBase(this._file, this._text, this._skipWhitespace,

42 [this._index = 0]);	62 [this._index = 0]);

43	63

44 Token next();	64 Token next();

45 int getIdentifierKind();	65 int getIdentifierKind();

46	66

47 /** Snapshot of Tokenizer scanning state. */	67 /** Snapshot of Tokenizer scanning state. */

48 TokenizerState get mark => new TokenizerState(this);	68 TokenizerState get mark => new TokenizerState(this);

49	69

50 /** Restore Tokenizer scanning state. */	70 /** Restore Tokenizer scanning state. */

51 void restore(TokenizerState markedData) {	71 void restore(TokenizerState markedData) {

52 _index = markedData.index;	72 _index = markedData.index;

53 _startIndex = markedData.startIndex;	73 _startIndex = markedData.startIndex;

54 selectorExpression = markedData.selectorExpression;	74 inSelectorExpression = markedData.inSelectorExpression;

	75 inSelector = markedData.inSelector;

55 }	76 }

56	77

57 int _nextChar() {	78 int _nextChar() {

58 if (_index < _text.length) {	79 if (_index < _text.length) {

59 return _text.codeUnitAt(_index++);	80 return _text.codeUnitAt(_index++);

60 } else {	81 } else {

61 return 0;	82 return 0;

62 }	83 }

63 }	84 }

64	85

(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
118 return next();	139 return next();

119 } else {	140 } else {

120 return _finishToken(TokenKind.WHITESPACE);	141 return _finishToken(TokenKind.WHITESPACE);

121 }	142 }

122 }	143 }

123	144

124 }	145 }

125 return _finishToken(TokenKind.END_OF_FILE);	146 return _finishToken(TokenKind.END_OF_FILE);

126 }	147 }

127	148

128 Token finishSingleLineComment() {

129 while (true) {

130 int ch = _nextChar();

131 if (ch == 0 || ch == TokenChar.NEWLINE || ch == TokenChar.RETURN) {

132 if (_skipWhitespace) {

133 return next();

134 } else {

135 return _finishToken(TokenKind.COMMENT);

136 }

137 }

138 }

139 }

140

141 Token finishMultiLineComment() {	149 Token finishMultiLineComment() {

142 int nesting = 1;	150 int nesting = 1;

143 do {	151 do {

144 int ch = _nextChar();	152 int ch = _nextChar();

145 if (ch == 0) {	153 if (ch == 0) {

146 return _errorToken();	154 return _errorToken();

147 } else if (ch == TokenChar.ASTERISK) {	155 } else if (ch == TokenChar.ASTERISK) {

148 if (_maybeEatChar(TokenChar.SLASH)) {	156 if (_maybeEatChar(TokenChar.SLASH)) {

149 nesting--;	157 nesting--;

150 }	158 }

(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
419 }	427 }

420	428

421 Token finishDot() {	429 Token finishDot() {

422 if (TokenizerHelpers.isDigit(_peekChar())) {	430 if (TokenizerHelpers.isDigit(_peekChar())) {

423 eatDigits();	431 eatDigits();

424 return finishNumberExtra(TokenKind.DOUBLE);	432 return finishNumberExtra(TokenKind.DOUBLE);

425 } else {	433 } else {

426 return _finishToken(TokenKind.DOT);	434 return _finishToken(TokenKind.DOT);

427 }	435 }

428 }	436 }

429

430 Token finishIdentifier(int ch) {

431 while (_index < _text.length) {

432 if (!TokenizerHelpers.isIdentifierPart(_text.codeUnitAt(_index++))) {

433 _index--;

434 break;

435 }

436 }

437 int kind = getIdentifierKind();

438 if (kind == TokenKind.IDENTIFIER) {

439 return _finishToken(TokenKind.IDENTIFIER);

440 } else {

441 return _finishToken(kind);

442 }

443 }

444 }	437 }

445	438

OLD	NEW

« no previous file with comments | « pkg/csslib/lib/src/tokenizer.dart ('k') | pkg/csslib/lib/src/tokenkind.dart » ('j') | no next file with comments »