// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Generated by scripts/tokenizer_gen.py.

part of csslib.parser;

/** Tokenizer state to support look ahead for Less' nested selectors. */
class TokenizerState {
  final int index;
  final int startIndex;
  final bool inSelectorExpression;
  final bool inSelector;

  TokenizerState(TokenizerBase base) :
      index = base._index,
      startIndex = base._startIndex,
      inSelectorExpression = base.inSelectorExpression,
      inSelector = base.inSelector;
}

/**
 * The base class for our tokenizer. The hand coded parts are in this file,
 * with the generated parts in the subclass Tokenizer.
 */
abstract class TokenizerBase {
  final SourceFile _file;
  final String _text;

  bool _skipWhitespace;

  /**
   * Changes tokenization when in a pseudo function expression. If true then
   * minus signs are handled as operators instead of identifiers.
   */
  bool inSelectorExpression = false;

  /**
   * Changes tokenization when in selectors. If true, identifiers are not
   * treated as units, since treating them as units would break selectors
   * such as ":lang(fr)" or the (unknown) HTML tag name "px", which is legal
   * to use in a selector.
   */
  // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
  // processed as a "fraction" unit token, preventing it from working in
  // places where an identifier is expected. This was breaking selectors like:
  //     :lang(fr)
  // The assumption that "fr" always means fraction (and similarly for other
  // units) doesn't seem valid. We probably should defer this analysis until
  // we reach places in the parser where units are expected. I'm not sure this
  // is tokenizing as described in the specs:
  // http://dev.w3.org/csswg/css-syntax/
  // http://dev.w3.org/csswg/selectors4/
  bool inSelector = false;

  int _index;
  int _startIndex;

  static const String _CDATA_START = '<![CDATA[';
  static const String _CDATA_END = ']]>';

  TokenizerBase(this._file, this._text, this._skipWhitespace,
      [this._index = 0]);

  Token next();
  int getIdentifierKind();

  /** Snapshot of Tokenizer scanning state. */
  TokenizerState get mark => new TokenizerState(this);

  /** Restore Tokenizer scanning state. */
  void restore(TokenizerState markedData) {
    _index = markedData.index;
    _startIndex = markedData.startIndex;
    inSelectorExpression = markedData.inSelectorExpression;
    inSelector = markedData.inSelector;
  }

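  // A sketch of the look-ahead pattern that mark/restore support (illustrative
  // only; the actual call sites live in the parser, and `tokenizer` below is
  // an assumed local holding a concrete Tokenizer):
  //
  //     var state = tokenizer.mark;
  //     var token = tokenizer.next();
  //     if (token.kind != TokenKind.LBRACE) tokenizer.restore(state);

  /** Returns the next code unit and advances, or 0 at end of input. */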
  int _nextChar() {
    if (_index < _text.length) {
      return _text.codeUnitAt(_index++);
    } else {
      return 0;
    }
  }

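  /** Returns the next code unit without advancing, or 0 at end of input. */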
  int _peekChar() {
    if (_index < _text.length) {
      return _text.codeUnitAt(_index);
    } else {
      return 0;
    }
  }

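  /**
   * If the next code unit is [ch], consumes it and returns true; otherwise
   * leaves the cursor unchanged and returns false.
   */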
  bool _maybeEatChar(int ch) {
    if (_index < _text.length) {
      if (_text.codeUnitAt(_index) == ch) {
        _index++;
        return true;
      } else {
        return false;
      }
    } else {
      return false;
    }
  }

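  /** Source text of the current token, from _startIndex to _index. */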
  String _tokenText() {
    if (_index < _text.length) {
      return _text.substring(_startIndex, _index);
    } else {
      return _text.substring(_startIndex, _text.length);
    }
  }

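  /** Creates a token of [kind] spanning _startIndex to _index. */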
  Token _finishToken(int kind) {
    return new Token(kind, _file.span(_startIndex, _index));
  }

  Token _errorToken([String message = null]) {
    return new ErrorToken(
        TokenKind.ERROR, _file.span(_startIndex, _index), message);
  }

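  /**
   * Consumes a run of whitespace. When [_skipWhitespace] is true the
   * whitespace is discarded and the next non-whitespace token is returned;
   * otherwise a WHITESPACE token is emitted (a newline also ends the run
   * when whitespace is not being skipped).
   */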
  Token finishWhitespace() {
    _index--;
    while (_index < _text.length) {
      final ch = _text.codeUnitAt(_index++);
      if (ch == TokenChar.SPACE ||
          ch == TokenChar.TAB ||
          ch == TokenChar.RETURN) {
        // do nothing
      } else if (ch == TokenChar.NEWLINE) {
        if (!_skipWhitespace) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
      } else {
        _index--;
        if (_skipWhitespace) {
          return next();
        } else {
          return _finishToken(TokenKind.WHITESPACE);
        }
      }
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

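  /**
   * Consumes a block comment, tolerating nested open/close pairs, and then
   * returns either the following token or a COMMENT token depending on
   * [_skipWhitespace]. An unterminated comment yields an error token.
   */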
  Token finishMultiLineComment() {
    int nesting = 1;
    do {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.ASTERISK) {
        if (_maybeEatChar(TokenChar.SLASH)) {
          nesting--;
        }
      } else if (ch == TokenChar.SLASH) {
        if (_maybeEatChar(TokenChar.ASTERISK)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    if (_skipWhitespace) {
      return next();
    } else {
      return _finishToken(TokenKind.COMMENT);
    }
  }

  void eatDigits() {
    while (_index < _text.length) {
      if (TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
        _index++;
      } else {
        return;
      }
    }
  }

  static int _hexDigit(int c) {
    if (c >= 48/*0*/ && c <= 57/*9*/) {
      return c - 48;
    } else if (c >= 97/*a*/ && c <= 102/*f*/) {
      return c - 87;
    } else if (c >= 65/*A*/ && c <= 70/*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

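  /**
   * Reads hexadecimal digits starting at the cursor. With [hexLength],
   * exactly that many valid digits must be available or -1 is returned;
   * without it, digits are consumed until a non-hex code unit is seen.
   */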
  int readHex([int hexLength]) {
    int maxIndex;
    if (hexLength == null) {
      maxIndex = _text.length - 1;
    } else {
      // TODO(jimhug): What if this is too long?
      maxIndex = _index + hexLength;
      if (maxIndex >= _text.length) return -1;
    }
    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.codeUnitAt(_index));
      if (digit == -1) {
        if (hexLength == null) {
          return result;
        } else {
          return -1;
        }
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }

    return result;
  }

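  /**
   * Finishes scanning a numeric literal, producing an INTEGER or DOUBLE
   * token; a '.' only extends the number when it is followed by a digit.
   */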
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == TokenChar.DOT) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      } else {
        _index--;
      }
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

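  /**
   * Consumes an optional exponent part (which forces [kind] to DOUBLE) and
   * rejects an identifier-start character immediately after the number.
   */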
  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
      kind = TokenKind.DOUBLE;
      _maybeEatChar(45/*-*/);
      _maybeEatChar(43/*+*/);
      eatDigits();
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken("illegal character in number");
    }

    return _finishToken(kind);
  }

  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = new String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return new LiteralToken(kind, _file.span(_startIndex, _index), s);
  }

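  /**
   * Wraps the raw source text from [start] to [end] in a single STRING
   * literal token (used for values such as legacy IE filter expressions).
   */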
  Token makeIEFilter(int start, int end) {
    var filter = _text.substring(start, end);
    return new LiteralToken(TokenKind.STRING, _file.span(start, end), filter);
  }

  Token _makeRawStringToken(bool isMultiline) {
    var s;
    if (isMultiline) {
      // Skip initial newline in multiline strings
      int start = _startIndex + 4;
      if (_text[start] == '\n') start++;
      s = _text.substring(start, _index - 3);
    } else {
      s = _text.substring(_startIndex + 2, _index - 1);
    }
    return new LiteralToken(TokenKind.STRING,
        _file.span(_startIndex, _index), s);
  }

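  /**
   * Scans the body of a triple-quoted string; [quote] is the code unit of
   * the quote character that opened it.
   */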
  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            return _makeStringToken(buf, false);
          }
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  Token _finishOpenBrace() {
    return _finishToken(TokenKind.LBRACE);
  }

  Token _finishCloseBrace() {
    return _finishToken(TokenKind.RBRACE);
  }

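  /**
   * Scans a string literal opened by [quote]; an immediately repeated quote
   * switches to multiline (triple-quoted) handling or yields an empty string.
   */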
  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(TokenChar.NEWLINE);
        return finishMultilineString(quote);
      } else {
        return _makeStringToken(new List<int>(), false);
      }
    }
    return finishStringBody(quote);
  }

  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  Token finishMultilineRawString(int quote) {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  Token finishStringBody(int quote) {
    var buf = new List<int>();
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

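  /**
   * Decodes one backslash escape sequence and returns its code point:
   * simple escapes such as \n and \t map to their control characters,
   * \xHH and \uHHHH (or \u{...}) are read as hex, any other character
   * maps to itself, and -1 signals an invalid sequence.
   */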
  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110/*n*/:
        return TokenChar.NEWLINE;
      case 114/*r*/:
        return TokenChar.RETURN;
      case 102/*f*/:
        return TokenChar.FF;
      case 98/*b*/:
        return TokenChar.BACKSPACE;
      case 116/*t*/:
        return TokenChar.TAB;
      case 118/*v*/:
        return 11; // vertical tab
      case 120/*x*/:
        hexValue = readHex(2);
        break;
      case 117/*u*/:
        if (_maybeEatChar(TokenChar.LBRACE)) {
          hexValue = readHex();
          if (!_maybeEatChar(TokenChar.RBRACE)) {
            return -1;
          }
        } else {
          hexValue = readHex(4);
        }
        break;
      default:
        return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF) {
      messages.error('unicode values greater than 2 bytes not implemented yet',
          _file.span(_startIndex, _startIndex + 1));
      return -1;
    } else {
      return -1;
    }
  }

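  /**
   * A '.' either starts a fractional number (when a digit follows) or is a
   * plain DOT token.
   */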
  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }
}