mojo/public/dart/third_party/csslib/lib/src/tokenizer.dart - Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs

Side by Side Diff: mojo/public/dart/third_party/csslib/lib/src/tokenizer.dart

Issue 1346773002: Stop running pub get at gclient sync time and fix build bugs (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 part of csslib.parser;

	6

	7 class Tokenizer extends TokenizerBase {

	8 /** U+ prefix for unicode characters. */

	9 final UNICODE_U = 'U'.codeUnitAt(0);

	10 final UNICODE_LOWER_U = 'u'.codeUnitAt(0);

	11 final UNICODE_PLUS = '+'.codeUnitAt(0);

	12

	13 final QUESTION_MARK = '?'.codeUnitAt(0);

	14

	15 /** CDATA keyword. */

	16 final List CDATA_NAME = 'CDATA'.codeUnits;

	17

	18 Tokenizer(SourceFile file, String text, bool skipWhitespace, [int index = 0])

	19 : super(file, text, skipWhitespace, index);

	20

	21 Token next({unicodeRange: false}) {

	22 // keep track of our starting position

	23 _startIndex = _index;

	24

	25 int ch;

	26 ch = _nextChar();

	27 switch (ch) {

	28 case TokenChar.NEWLINE:

	29 case TokenChar.RETURN:

	30 case TokenChar.SPACE:

	31 case TokenChar.TAB:

	32 return finishWhitespace();

	33 case TokenChar.END_OF_FILE:

	34 return _finishToken(TokenKind.END_OF_FILE);

	35 case TokenChar.AT:

	36 int peekCh = _peekChar();

	37 if (TokenizerHelpers.isIdentifierStart(peekCh)) {

	38 var oldIndex = _index;

	39 var oldStartIndex = _startIndex;

	40

	41 _startIndex = _index;

	42 ch = _nextChar();

	43 finishIdentifier();

	44

	45 // Is it a directive?

	46 int tokId = TokenKind.matchDirectives(

	47 _text, _startIndex, _index - _startIndex);

	48 if (tokId == -1) {

	49 // No, is it a margin directive?

	50 tokId = TokenKind.matchMarginDirectives(

	51 _text, _startIndex, _index - _startIndex);

	52 }

	53

	54 if (tokId != -1) {

	55 return _finishToken(tokId);

	56 } else {

	57 // Didn't find a CSS directive or margin directive so the @name is

	58 // probably the Less definition '@name: value_variable_definition'.

	59 _startIndex = oldStartIndex;

	60 _index = oldIndex;

	61 }

	62 }

	63 return _finishToken(TokenKind.AT);

	64 case TokenChar.DOT:

	65 int start = _startIndex; // Start where the dot started.

	66 if (maybeEatDigit()) {

	67 // looks like a number dot followed by digit(s).

	68 Token number = finishNumber();

	69 if (number.kind == TokenKind.INTEGER) {

	70 // It's a number but it's preceeded by a dot, so make it a double.

	71 _startIndex = start;

	72 return _finishToken(TokenKind.DOUBLE);

	73 } else {

	74 // Don't allow dot followed by a double (e.g, '..1').

	75 return _errorToken();

	76 }

	77 }

	78 // It's really a dot.

	79 return _finishToken(TokenKind.DOT);

	80 case TokenChar.LPAREN:

	81 return _finishToken(TokenKind.LPAREN);

	82 case TokenChar.RPAREN:

	83 return _finishToken(TokenKind.RPAREN);

	84 case TokenChar.LBRACE:

	85 return _finishToken(TokenKind.LBRACE);

	86 case TokenChar.RBRACE:

	87 return _finishToken(TokenKind.RBRACE);

	88 case TokenChar.LBRACK:

	89 return _finishToken(TokenKind.LBRACK);

	90 case TokenChar.RBRACK:

	91 if (_maybeEatChar(TokenChar.RBRACK) &&

	92 _maybeEatChar(TokenChar.GREATER)) {

	93 // ]]>

	94 return next();

	95 }

	96 return _finishToken(TokenKind.RBRACK);

	97 case TokenChar.HASH:

	98 return _finishToken(TokenKind.HASH);

	99 case TokenChar.PLUS:

	100 if (maybeEatDigit()) return finishNumber();

	101 return _finishToken(TokenKind.PLUS);

	102 case TokenChar.MINUS:

	103 if (inSelectorExpression \|\| unicodeRange) {

	104 // If parsing in pseudo function expression then minus is an operator

	105 // not part of identifier e.g., interval value range (e.g. U+400-4ff)

	106 // or minus operator in selector expression.

	107 return _finishToken(TokenKind.MINUS);

	108 } else if (maybeEatDigit()) {

	109 return finishNumber();

	110 } else if (TokenizerHelpers.isIdentifierStart(ch)) {

	111 return finishIdentifier();

	112 }

	113 return _finishToken(TokenKind.MINUS);

	114 case TokenChar.GREATER:

	115 return _finishToken(TokenKind.GREATER);

	116 case TokenChar.TILDE:

	117 if (_maybeEatChar(TokenChar.EQUALS)) {

	118 return _finishToken(TokenKind.INCLUDES); // ~=

	119 }

	120 return _finishToken(TokenKind.TILDE);

	121 case TokenChar.ASTERISK:

	122 if (_maybeEatChar(TokenChar.EQUALS)) {

	123 return _finishToken(TokenKind.SUBSTRING_MATCH); // *=

	124 }

	125 return _finishToken(TokenKind.ASTERISK);

	126 case TokenChar.AMPERSAND:

	127 return _finishToken(TokenKind.AMPERSAND);

	128 case TokenChar.NAMESPACE:

	129 if (_maybeEatChar(TokenChar.EQUALS)) {

	130 return _finishToken(TokenKind.DASH_MATCH); // \|=

	131 }

	132 return _finishToken(TokenKind.NAMESPACE);

	133 case TokenChar.COLON:

	134 return _finishToken(TokenKind.COLON);

	135 case TokenChar.COMMA:

	136 return _finishToken(TokenKind.COMMA);

	137 case TokenChar.SEMICOLON:

	138 return _finishToken(TokenKind.SEMICOLON);

	139 case TokenChar.PERCENT:

	140 return _finishToken(TokenKind.PERCENT);

	141 case TokenChar.SINGLE_QUOTE:

	142 return _finishToken(TokenKind.SINGLE_QUOTE);

	143 case TokenChar.DOUBLE_QUOTE:

	144 return _finishToken(TokenKind.DOUBLE_QUOTE);

	145 case TokenChar.SLASH:

	146 if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();

	147 return _finishToken(TokenKind.SLASH);

	148 case TokenChar.LESS: // <!--

	149 if (_maybeEatChar(TokenChar.BANG)) {

	150 if (_maybeEatChar(TokenChar.MINUS) &&

	151 _maybeEatChar(TokenChar.MINUS)) {

	152 return finishMultiLineComment();

	153 } else if (_maybeEatChar(TokenChar.LBRACK) &&

	154 _maybeEatChar(CDATA_NAME[0]) &&

	155 _maybeEatChar(CDATA_NAME[1]) &&

	156 _maybeEatChar(CDATA_NAME[2]) &&

	157 _maybeEatChar(CDATA_NAME[3]) &&

	158 _maybeEatChar(CDATA_NAME[4]) &&

	159 _maybeEatChar(TokenChar.LBRACK)) {

	160 // <![CDATA[

	161 return next();

	162 }

	163 }

	164 return _finishToken(TokenKind.LESS);

	165 case TokenChar.EQUALS:

	166 return _finishToken(TokenKind.EQUALS);

	167 case TokenChar.CARET:

	168 if (_maybeEatChar(TokenChar.EQUALS)) {

	169 return _finishToken(TokenKind.PREFIX_MATCH); // ^=

	170 }

	171 return _finishToken(TokenKind.CARET);

	172 case TokenChar.DOLLAR:

	173 if (_maybeEatChar(TokenChar.EQUALS)) {

	174 return _finishToken(TokenKind.SUFFIX_MATCH); // $=

	175 }

	176 return _finishToken(TokenKind.DOLLAR);

	177 case TokenChar.BANG:

	178 Token tok = finishIdentifier();

	179 return (tok == null) ? _finishToken(TokenKind.BANG) : tok;

	180 default:

	181 // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's

	182 // appropriate outside of a few specific places; certainly shouldn't

	183 // be parsed in selectors.

	184 if (!inSelector && ch == TokenChar.BACKSLASH) {

	185 return _finishToken(TokenKind.BACKSLASH);

	186 }

	187

	188 if (unicodeRange) {

	189 // Three types of unicode ranges:

	190 // - single code point (e.g. U+416)

	191 // - interval value range (e.g. U+400-4ff)

	192 // - range where trailing ‘?’ characters imply ‘any digit value’

	193 // (e.g. U+4??)

	194 if (maybeEatHexDigit()) {

	195 var t = finishHexNumber();

	196 // Any question marks then it's a HEX_RANGE not HEX_NUMBER.

	197 if (maybeEatQuestionMark()) finishUnicodeRange();

	198 return t;

	199 } else if (maybeEatQuestionMark()) {

	200 // HEX_RANGE U+N???

	201 return finishUnicodeRange();

	202 } else {

	203 return _errorToken();

	204 }

	205 } else if ((ch == UNICODE_U \|\| ch == UNICODE_LOWER_U) &&

	206 (_peekChar() == UNICODE_PLUS)) {

	207 // Unicode range: U+uNumber[-U+uNumber]

	208 // uNumber = 0..10FFFF

	209 _nextChar(); // Skip +

	210 _startIndex = _index; // Starts at the number

	211 return _finishToken(TokenKind.UNICODE_RANGE);

	212 } else if (varDef(ch)) {

	213 return _finishToken(TokenKind.VAR_DEFINITION);

	214 } else if (varUsage(ch)) {

	215 return _finishToken(TokenKind.VAR_USAGE);

	216 } else if (TokenizerHelpers.isIdentifierStart(ch)) {

	217 return finishIdentifier();

	218 } else if (TokenizerHelpers.isDigit(ch)) {

	219 return finishNumber();

	220 }

	221 return _errorToken();

	222 }

	223 }

	224

	225 bool varDef(int ch) {

	226 return ch == 'v'.codeUnitAt(0) &&

	227 _maybeEatChar('a'.codeUnitAt(0)) &&

	228 _maybeEatChar('r'.codeUnitAt(0)) &&

	229 _maybeEatChar('-'.codeUnitAt(0));

	230 }

	231

	232 bool varUsage(int ch) {

	233 return ch == 'v'.codeUnitAt(0) &&

	234 _maybeEatChar('a'.codeUnitAt(0)) &&

	235 _maybeEatChar('r'.codeUnitAt(0)) &&

	236 (_peekChar() == '-'.codeUnitAt(0));

	237 }

	238

	239 Token _errorToken([String message = null]) {

	240 return _finishToken(TokenKind.ERROR);

	241 }

	242

	243 int getIdentifierKind() {

	244 // Is the identifier a unit type?

	245 int tokId = -1;

	246

	247 // Don't match units in selectors or selector expressions.

	248 if (!inSelectorExpression && !inSelector) {

	249 tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);

	250 }

	251 if (tokId == -1) {

	252 tokId = (_text.substring(_startIndex, _index) == '!important')

	253 ? TokenKind.IMPORTANT

	254 : -1;

	255 }

	256

	257 return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;

	258 }

	259

	260 Token finishIdentifier() {

	261 // If we encounter an escape sequence, remember it so we can post-process

	262 // to unescape.

	263 var chars = [];

	264

	265 // backup so we can start with the first character

	266 int validateFrom = _index;

	267 _index = _startIndex;

	268 while (_index < _text.length) {

	269 int ch = _text.codeUnitAt(_index);

	270

	271 // If the previous character was "\" we need to escape. T

	272 // http://www.w3.org/TR/CSS21/syndata.html#characters

	273 // if followed by hexadecimal digits, create the appropriate character.

	274 // otherwise, include the character in the identifier and don't treat it

	275 // specially.

	276 if (ch == 92 /\/ && _inString) {

	277 int startHex = ++_index;

	278 eatHexDigits(startHex + 6);

	279 if (_index != startHex) {

	280 // Parse the hex digits and add that character.

	281 chars.add(int.parse('0x' + _text.substring(startHex, _index)));

	282

	283 if (_index == _text.length) break;

	284

	285 // if we stopped the hex because of a whitespace char, skip it

	286 ch = _text.codeUnitAt(_index);

	287 if (_index - startHex != 6 &&

	288 (ch == TokenChar.SPACE \|\|

	289 ch == TokenChar.TAB \|\|

	290 ch == TokenChar.RETURN \|\|

	291 ch == TokenChar.NEWLINE)) {

	292 _index++;

	293 }

	294 } else {

	295 // not a digit, just add the next character literally

	296 if (_index == _text.length) break;

	297 chars.add(_text.codeUnitAt(_index++));

	298 }

	299 } else if (_index < validateFrom \|\|

	300 (inSelectorExpression

	301 ? TokenizerHelpers.isIdentifierPartExpr(ch)

	302 : TokenizerHelpers.isIdentifierPart(ch))) {

	303 chars.add(ch);

	304 _index++;

	305 } else {

	306 // Not an identifier or escaped character.

	307 break;

	308 }

	309 }

	310

	311 var span = _file.span(_startIndex, _index);

	312 var text = new String.fromCharCodes(chars);

	313

	314 return new IdentifierToken(text, getIdentifierKind(), span);

	315 }

	316

	317 Token finishNumber() {

	318 eatDigits();

	319

	320 if (_peekChar() == 46 /./) {

	321 // Handle the case of 1.toString().

	322 _nextChar();

	323 if (TokenizerHelpers.isDigit(_peekChar())) {

	324 eatDigits();

	325 return _finishToken(TokenKind.DOUBLE);

	326 } else {

	327 _index -= 1;

	328 }

	329 }

	330

	331 return _finishToken(TokenKind.INTEGER);

	332 }

	333

	334 bool maybeEatDigit() {

	335 if (_index < _text.length &&

	336 TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {

	337 _index += 1;

	338 return true;

	339 }

	340 return false;

	341 }

	342

	343 Token finishHexNumber() {

	344 eatHexDigits(_text.length);

	345 return _finishToken(TokenKind.HEX_INTEGER);

	346 }

	347

	348 void eatHexDigits(int end) {

	349 end = math.min(end, _text.length);

	350 while (_index < end) {

	351 if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {

	352 _index += 1;

	353 } else {

	354 return;

	355 }

	356 }

	357 }

	358

	359 bool maybeEatHexDigit() {

	360 if (_index < _text.length &&

	361 TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {

	362 _index += 1;

	363 return true;

	364 }

	365 return false;

	366 }

	367

	368 bool maybeEatQuestionMark() {

	369 if (_index < _text.length && _text.codeUnitAt(_index) == QUESTION_MARK) {

	370 _index += 1;

	371 return true;

	372 }

	373 return false;

	374 }

	375

	376 void eatQuestionMarks() {

	377 while (_index < _text.length) {

	378 if (_text.codeUnitAt(_index) == QUESTION_MARK) {

	379 _index += 1;

	380 } else {

	381 return;

	382 }

	383 }

	384 }

	385

	386 Token finishUnicodeRange() {

	387 eatQuestionMarks();

	388 return _finishToken(TokenKind.HEX_RANGE);

	389 }

	390

	391 Token finishMultiLineComment() {

	392 while (true) {

	393 int ch = _nextChar();

	394 if (ch == 0) {

	395 return _finishToken(TokenKind.INCOMPLETE_COMMENT);

	396 } else if (ch == 42 /''*/) {

	397 if (_maybeEatChar(47 /'/'/)) {

	398 if (_inString) {

	399 return next();

	400 } else {

	401 return _finishToken(TokenKind.COMMENT);

	402 }

	403 }

	404 } else if (ch == TokenChar.MINUS) {

	405 /* Check if close part of Comment Definition --> (CDC). */

	406 if (_maybeEatChar(TokenChar.MINUS)) {

	407 if (_maybeEatChar(TokenChar.GREATER)) {

	408 if (_inString) {

	409 return next();

	410 } else {

	411 return _finishToken(TokenKind.HTML_COMMENT);

	412 }

	413 }

	414 }

	415 }

	416 }

	417 return _errorToken();

	418 }

	419 }

	420

	421 /** Static helper methods. */

	422 class TokenizerHelpers {

	423 static bool isIdentifierStart(int c) {

	424 return isIdentifierStartExpr(c) \|\| c == 45 /-/;

	425 }

	426

	427 static bool isDigit(int c) {

	428 return (c >= 48 /0/ && c <= 57 /9/);

	429 }

	430

	431 static bool isHexDigit(int c) {

	432 return (isDigit(c) \|\|

	433 (c >= 97 /a/ && c <= 102 /f/) \|\|

	434 (c >= 65 /A/ && c <= 70 /F/));

	435 }

	436

	437 static bool isIdentifierPart(int c) {

	438 return isIdentifierPartExpr(c) \|\| c == 45 /-/;

	439 }

	440

	441 /** Pseudo function expressions identifiers can't have a minus sign. */

	442 static bool isIdentifierStartExpr(int c) {

	443 return ((c >= 97 /a/ && c <= 122 /z/) \|\|

	444 (c >= 65 /A/ && c <= 90 /Z/) \|\|

	445 // Note: Unicode 10646 chars U+00A0 or higher are allowed, see:

	446 // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier

	447 // http://www.w3.org/TR/CSS21/syndata.html#characters

	448 // Also, escaped character should be allowed.

	449 c == 95 /_/ \|\| c >= 0xA0 \|\| c == 92 /\/);

	450 }

	451

	452 /** Pseudo function expressions identifiers can't have a minus sign. */

	453 static bool isIdentifierPartExpr(int c) {

	454 return (isIdentifierStartExpr(c) \|\| isDigit(c));

	455 }

	456 }

OLD	NEW