observatory_pub_packages/csslib/src/tokenizer.dart - Issue 816693004: Add observatory_pub_packages snapshot to third_party

Side by Side Diff: observatory_pub_packages/csslib/src/tokenizer.dart

Issue 816693004: Add observatory_pub_packages snapshot to third_party (Closed) Base URL: http://dart.googlecode.com/svn/third_party/

Patch Set: Created 6 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 part of csslib.parser;

	6

	7 class Tokenizer extends TokenizerBase {

	/** Code units of the `U+` / `u+` prefix that introduces a unicode range. */
	final int UNICODE_U = 'U'.codeUnitAt(0);
	final int UNICODE_LOWER_U = 'u'.codeUnitAt(0);
	final int UNICODE_PLUS = '+'.codeUnitAt(0);

	/** Code unit of `?`, the wildcard digit inside a unicode range. */
	final int QUESTION_MARK = '?'.codeUnitAt(0);

	/** Code units of the `CDATA` keyword, matched after `<![`. */
	final List<int> CDATA_NAME = 'CDATA'.codeUnits;

	17

	/**
	 * Creates a tokenizer over [text]; every argument is forwarded unchanged
	 * to the [TokenizerBase] constructor. [index] defaults to 0.
	 */
	Tokenizer(SourceFile file, String text, bool skipWhitespace, [int index = 0])
	    : super(file, text, skipWhitespace, index);

	21

	/**
	 * Scans one token starting at the current index and returns it.
	 *
	 * When [unicodeRange] is true the scanner is inside a `U+...` range: a
	 * minus sign is always returned as [TokenKind.MINUS], and any character
	 * that reaches the default branch is lexed as hex digits and/or `?`
	 * wildcards (or an error token).
	 *
	 * The CDATA markers `<![CDATA[` and `]]>` are skipped by recursing into
	 * [next]; the recursive call does not forward [unicodeRange].
	 */
	Token next({unicodeRange: false}) {
	  // Remember where this token begins.
	  _startIndex = _index;

	  int ch;
	  ch = _nextChar();
	  switch (ch) {
	    case TokenChar.NEWLINE:
	    case TokenChar.RETURN:
	    case TokenChar.SPACE:
	    case TokenChar.TAB:
	      return finishWhitespace();
	    case TokenChar.END_OF_FILE:
	      return _finishToken(TokenKind.END_OF_FILE);
	    case TokenChar.AT:
	      int peekCh = _peekChar();
	      if (TokenizerHelpers.isIdentifierStart(peekCh)) {
	        // Save the position so we can rewind to just after the '@'.
	        var oldIndex = _index;
	        var oldStartIndex = _startIndex;

	        // Scan the name after '@'; only the resulting _index matters here,
	        // the identifier token itself is not needed.
	        _startIndex = _index;
	        _nextChar();
	        finishIdentifier();

	        // Is it a directive?
	        int tokId = TokenKind.matchDirectives(_text, _startIndex,
	            _index - _startIndex);
	        if (tokId == -1) {
	          // No, is it a margin directive?
	          tokId = TokenKind.matchMarginDirectives(_text, _startIndex,
	              _index - _startIndex);
	        }

	        if (tokId != -1) {
	          return _finishToken(tokId);
	        } else {
	          // Didn't find a CSS directive or margin directive so the @name is
	          // probably the Less definition '@name: value_variable_definition'.
	          _startIndex = oldStartIndex;
	          _index = oldIndex;
	        }
	      }
	      return _finishToken(TokenKind.AT);
	    case TokenChar.DOT:
	      int start = _startIndex; // Start where the dot started.
	      if (maybeEatDigit()) {
	        // Looks like a dot followed by digit(s).
	        Token number = finishNumber();
	        if (number.kind == TokenKind.INTEGER) {
	          // It's a number but it's preceded by a dot, so make it a double.
	          _startIndex = start;
	          return _finishToken(TokenKind.DOUBLE);
	        } else {
	          // Don't allow dot followed by a double (e.g, '..1').
	          return _errorToken();
	        }
	      }
	      // It's really a dot.
	      return _finishToken(TokenKind.DOT);
	    case TokenChar.LPAREN:
	      return _finishToken(TokenKind.LPAREN);
	    case TokenChar.RPAREN:
	      return _finishToken(TokenKind.RPAREN);
	    case TokenChar.LBRACE:
	      return _finishToken(TokenKind.LBRACE);
	    case TokenChar.RBRACE:
	      return _finishToken(TokenKind.RBRACE);
	    case TokenChar.LBRACK:
	      return _finishToken(TokenKind.LBRACK);
	    case TokenChar.RBRACK:
	      if (_maybeEatChar(TokenChar.RBRACK) &&
	          _maybeEatChar(TokenChar.GREATER)) {
	        // ]]>
	        return next();
	      }
	      return _finishToken(TokenKind.RBRACK);
	    case TokenChar.HASH:
	      return _finishToken(TokenKind.HASH);
	    case TokenChar.PLUS:
	      if (maybeEatDigit()) return finishNumber();
	      return _finishToken(TokenKind.PLUS);
	    case TokenChar.MINUS:
	      if (inSelectorExpression || unicodeRange) {
	        // If parsing in pseudo function expression then minus is an operator
	        // not part of identifier e.g., interval value range (e.g. U+400-4ff)
	        // or minus operator in selector expression.
	        return _finishToken(TokenKind.MINUS);
	      } else if (maybeEatDigit()) {
	        return finishNumber();
	      } else if (TokenizerHelpers.isIdentifierStart(ch)) {
	        // NOTE(review): isIdentifierStart accepts 45 ('-'); if
	        // TokenChar.MINUS is 45 this branch is always taken and the final
	        // MINUS return below is unreachable -- confirm intent.
	        return finishIdentifier();
	      }
	      return _finishToken(TokenKind.MINUS);
	    case TokenChar.GREATER:
	      return _finishToken(TokenKind.GREATER);
	    case TokenChar.TILDE:
	      if (_maybeEatChar(TokenChar.EQUALS)) {
	        return _finishToken(TokenKind.INCLUDES); // ~=
	      }
	      return _finishToken(TokenKind.TILDE);
	    case TokenChar.ASTERISK:
	      if (_maybeEatChar(TokenChar.EQUALS)) {
	        return _finishToken(TokenKind.SUBSTRING_MATCH); // *=
	      }
	      return _finishToken(TokenKind.ASTERISK);
	    case TokenChar.AMPERSAND:
	      return _finishToken(TokenKind.AMPERSAND);
	    case TokenChar.NAMESPACE:
	      if (_maybeEatChar(TokenChar.EQUALS)) {
	        return _finishToken(TokenKind.DASH_MATCH); // |=
	      }
	      return _finishToken(TokenKind.NAMESPACE);
	    case TokenChar.COLON:
	      return _finishToken(TokenKind.COLON);
	    case TokenChar.COMMA:
	      return _finishToken(TokenKind.COMMA);
	    case TokenChar.SEMICOLON:
	      return _finishToken(TokenKind.SEMICOLON);
	    case TokenChar.PERCENT:
	      return _finishToken(TokenKind.PERCENT);
	    case TokenChar.SINGLE_QUOTE:
	      return _finishToken(TokenKind.SINGLE_QUOTE);
	    case TokenChar.DOUBLE_QUOTE:
	      return _finishToken(TokenKind.DOUBLE_QUOTE);
	    case TokenChar.SLASH:
	      if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
	      return _finishToken(TokenKind.SLASH);
	    case TokenChar.LESS: // <!--
	      if (_maybeEatChar(TokenChar.BANG)) {
	        if (_maybeEatChar(TokenChar.MINUS) &&
	            _maybeEatChar(TokenChar.MINUS)) {
	          return finishMultiLineComment();
	        } else if (_maybeEatChar(TokenChar.LBRACK) &&
	            _maybeEatChar(CDATA_NAME[0]) &&
	            _maybeEatChar(CDATA_NAME[1]) &&
	            _maybeEatChar(CDATA_NAME[2]) &&
	            _maybeEatChar(CDATA_NAME[3]) &&
	            _maybeEatChar(CDATA_NAME[4]) &&
	            _maybeEatChar(TokenChar.LBRACK)) {
	          // <![CDATA[
	          return next();
	        }
	      }
	      return _finishToken(TokenKind.LESS);
	    case TokenChar.EQUALS:
	      return _finishToken(TokenKind.EQUALS);
	    case TokenChar.CARET:
	      if (_maybeEatChar(TokenChar.EQUALS)) {
	        return _finishToken(TokenKind.PREFIX_MATCH); // ^=
	      }
	      return _finishToken(TokenKind.CARET);
	    case TokenChar.DOLLAR:
	      if (_maybeEatChar(TokenChar.EQUALS)) {
	        return _finishToken(TokenKind.SUFFIX_MATCH); // $=
	      }
	      return _finishToken(TokenKind.DOLLAR);
	    case TokenChar.BANG:
	      // NOTE(review): finishIdentifier() always constructs a token, so the
	      // null check below never selects TokenKind.BANG -- confirm intent.
	      Token tok = finishIdentifier();
	      return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
	    default:
	      // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's
	      // appropriate outside of a few specific places; certainly shouldn't
	      // be parsed in selectors.
	      if (!inSelector && ch == TokenChar.BACKSLASH) {
	        return _finishToken(TokenKind.BACKSLASH);
	      }

	      if (unicodeRange) {
	        // Three types of unicode ranges:
	        // - single code point (e.g. U+416)
	        // - interval value range (e.g. U+400-4ff)
	        // - range where trailing ‘?’ characters imply ‘any digit value’
	        // (e.g. U+4??)
	        if (maybeEatHexDigit()) {
	          var t = finishHexNumber();
	          // Any question marks then it's a HEX_RANGE not HEX_NUMBER.
	          // NOTE(review): the HEX_RANGE token built by finishUnicodeRange()
	          // is discarded and the HEX_INTEGER token `t` (which ends before
	          // the '?' characters) is returned instead. That looks
	          // unintentional; confirm against the parser before changing.
	          if (maybeEatQuestionMark()) finishUnicodeRange();
	          return t;
	        } else if (maybeEatQuestionMark()) {
	          // HEX_RANGE U+N???
	          return finishUnicodeRange();
	        } else {
	          return _errorToken();
	        }
	      } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) &&
	          (_peekChar() == UNICODE_PLUS)) {
	        // Unicode range: U+uNumber[-U+uNumber]
	        // uNumber = 0..10FFFF
	        _nextChar(); // Skip +
	        _startIndex = _index; // Starts at the number
	        return _finishToken(TokenKind.UNICODE_RANGE);
	      } else if (varDef(ch)) {
	        return _finishToken(TokenKind.VAR_DEFINITION);
	      } else if (varUsage(ch)) {
	        return _finishToken(TokenKind.VAR_USAGE);
	      } else if (TokenizerHelpers.isIdentifierStart(ch)) {
	        return finishIdentifier();
	      } else if (TokenizerHelpers.isDigit(ch)) {
	        return finishNumber();
	      }
	      return _errorToken();
	  }
	}

	225

	/**
	 * Returns true when [ch] is `v` and the following input is `ar-`, i.e. the
	 * `var-` prefix of a variable definition.
	 *
	 * The checks run left to right and stop at the first mismatch; this method
	 * does not rewind the index itself if only part of the prefix matched.
	 */
	bool varDef(int ch) {
	  if (ch != 'v'.codeUnitAt(0)) return false;
	  if (!_maybeEatChar('a'.codeUnitAt(0))) return false;
	  if (!_maybeEatChar('r'.codeUnitAt(0))) return false;
	  return _maybeEatChar('-'.codeUnitAt(0));
	}

	230

	/**
	 * Returns true when [ch] is `v`, the following input is `ar`, and the
	 * character after that is `-`. Unlike [varDef], the `-` is only peeked at,
	 * not eaten.
	 */
	bool varUsage(int ch) {
	  if (ch != 'v'.codeUnitAt(0)) return false;
	  if (!_maybeEatChar('a'.codeUnitAt(0))) return false;
	  if (!_maybeEatChar('r'.codeUnitAt(0))) return false;
	  return _peekChar() == '-'.codeUnitAt(0);
	}

	235

	/**
	 * Finishes the current token as [TokenKind.ERROR].
	 *
	 * [message] is accepted but not used by this implementation.
	 */
	Token _errorToken([String message = null]) =>
	    _finishToken(TokenKind.ERROR);

	239

	/**
	 * Classifies the text between `_startIndex` and `_index`.
	 *
	 * Returns a unit kind when [TokenKind.matchUnits] recognises the text
	 * (units are not matched inside selectors or selector expressions),
	 * [TokenKind.IMPORTANT] for the exact text `!important`, and
	 * [TokenKind.IDENTIFIER] otherwise.
	 */
	int getIdentifierKind() {
	  var kind = -1;

	  // Units are only meaningful outside selectors / selector expressions.
	  if (!(inSelectorExpression || inSelector)) {
	    kind = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
	  }

	  if (kind == -1 && _text.substring(_startIndex, _index) == '!important') {
	    kind = TokenKind.IMPORTANT;
	  }

	  if (kind < 0) return TokenKind.IDENTIFIER;
	  return kind;
	}

	255

	/**
	 * Scans an identifier beginning at `_startIndex` and returns it as an
	 * [IdentifierToken] whose text has backslash escapes resolved and whose
	 * kind comes from [getIdentifierKind].
	 *
	 * Characters between `_startIndex` and the index on entry were already
	 * consumed by the caller and are accepted without validation (unless they
	 * are a backslash, which always starts an escape).
	 */
	Token finishIdentifier() {
	  var chars = <int>[];

	  // Back up so we can start with the first character.
	  int validateFrom = _index;
	  _index = _startIndex;
	  while (_index < _text.length) {
	    int ch = _text.codeUnitAt(_index);

	    // A backslash starts an escape, see
	    // http://www.w3.org/TR/CSS21/syndata.html#characters
	    // If followed by hexadecimal digits (at most 6), create the
	    // appropriate character; otherwise include the next character in the
	    // identifier and don't treat it specially.
	    if (ch == 92 /* backslash */) {
	      int startHex = ++_index;
	      eatHexDigits(startHex + 6);
	      if (_index != startHex) {
	        // Parse the hex digits and add that character. Values above the
	        // Unicode maximum would make String.fromCharCodes throw, so they
	        // are replaced with U+FFFD as CSS 2.1 permits.
	        int codePoint = int.parse('0x${_text.substring(startHex, _index)}');
	        chars.add(codePoint > 0x10FFFF ? 0xFFFD : codePoint);

	        if (_index == _text.length) break;

	        // If we stopped the hex because of a whitespace char, skip it.
	        ch = _text.codeUnitAt(_index);
	        if (_index - startHex != 6 &&
	            (ch == TokenChar.SPACE || ch == TokenChar.TAB ||
	             ch == TokenChar.RETURN || ch == TokenChar.NEWLINE)) {
	          _index++;
	        }
	      } else {
	        // Not a hex digit, just add the next character literally.
	        if (_index == _text.length) break;
	        chars.add(_text.codeUnitAt(_index++));
	      }
	    } else if (_index < validateFrom || (inSelectorExpression
	        ? TokenizerHelpers.isIdentifierPartExpr(ch)
	        : TokenizerHelpers.isIdentifierPart(ch))) {
	      chars.add(ch);
	      _index++;
	    } else {
	      // Not an identifier or escaped character.
	      break;
	    }
	  }

	  var span = _file.span(_startIndex, _index);
	  var text = new String.fromCharCodes(chars);

	  return new IdentifierToken(text, getIdentifierKind(), span);
	}

	310

	/**
	 * Consumes the rest of a numeric literal and returns it as
	 * [TokenKind.DOUBLE] when a `.` followed by at least one digit is present,
	 * otherwise as [TokenKind.INTEGER].
	 */
	Token finishNumber() {
	  eatDigits();

	  if (_peekChar() == 46 /* '.' */) {
	    // Tentatively consume the dot; it is part of the number only when a
	    // digit follows it.
	    _nextChar();
	    if (TokenizerHelpers.isDigit(_peekChar())) {
	      eatDigits();
	      return _finishToken(TokenKind.DOUBLE);
	    } else {
	      // No fraction digits: give the dot back.
	      _index -= 1;
	    }
	  }

	  return _finishToken(TokenKind.INTEGER);
	}

	327

	/**
	 * Advances past the current character and returns true if it is a decimal
	 * digit; otherwise leaves the index untouched and returns false.
	 */
	bool maybeEatDigit() {
	  if (_index >= _text.length) return false;
	  if (!TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) return false;
	  _index += 1;
	  return true;
	}

	336

	/**
	 * Consumes every consecutive hex digit up to the end of the text and
	 * finishes the token as [TokenKind.HEX_INTEGER].
	 */
	Token finishHexNumber() {
	  final limit = _text.length;
	  eatHexDigits(limit);
	  return _finishToken(TokenKind.HEX_INTEGER);
	}

	341

	/**
	 * Advances the index over consecutive hex digits, stopping at the first
	 * non-hex character, at [end], or at the end of the text, whichever comes
	 * first.
	 */
	void eatHexDigits(int end) {
	  final limit = math.min(end, _text.length);
	  while (_index < limit &&
	      TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
	    _index += 1;
	  }
	}

	352

	/**
	 * Advances past the current character and returns true if it is a hex
	 * digit; otherwise leaves the index untouched and returns false.
	 */
	bool maybeEatHexDigit() {
	  if (_index >= _text.length) return false;
	  if (!TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) return false;
	  _index += 1;
	  return true;
	}

	361

	/**
	 * Advances past the current character and returns true if it is `?`;
	 * otherwise leaves the index untouched and returns false.
	 */
	bool maybeEatQuestionMark() {
	  if (_index >= _text.length) return false;
	  if (_text.codeUnitAt(_index) != QUESTION_MARK) return false;
	  _index += 1;
	  return true;
	}

	370

	/** Advances the index over every consecutive `?` character. */
	void eatQuestionMarks() {
	  while (_index < _text.length &&
	      _text.codeUnitAt(_index) == QUESTION_MARK) {
	    _index += 1;
	  }
	}

	380

	/**
	 * Consumes any remaining `?` wildcards and finishes the token as
	 * [TokenKind.HEX_RANGE].
	 */
	Token finishUnicodeRange() {
	  eatQuestionMarks();
	  final rangeToken = _finishToken(TokenKind.HEX_RANGE);
	  return rangeToken;
	}

	385

	/**
	 * Consumes input up to and including the end of a comment.
	 *
	 * Either terminator ends the scan: `*` followed by `/` yields
	 * [TokenKind.COMMENT], and `--` followed by `>` yields
	 * [TokenKind.HTML_COMMENT]. When `_skipWhitespace` is set the comment is
	 * dropped and the token after it is returned instead. Running out of input
	 * first yields [TokenKind.INCOMPLETE_COMMENT].
	 *
	 * NOTE(review): the terminator is not tied to the opener, so a comment
	 * opened with slash-star can be closed by `-->` and vice versa.
	 */
	Token finishMultiLineComment() {
	  while (true) {
	    int ch = _nextChar();
	    // 0 is treated as the end-of-input sentinel (presumably the same value
	    // as TokenChar.END_OF_FILE -- confirm).
	    if (ch == 0) {
	      return _finishToken(TokenKind.INCOMPLETE_COMMENT);
	    } else if (ch == 42 /* '*' */) {
	      if (_maybeEatChar(47 /* '/' */)) {
	        if (_skipWhitespace) {
	          return next();
	        } else {
	          return _finishToken(TokenKind.COMMENT);
	        }
	      }
	    } else if (ch == TokenChar.MINUS) {
	      // Check if close part of Comment Definition --> (CDC).
	      if (_maybeEatChar(TokenChar.MINUS)) {
	        if (_maybeEatChar(TokenChar.GREATER)) {
	          if (_skipWhitespace) {
	            return next();
	          } else {
	            return _finishToken(TokenKind.HTML_COMMENT);
	          }
	        }
	      }
	    }
	  }
	}

	414

	415 }

	416

	/**
	 * Static character-classification helpers used by the tokenizer.
	 *
	 * All methods take a character code and compare it against ASCII ranges
	 * (plus everything at or above U+00A0 for identifiers).
	 */
	class TokenizerHelpers {
	  /** Whether [c] may start an identifier; also accepts a minus sign. */
	  static bool isIdentifierStart(int c) {
	    return isIdentifierStartExpr(c) || c == 45 /* - */;
	  }

	  /** Whether [c] is an ASCII decimal digit `0`-`9`. */
	  static bool isDigit(int c) {
	    return (c >= 48 /* 0 */ && c <= 57 /* 9 */);
	  }

	  /** Whether [c] is a hex digit: `0`-`9`, `a`-`f` or `A`-`F`. */
	  static bool isHexDigit(int c) {
	    return (isDigit(c) ||
	        (c >= 97 /* a */ && c <= 102 /* f */) ||
	        (c >= 65 /* A */ && c <= 70 /* F */));
	  }

	  /** Whether [c] may continue an identifier; also accepts a minus sign. */
	  static bool isIdentifierPart(int c) {
	    return isIdentifierPartExpr(c) || c == 45 /* - */;
	  }

	  /** Pseudo function expressions identifiers can't have a minus sign. */
	  static bool isIdentifierStartExpr(int c) {
	    return ((c >= 97 /* a */ && c <= 122 /* z */) ||
	        (c >= 65 /* A */ && c <= 90 /* Z */) ||
	        // Note: Unicode 10646 chars U+00A0 or higher are allowed, see:
	        // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
	        // http://www.w3.org/TR/CSS21/syndata.html#characters
	        // Also, escaped character should be allowed.
	        c == 95 /* _ */ ||
	        c >= 0xA0 ||
	        c == 92 /* backslash */);
	  }

	  /** Pseudo function expressions identifiers can't have a minus sign. */
	  static bool isIdentifierPartExpr(int c) {
	    return (isIdentifierStartExpr(c) || isDigit(c));
	  }
	}

OLD	NEW

« no previous file with comments | « observatory_pub_packages/csslib/src/token.dart ('k') | observatory_pub_packages/csslib/src/tokenizer_base.dart » ('j') | no next file with comments »