csslib/lib/src/tokenizer.dart - Issue 1400473008: Roll Observatory packages and add a roll script

Side by Side Diff: csslib/lib/src/tokenizer.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.

4

5 part of csslib.parser;

6

/**
 * CSS tokenizer: each call to [next] scans one [Token] from the source text.
 *
 * The scanning state (`_text`, `_index`, `_startIndex`, `_inString`,
 * `inSelector`, `inSelectorExpression`) and the low-level helpers
 * (`_nextChar`, `_peekChar`, `_maybeEatChar`, `_finishToken`,
 * `finishWhitespace`, `eatDigits`) are not declared in this class; they are
 * expected to be provided by [TokenizerBase].
 */
class Tokenizer extends TokenizerBase {
  /** U+ prefix for unicode characters. */
  final UNICODE_U = 'U'.codeUnitAt(0);
  final UNICODE_LOWER_U = 'u'.codeUnitAt(0);
  final UNICODE_PLUS = '+'.codeUnitAt(0);

  /** Code unit of '?', the wildcard digit of a unicode range (e.g. U+4??). */
  final QUESTION_MARK = '?'.codeUnitAt(0);

  /** CDATA keyword. */
  final List<int> CDATA_NAME = 'CDATA'.codeUnits;

  /** Forwards all arguments unchanged to the [TokenizerBase] constructor. */
  Tokenizer(SourceFile file, String text, bool skipWhitespace, [int index = 0])
      : super(file, text, skipWhitespace, index);

  /**
   * Scans and returns the next token, starting at the current `_index`.
   *
   * When [unicodeRange] is true the tokenizer is inside a unicode-range value:
   * a minus sign is always returned as an operator, and any character that
   * reaches the default branch is scanned as a hex number or a '?' range.
   *
   * The `]]>` and `<![CDATA[` markers are skipped by recursing into [next];
   * those recursive calls do not forward [unicodeRange].
   */
  Token next({unicodeRange: false}) {
    // keep track of our starting position
    _startIndex = _index;

    int ch;
    ch = _nextChar();
    switch (ch) {
      case TokenChar.NEWLINE:
      case TokenChar.RETURN:
      case TokenChar.SPACE:
      case TokenChar.TAB:
        return finishWhitespace();
      case TokenChar.END_OF_FILE:
        return _finishToken(TokenKind.END_OF_FILE);
      case TokenChar.AT:
        int peekCh = _peekChar();
        if (TokenizerHelpers.isIdentifierStart(peekCh)) {
          // Remember where the '@' token started so we can rewind if the name
          // that follows is not a known directive.
          var oldIndex = _index;
          var oldStartIndex = _startIndex;

          _startIndex = _index;
          ch = _nextChar();
          finishIdentifier();

          // Is it a directive?
          int tokId = TokenKind.matchDirectives(
              _text, _startIndex, _index - _startIndex);
          if (tokId == -1) {
            // No, is it a margin directive?
            tokId = TokenKind.matchMarginDirectives(
                _text, _startIndex, _index - _startIndex);
          }

          if (tokId != -1) {
            return _finishToken(tokId);
          } else {
            // Didn't find a CSS directive or margin directive so the @name is
            // probably the Less definition '@name: value_variable_definition'.
            _startIndex = oldStartIndex;
            _index = oldIndex;
          }
        }
        return _finishToken(TokenKind.AT);
      case TokenChar.DOT:
        int start = _startIndex; // Start where the dot started.
        if (maybeEatDigit()) {
          // looks like a number dot followed by digit(s).
          Token number = finishNumber();
          if (number.kind == TokenKind.INTEGER) {
            // It's a number but it's preceded by a dot, so make it a double.
            _startIndex = start;
            return _finishToken(TokenKind.DOUBLE);
          } else {
            // Don't allow dot followed by a double (e.g, '..1').
            return _errorToken();
          }
        }
        // It's really a dot.
        return _finishToken(TokenKind.DOT);
      case TokenChar.LPAREN:
        return _finishToken(TokenKind.LPAREN);
      case TokenChar.RPAREN:
        return _finishToken(TokenKind.RPAREN);
      case TokenChar.LBRACE:
        return _finishToken(TokenKind.LBRACE);
      case TokenChar.RBRACE:
        return _finishToken(TokenKind.RBRACE);
      case TokenChar.LBRACK:
        return _finishToken(TokenKind.LBRACK);
      case TokenChar.RBRACK:
        if (_maybeEatChar(TokenChar.RBRACK) &&
            _maybeEatChar(TokenChar.GREATER)) {
          // ]]>
          return next();
        }
        return _finishToken(TokenKind.RBRACK);
      case TokenChar.HASH:
        return _finishToken(TokenKind.HASH);
      case TokenChar.PLUS:
        if (maybeEatDigit()) return finishNumber();
        return _finishToken(TokenKind.PLUS);
      case TokenChar.MINUS:
        if (inSelectorExpression || unicodeRange) {
          // If parsing in pseudo function expression then minus is an operator
          // not part of identifier e.g., interval value range (e.g. U+400-4ff)
          // or minus operator in selector expression.
          return _finishToken(TokenKind.MINUS);
        } else if (maybeEatDigit()) {
          return finishNumber();
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          return finishIdentifier();
        }
        // NOTE(review): ch is the minus sign here and isIdentifierStart
        // accepts code unit 45 ('-'); if TokenChar.MINUS is 45 this return is
        // never reached - confirm.
        return _finishToken(TokenKind.MINUS);
      case TokenChar.GREATER:
        return _finishToken(TokenKind.GREATER);
      case TokenChar.TILDE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.INCLUDES); // ~=
        }
        return _finishToken(TokenKind.TILDE);
      case TokenChar.ASTERISK:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUBSTRING_MATCH); // *=
        }
        return _finishToken(TokenKind.ASTERISK);
      case TokenChar.AMPERSAND:
        return _finishToken(TokenKind.AMPERSAND);
      case TokenChar.NAMESPACE:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.DASH_MATCH); // |=
        }
        return _finishToken(TokenKind.NAMESPACE);
      case TokenChar.COLON:
        return _finishToken(TokenKind.COLON);
      case TokenChar.COMMA:
        return _finishToken(TokenKind.COMMA);
      case TokenChar.SEMICOLON:
        return _finishToken(TokenKind.SEMICOLON);
      case TokenChar.PERCENT:
        return _finishToken(TokenKind.PERCENT);
      case TokenChar.SINGLE_QUOTE:
        return _finishToken(TokenKind.SINGLE_QUOTE);
      case TokenChar.DOUBLE_QUOTE:
        return _finishToken(TokenKind.DOUBLE_QUOTE);
      case TokenChar.SLASH:
        if (_maybeEatChar(TokenChar.ASTERISK)) return finishMultiLineComment();
        return _finishToken(TokenKind.SLASH);
      case TokenChar.LESS: // <!--
        if (_maybeEatChar(TokenChar.BANG)) {
          if (_maybeEatChar(TokenChar.MINUS) &&
              _maybeEatChar(TokenChar.MINUS)) {
            return finishMultiLineComment();
          } else if (_maybeEatChar(TokenChar.LBRACK) &&
              _maybeEatChar(CDATA_NAME[0]) &&
              _maybeEatChar(CDATA_NAME[1]) &&
              _maybeEatChar(CDATA_NAME[2]) &&
              _maybeEatChar(CDATA_NAME[3]) &&
              _maybeEatChar(CDATA_NAME[4]) &&
              _maybeEatChar(TokenChar.LBRACK)) {
            // <![CDATA[
            return next();
          }
        }
        return _finishToken(TokenKind.LESS);
      case TokenChar.EQUALS:
        return _finishToken(TokenKind.EQUALS);
      case TokenChar.CARET:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.PREFIX_MATCH); // ^=
        }
        return _finishToken(TokenKind.CARET);
      case TokenChar.DOLLAR:
        if (_maybeEatChar(TokenChar.EQUALS)) {
          return _finishToken(TokenKind.SUFFIX_MATCH); // $=
        }
        return _finishToken(TokenKind.DOLLAR);
      case TokenChar.BANG:
        // NOTE(review): finishIdentifier, as written in this class, always
        // constructs a token, so the BANG fallback looks unused - confirm.
        Token tok = finishIdentifier();
        return (tok == null) ? _finishToken(TokenKind.BANG) : tok;
      default:
        // TODO(jmesserly): this is used for IE8 detection; I'm not sure it's
        // appropriate outside of a few specific places; certainly shouldn't
        // be parsed in selectors.
        if (!inSelector && ch == TokenChar.BACKSLASH) {
          return _finishToken(TokenKind.BACKSLASH);
        }

        if (unicodeRange) {
          // Three types of unicode ranges:
          // - single code point (e.g. U+416)
          // - interval value range (e.g. U+400-4ff)
          // - range where trailing ‘?’ characters imply ‘any digit value’
          //   (e.g. U+4??)
          if (maybeEatHexDigit()) {
            var t = finishHexNumber();
            // Any question marks then it's a HEX_RANGE not HEX_NUMBER.
            // NOTE(review): the HEX_RANGE token built by finishUnicodeRange is
            // discarded and the HEX_INTEGER token is returned; the '?'
            // characters are only consumed. Left as-is because callers may
            // depend on it - confirm against the parser.
            if (maybeEatQuestionMark()) finishUnicodeRange();
            return t;
          } else if (maybeEatQuestionMark()) {
            // HEX_RANGE U+N???
            return finishUnicodeRange();
          } else {
            return _errorToken();
          }
        } else if ((ch == UNICODE_U || ch == UNICODE_LOWER_U) &&
            (_peekChar() == UNICODE_PLUS)) {
          // Unicode range: U+uNumber[-U+uNumber]
          // uNumber = 0..10FFFF
          _nextChar(); // Skip +
          _startIndex = _index; // Starts at the number
          return _finishToken(TokenKind.UNICODE_RANGE);
        } else if (varDef(ch)) {
          return _finishToken(TokenKind.VAR_DEFINITION);
        } else if (varUsage(ch)) {
          return _finishToken(TokenKind.VAR_USAGE);
        } else if (TokenizerHelpers.isIdentifierStart(ch)) {
          return finishIdentifier();
        } else if (TokenizerHelpers.isDigit(ch)) {
          return finishNumber();
        }
        return _errorToken();
    }
  }

  /**
   * Whether [ch] is 'v' and the following input is "ar-" (i.e. "var-").
   *
   * Matching characters are consumed as they are tested and are not put back
   * when a later character fails to match.
   */
  bool varDef(int ch) {
    return ch == 'v'.codeUnitAt(0) &&
        _maybeEatChar('a'.codeUnitAt(0)) &&
        _maybeEatChar('r'.codeUnitAt(0)) &&
        _maybeEatChar('-'.codeUnitAt(0));
  }

  /**
   * Whether [ch] is 'v', the following input is "ar", and the character after
   * that is '-'.
   *
   * "ar" is consumed as it is tested; the '-' is only peeked.
   */
  bool varUsage(int ch) {
    return ch == 'v'.codeUnitAt(0) &&
        _maybeEatChar('a'.codeUnitAt(0)) &&
        _maybeEatChar('r'.codeUnitAt(0)) &&
        (_peekChar() == '-'.codeUnitAt(0));
  }

  /**
   * Finishes the current token as [TokenKind.ERROR].
   *
   * [message] is accepted for callers' convenience but is not used.
   */
  Token _errorToken([String message = null]) {
    return _finishToken(TokenKind.ERROR);
  }

  /**
   * Classifies the text between `_startIndex` and `_index`.
   *
   * Returns a unit kind when [TokenKind.matchUnits] recognises the text (units
   * are not matched inside selectors or selector expressions),
   * [TokenKind.IMPORTANT] for the literal `!important`, and
   * [TokenKind.IDENTIFIER] otherwise.
   */
  int getIdentifierKind() {
    // Is the identifier a unit type?
    int tokId = -1;

    // Don't match units in selectors or selector expressions.
    if (!inSelectorExpression && !inSelector) {
      tokId = TokenKind.matchUnits(_text, _startIndex, _index - _startIndex);
    }
    if (tokId == -1) {
      tokId = (_text.substring(_startIndex, _index) == '!important')
          ? TokenKind.IMPORTANT
          : -1;
    }

    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
  }

  /**
   * Scans an identifier that begins at `_startIndex` and returns it as an
   * [IdentifierToken] whose text has escape sequences resolved.
   *
   * Characters already consumed by the caller (those before the `_index` seen
   * on entry) are accepted without validation; later characters must satisfy
   * the identifier-part predicate for the current mode.
   */
  Token finishIdentifier() {
    // If we encounter an escape sequence, remember it so we can post-process
    // to unescape.
    var chars = <int>[];

    // backup so we can start with the first character
    int validateFrom = _index;
    _index = _startIndex;
    while (_index < _text.length) {
      int ch = _text.codeUnitAt(_index);

      // If the current character is "\" we need to process an escape.
      // http://www.w3.org/TR/CSS21/syndata.html#characters
      // if followed by hexadecimal digits, create the appropriate character.
      // otherwise, include the character in the identifier and don't treat it
      // specially.
      if (ch == 92 /*\*/ && _inString) {
        int startHex = ++_index;
        // An escape is at most six hex digits long.
        eatHexDigits(startHex + 6);
        if (_index != startHex) {
          // Parse the hex digits and add that character.
          chars.add(int.parse(_text.substring(startHex, _index), radix: 16));

          if (_index == _text.length) break;

          // if we stopped the hex because of a whitespace char, skip it
          ch = _text.codeUnitAt(_index);
          if (_index - startHex != 6 &&
              (ch == TokenChar.SPACE ||
                  ch == TokenChar.TAB ||
                  ch == TokenChar.RETURN ||
                  ch == TokenChar.NEWLINE)) {
            _index++;
          }
        } else {
          // not a digit, just add the next character literally
          if (_index == _text.length) break;
          chars.add(_text.codeUnitAt(_index++));
        }
      } else if (_index < validateFrom ||
          (inSelectorExpression
              ? TokenizerHelpers.isIdentifierPartExpr(ch)
              : TokenizerHelpers.isIdentifierPart(ch))) {
        chars.add(ch);
        _index++;
      } else {
        // Not an identifier or escaped character.
        break;
      }
    }

    var span = _file.span(_startIndex, _index);
    var text = new String.fromCharCodes(chars);

    return new IdentifierToken(text, getIdentifierKind(), span);
  }

  /**
   * Finishes a number whose first digit has already been consumed.
   *
   * Returns a [TokenKind.DOUBLE] token when the digits are followed by a dot
   * and at least one more digit, otherwise a [TokenKind.INTEGER] token (a
   * trailing dot is left unconsumed).
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46 /*.*/) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return _finishToken(TokenKind.DOUBLE);
      } else {
        // The dot is not part of the number; put it back.
        _index -= 1;
      }
    }

    return _finishToken(TokenKind.INTEGER);
  }

  /** Consumes the next character if it is a decimal digit; returns whether it did. */
  bool maybeEatDigit() {
    if (_index < _text.length &&
        TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes all following hex digits and finishes a [TokenKind.HEX_INTEGER] token. */
  Token finishHexNumber() {
    eatHexDigits(_text.length);
    return _finishToken(TokenKind.HEX_INTEGER);
  }

  /**
   * Advances `_index` over consecutive hex digits, stopping before index
   * [end] (clamped to the text length) or at the first non-hex character.
   */
  void eatHexDigits(int end) {
    end = math.min(end, _text.length);
    while (_index < end) {
      if (TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  /** Consumes the next character if it is a hex digit; returns whether it did. */
  bool maybeEatHexDigit() {
    if (_index < _text.length &&
        TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes the next character if it is '?'; returns whether it did. */
  bool maybeEatQuestionMark() {
    if (_index < _text.length && _text.codeUnitAt(_index) == QUESTION_MARK) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Advances `_index` over a run of consecutive '?' characters. */
  void eatQuestionMarks() {
    while (_index < _text.length) {
      if (_text.codeUnitAt(_index) == QUESTION_MARK) {
        _index += 1;
      } else {
        return;
      }
    }
  }

  /** Consumes any remaining '?' characters and finishes a [TokenKind.HEX_RANGE] token. */
  Token finishUnicodeRange() {
    eatQuestionMarks();
    return _finishToken(TokenKind.HEX_RANGE);
  }

  /**
   * Scans to the end of a comment whose opening delimiter has already been
   * consumed.
   *
   * Either closing form (star-slash or `-->`) ends the comment regardless of
   * which opener started it. If `_inString` is set the comment is dropped and
   * the token after it is returned; otherwise a [TokenKind.COMMENT] or
   * [TokenKind.HTML_COMMENT] token is returned. If `_nextChar` yields 0 before
   * a closer is found, a [TokenKind.INCOMPLETE_COMMENT] token is returned.
   */
  Token finishMultiLineComment() {
    // Every path out of this loop returns, so nothing follows it.
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        // NOTE(review): 0 is presumably the end-of-input marker returned by
        // _nextChar - confirm against TokenizerBase.
        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
      } else if (ch == 42 /* '*' */) {
        if (_maybeEatChar(47 /* '/' */)) {
          if (_inString) {
            return next();
          } else {
            return _finishToken(TokenKind.COMMENT);
          }
        }
      } else if (ch == TokenChar.MINUS) {
        // Check if close part of Comment Definition --> (CDC).
        if (_maybeEatChar(TokenChar.MINUS)) {
          if (_maybeEatChar(TokenChar.GREATER)) {
            if (_inString) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
      }
    }
  }
}

420

/**
 * Static helper methods.
 *
 * Every predicate takes a single UTF-16 code unit and classifies it.
 */
class TokenizerHelpers {
  /**
   * Whether [c] may start an identifier: anything accepted by
   * [isIdentifierStartExpr], or a minus sign.
   */
  static bool isIdentifierStart(int c) {
    return isIdentifierStartExpr(c) || c == 45 /*-*/;
  }

  /** Whether [c] is an ASCII decimal digit ('0'..'9'). */
  static bool isDigit(int c) {
    return (c >= 48 /*0*/ && c <= 57 /*9*/);
  }

  /** Whether [c] is an ASCII hexadecimal digit ('0'..'9', 'a'..'f', 'A'..'F'). */
  static bool isHexDigit(int c) {
    return (isDigit(c) ||
        (c >= 97 /*a*/ && c <= 102 /*f*/) ||
        (c >= 65 /*A*/ && c <= 70 /*F*/));
  }

  /**
   * Whether [c] may continue an identifier: anything accepted by
   * [isIdentifierPartExpr], or a minus sign.
   */
  static bool isIdentifierPart(int c) {
    return isIdentifierPartExpr(c) || c == 45 /*-*/;
  }

  /**
   * Whether [c] may start an identifier inside a pseudo function expression.
   *
   * Pseudo function expressions identifiers can't have a minus sign. Accepts
   * ASCII letters, underscore, backslash (start of an escape) and any code
   * unit at or above U+00A0.
   */
  static bool isIdentifierStartExpr(int c) {
    return ((c >= 97 /*a*/ && c <= 122 /*z*/) ||
        (c >= 65 /*A*/ && c <= 90 /*Z*/) ||
        // Note: Unicode 10646 chars U+00A0 or higher are allowed, see:
        // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
        // http://www.w3.org/TR/CSS21/syndata.html#characters
        // Also, escaped character should be allowed.
        c == 95 /*_*/ ||
        c >= 0xA0 ||
        c == 92 /*\*/);
  }

  /**
   * Whether [c] may continue an identifier inside a pseudo function
   * expression: an identifier-start character or a decimal digit.
   *
   * Pseudo function expressions identifiers can't have a minus sign.
   */
  static bool isIdentifierPartExpr(int c) {
    return (isIdentifierStartExpr(c) || isDigit(c));
  }
}

OLD	NEW

« no previous file with comments | « csslib/lib/src/token.dart ('k') | csslib/lib/src/tokenizer_base.dart » ('j') | no next file with comments »