// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Generated by scripts/tokenizer_gen.py.

part of csslib.parser;

/** Tokenizer state to support lookahead for Less' nested selectors. */
class TokenizerState {
  final int index;
  final int startIndex;
  final bool inSelectorExpression;
  final bool inSelector;

  TokenizerState(TokenizerBase base)
      : index = base._index,
        startIndex = base._startIndex,
        inSelectorExpression = base.inSelectorExpression,
        inSelector = base.inSelector;
}

/**
 * The base class for our tokenizer. The hand-coded parts are in this file,
 * with the generated parts in the subclass Tokenizer.
 */
abstract class TokenizerBase {
  final SourceFile _file;
  final String _text;

  bool _inString;

  /**
   * Changes tokenization when in a pseudo function expression. If true then
   * minus signs are handled as operators instead of identifiers.
   */
  bool inSelectorExpression = false;

  /**
   * Changes tokenization when in selectors. If true, it prevents identifiers
   * from being treated as units, which would otherwise break things like
   * ":lang(fr)" or the HTML (unknown) tag name "px", which is legal to use
   * in a selector.
   */
  // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
  // processed as a "fraction" unit token, preventing it from working in
  // places where an identifier is expected. This was breaking selectors like:
  //     :lang(fr)
  // The assumption that "fr" always means fraction (and similar issues with
  // other units) doesn't seem valid. We probably should defer this
  // analysis until we reach places in the parser where units are expected.
  // I'm not sure this is tokenizing as described in the specs:
  //     http://dev.w3.org/csswg/css-syntax/
  //     http://dev.w3.org/csswg/selectors4/
  bool inSelector = false;
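  // For example (illustrative sketch; the parser, not this class, is assumed
  // to toggle this flag around selector parsing):
  //
  //     div:lang(fr) { grid-template-columns: 1fr; }
  //
  // With inSelector == true, "fr" inside :lang(...) stays an identifier; with
  // it false, the "1fr" in the declaration can be scanned as a number
  // followed by a fraction unit.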

  int _index = 0;
  int _startIndex = 0;

  TokenizerBase(this._file, this._text, this._inString, [this._index = 0]);

  Token next();
  int getIdentifierKind();

  /** Snapshot of Tokenizer scanning state. */
  TokenizerState get mark => new TokenizerState(this);

  /** Restore Tokenizer scanning state. */
  void restore(TokenizerState markedData) {
    _index = markedData.index;
    _startIndex = markedData.startIndex;
    inSelectorExpression = markedData.inSelectorExpression;
    inSelector = markedData.inSelector;
  }
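
  // Example use of [mark]/[restore] above (illustrative only; the token-kind
  // test is just a stand-in for whatever a caller looks ahead for):
  //
  //     var state = tokenizer.mark;
  //     var tok = tokenizer.next();
  //     if (tok.kind != TokenKind.LBRACE) {
  //       tokenizer.restore(state); // rewind; the probe consumed nothing
  //     }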

  /** Consumes and returns the next code unit, or 0 at the end of input. */
  int _nextChar() {
    if (_index < _text.length) {
      return _text.codeUnitAt(_index++);
    } else {
      return 0;
    }
  }

  /** Peeks at the next code unit without advancing; 0 at the end of input. */
  int _peekChar() {
    if (_index < _text.length) {
      return _text.codeUnitAt(_index);
    } else {
      return 0;
    }
  }

  /** Consumes the next code unit and returns true only if it equals [ch]. */
  bool _maybeEatChar(int ch) {
    if (_index < _text.length) {
      if (_text.codeUnitAt(_index) == ch) {
        _index++;
        return true;
      } else {
        return false;
      }
    } else {
      return false;
    }
  }

  Token _finishToken(int kind) {
    return new Token(kind, _file.span(_startIndex, _index));
  }

  Token _errorToken([String message = null]) {
    return new ErrorToken(
        TokenKind.ERROR, _file.span(_startIndex, _index), message);
  }

  Token finishWhitespace() {
    // Back up over the whitespace character the caller already consumed.
    _index--;
    while (_index < _text.length) {
      final ch = _text.codeUnitAt(_index++);
      if (ch == TokenChar.SPACE ||
          ch == TokenChar.TAB ||
          ch == TokenChar.RETURN) {
        // do nothing
      } else if (ch == TokenChar.NEWLINE) {
        if (!_inString) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
      } else {
        _index--;
        if (_inString) {
          return next();
        } else {
          return _finishToken(TokenKind.WHITESPACE);
        }
      }
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

  Token finishMultiLineComment() {
    int nesting = 1;
    do {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.ASTERISK) {
        if (_maybeEatChar(TokenChar.SLASH)) {
          nesting--;
        }
      } else if (ch == TokenChar.SLASH) {
        if (_maybeEatChar(TokenChar.ASTERISK)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    if (_inString) {
      return next();
    } else {
      return _finishToken(TokenKind.COMMENT);
    }
  }
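
  // Note on finishMultiLineComment above: the nesting counter means a
  // comment such as
  //
  //     /* outer /* inner */ still outer */
  //
  // is consumed as a single COMMENT token (CSS comments don't nest per the
  // spec, but this scanner tracks nesting anyway): each "/*" increments and
  // each "*/" decrements the counter until it reaches zero.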

  void eatDigits() {
    while (_index < _text.length) {
      if (TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
        _index++;
      } else {
        return;
      }
    }
  }

  static int _hexDigit(int c) {
    if (c >= 48 /*0*/ && c <= 57 /*9*/) {
      return c - 48;
    } else if (c >= 97 /*a*/ && c <= 102 /*f*/) {
      return c - 87;
    } else if (c >= 65 /*A*/ && c <= 70 /*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

  int readHex([int hexLength]) {
    int maxIndex;
    if (hexLength == null) {
      maxIndex = _text.length - 1;
    } else {
      // TODO(jimhug): What if this is too long?
      maxIndex = _index + hexLength;
      if (maxIndex >= _text.length) return -1;
    }
    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.codeUnitAt(_index));
      if (digit == -1) {
        if (hexLength == null) {
          return result;
        } else {
          return -1;
        }
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }

    return result;
  }
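
  // Worked example for readHex above: scanning the two digits "4A"
  // accumulates
  //
  //     result = 0 * 16 + 4;   // 4
  //     result = 4 * 16 + 10;  // 74 == 0x4A
  //
  // so readHex(2) positioned at "4A" returns 74.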

  Token finishNumber() {
    eatDigits();

    if (_peekChar() == TokenChar.DOT) {
      // Only consume the '.' if it starts a fractional part; otherwise back
      // up so the dot is tokenized on its own.
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      } else {
        _index--;
      }
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101 /*e*/) || _maybeEatChar(69 /*E*/)) {
      kind = TokenKind.DOUBLE;
      // Optional sign on the exponent.
      _maybeEatChar(45 /*-*/);
      _maybeEatChar(43 /*+*/);
      eatDigits();
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken("illegal character in number");
    }

    return _finishToken(kind);
  }
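
  // Sketch of what finishNumber/finishNumberExtra produce as written in this
  // base class (unit and dimension handling is assumed to live in the
  // generated subclass and the parser, which may refine this):
  //
  //     "42"   -> INTEGER token
  //     "4.2"  -> DOUBLE token (finishNumber consumes the fraction first)
  //     "4e-2" -> DOUBLE token (exponent sign and digits consumed above)
  //     "4q"   -> error token "illegal character in number"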

  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = new String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return new LiteralToken(kind, _file.span(_startIndex, _index), s);
  }

  Token makeIEFilter(int start, int end) {
    var filter = _text.substring(start, end);
    return new LiteralToken(TokenKind.STRING, _file.span(start, end), filter);
  }

  Token _makeRawStringToken(bool isMultiline) {
    var s;
    if (isMultiline) {
      // Skip initial newline in multiline strings
      int start = _startIndex + 4;
      if (_text[start] == '\n') start++;
      s = _text.substring(start, _index - 3);
    } else {
      s = _text.substring(_startIndex + 2, _index - 1);
    }
    return new LiteralToken(
        TokenKind.STRING, _file.span(_startIndex, _index), s);
  }

  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            return _makeStringToken(buf, false);
          }
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(TokenChar.NEWLINE);
        return finishMultilineString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    return finishStringBody(quote);
  }

  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        return _makeStringToken(<int>[], false);
      }
    }
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  Token finishMultilineRawString(int quote) {
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  Token finishStringBody(int quote) {
    var buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken("invalid hex escape sequence");
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }
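
  // Example for finishStringBody above: after the opening quote of 'a\x21b'
  // has been consumed, the loop collects the char codes [97, 33, 98] ("a!b"),
  // with \x21 decoded by readEscapeSequence, and the closing quote ends the
  // STRING token.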

  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110 /*n*/ :
        return TokenChar.NEWLINE;
      case 114 /*r*/ :
        return TokenChar.RETURN;
      case 102 /*f*/ :
        return TokenChar.FF;
      case 98 /*b*/ :
        return TokenChar.BACKSPACE;
      case 116 /*t*/ :
        return TokenChar.TAB;
      case 118 /*v*/ :
        return 11 /* vertical tab */;
      case 120 /*x*/ :
        hexValue = readHex(2);
        break;
      case 117 /*u*/ :
        if (_maybeEatChar(TokenChar.LBRACE)) {
          hexValue = readHex();
          if (!_maybeEatChar(TokenChar.RBRACE)) {
            return -1;
          }
        } else {
          hexValue = readHex(4);
        }
        break;
      default:
        return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF) {
      messages.error('unicode values greater than 2 bytes not implemented yet',
          _file.span(_startIndex, _startIndex + 1));
      return -1;
    } else {
      return -1;
    }
  }
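
  // Examples for readEscapeSequence above (return values are char codes):
  //
  //     \n        -> TokenChar.NEWLINE
  //     \x41      -> 65 ('A'), via readHex(2)
  //     \u0041    -> 65, via readHex(4)
  //     \u{1F600} -> -1; supplementary-plane values (and surrogate halves)
  //                  are rejected, with an error reported via messages.error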

  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }
}