Source/core/css/CSSTokenizer-in.cpp - Issue 469183002: Move parser-related files in core/css to core/css/parser

Side by Side Diff: Source/core/css/CSSTokenizer-in.cpp

Issue 469183002: Move parser-related files in core/css to core/css/parser (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@pp2

Patch Set: Created 6 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org)

3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com)

4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.

5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com>

6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org>

7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)

8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.

9 * Copyright (C) 2012 Intel Corporation. All rights reserved.

10 *

11 * This library is free software; you can redistribute it and/or

12 * modify it under the terms of the GNU Library General Public

13 * License as published by the Free Software Foundation; either

14 * version 2 of the License, or (at your option) any later version.

15 *

16 * This library is distributed in the hope that it will be useful,

17 * but WITHOUT ANY WARRANTY; without even the implied warranty of

18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

19 * Library General Public License for more details.

20 *

21 * You should have received a copy of the GNU Library General Public License

22 * along with this library; see the file COPYING.LIB. If not, write to

23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,

24 * Boston, MA 02110-1301, USA.

25 */

26

27 #include "config.h"

28 #include "core/css/CSSTokenizer.h"

29

30 #include "core/css/CSSKeyframeRule.h"

31 #include "core/css/parser/BisonCSSParser.h"

32 #include "core/css/CSSParserValues.h"

33 #include "core/css/MediaQuery.h"

34 #include "core/css/StyleRule.h"

35 #include "core/html/parser/HTMLParserIdioms.h"

36 #include "core/svg/SVGParserUtilities.h"

37

38 namespace blink {

39

40 #include "core/CSSGrammar.h"

41

// Character classes assigned to ASCII input; the tokenizer's main switch
// dispatches on these values.
enum CharacterType {
    // Characters allowed inside an identifier. These four enumerators must
    // stay first and adjacent: identifier checks compare a class against
    // CharacterDash with "<=".
    CharacterCaselessU,
    CharacterIdentifierStart,
    CharacterNumber,
    CharacterDash,

    // All remaining classes.
    CharacterOther,
    CharacterNull,
    CharacterWhiteSpace,
    CharacterEndMediaQueryOrSupports,
    CharacterEndNthChild,
    CharacterQuote,
    CharacterExclamationMark,
    CharacterHashmark,
    CharacterDollar,
    CharacterAsterisk,
    CharacterPlus,
    CharacterDot,
    CharacterSlash,
    CharacterLess,
    CharacterAt,
    CharacterBackSlash,
    CharacterXor,
    CharacterVerticalBar,
    CharacterTilde,
};

72

73 // 128 ASCII codes

74 static const CharacterType typesOfASCIICharacters[128] = {

75 /* 0 - Null */ CharacterNull,

76 /* 1 - Start of Heading */ CharacterOther,

77 /* 2 - Start of Text */ CharacterOther,

78 /* 3 - End of Text */ CharacterOther,

79 /* 4 - End of Transm. */ CharacterOther,

80 /* 5 - Enquiry */ CharacterOther,

81 /* 6 - Acknowledgment */ CharacterOther,

82 /* 7 - Bell */ CharacterOther,

83 /* 8 - Back Space */ CharacterOther,

84 /* 9 - Horizontal Tab */ CharacterWhiteSpace,

85 /* 10 - Line Feed */ CharacterWhiteSpace,

86 /* 11 - Vertical Tab */ CharacterOther,

87 /* 12 - Form Feed */ CharacterWhiteSpace,

88 /* 13 - Carriage Return */ CharacterWhiteSpace,

89 /* 14 - Shift Out */ CharacterOther,

90 /* 15 - Shift In */ CharacterOther,

91 /* 16 - Data Line Escape */ CharacterOther,

92 /* 17 - Device Control 1 */ CharacterOther,

93 /* 18 - Device Control 2 */ CharacterOther,

94 /* 19 - Device Control 3 */ CharacterOther,

95 /* 20 - Device Control 4 */ CharacterOther,

96 /* 21 - Negative Ack. */ CharacterOther,

97 /* 22 - Synchronous Idle */ CharacterOther,

98 /* 23 - End of Transmit */ CharacterOther,

99 /* 24 - Cancel */ CharacterOther,

100 /* 25 - End of Medium */ CharacterOther,

101 /* 26 - Substitute */ CharacterOther,

102 /* 27 - Escape */ CharacterOther,

103 /* 28 - File Separator */ CharacterOther,

104 /* 29 - Group Separator */ CharacterOther,

105 /* 30 - Record Separator */ CharacterOther,

106 /* 31 - Unit Separator */ CharacterOther,

107 /* 32 - Space */ CharacterWhiteSpace,

108 /* 33 - ! */ CharacterExclamationMark,

109 /* 34 - " */ CharacterQuote,

110 /* 35 - # */ CharacterHashmark,

111 /* 36 - $ */ CharacterDollar,

112 /* 37 - % */ CharacterOther,

113 /* 38 - & */ CharacterOther,

114 /* 39 - ' */ CharacterQuote,

115 /* 40 - ( */ CharacterOther,

116 /* 41 - ) */ CharacterEndNthChild,

117 /* 42 - * */ CharacterAsterisk,

118 /* 43 - + */ CharacterPlus,

119 /* 44 - , */ CharacterOther,

120 /* 45 - - */ CharacterDash,

121 /* 46 - . */ CharacterDot,

122 /* 47 - / */ CharacterSlash,

123 /* 48 - 0 */ CharacterNumber,

124 /* 49 - 1 */ CharacterNumber,

125 /* 50 - 2 */ CharacterNumber,

126 /* 51 - 3 */ CharacterNumber,

127 /* 52 - 4 */ CharacterNumber,

128 /* 53 - 5 */ CharacterNumber,

129 /* 54 - 6 */ CharacterNumber,

130 /* 55 - 7 */ CharacterNumber,

131 /* 56 - 8 */ CharacterNumber,

132 /* 57 - 9 */ CharacterNumber,

133 /* 58 - : */ CharacterOther,

134 /* 59 - ; */ CharacterEndMediaQueryOrSupports,

135 /* 60 - < */ CharacterLess,

136 /* 61 - = */ CharacterOther,

137 /* 62 - > */ CharacterOther,

138 /* 63 - ? */ CharacterOther,

139 /* 64 - @ */ CharacterAt,

140 /* 65 - A */ CharacterIdentifierStart,

141 /* 66 - B */ CharacterIdentifierStart,

142 /* 67 - C */ CharacterIdentifierStart,

143 /* 68 - D */ CharacterIdentifierStart,

144 /* 69 - E */ CharacterIdentifierStart,

145 /* 70 - F */ CharacterIdentifierStart,

146 /* 71 - G */ CharacterIdentifierStart,

147 /* 72 - H */ CharacterIdentifierStart,

148 /* 73 - I */ CharacterIdentifierStart,

149 /* 74 - J */ CharacterIdentifierStart,

150 /* 75 - K */ CharacterIdentifierStart,

151 /* 76 - L */ CharacterIdentifierStart,

152 /* 77 - M */ CharacterIdentifierStart,

153 /* 78 - N */ CharacterIdentifierStart,

154 /* 79 - O */ CharacterIdentifierStart,

155 /* 80 - P */ CharacterIdentifierStart,

156 /* 81 - Q */ CharacterIdentifierStart,

157 /* 82 - R */ CharacterIdentifierStart,

158 /* 83 - S */ CharacterIdentifierStart,

159 /* 84 - T */ CharacterIdentifierStart,

160 /* 85 - U */ CharacterCaselessU,

161 /* 86 - V */ CharacterIdentifierStart,

162 /* 87 - W */ CharacterIdentifierStart,

163 /* 88 - X */ CharacterIdentifierStart,

164 /* 89 - Y */ CharacterIdentifierStart,

165 /* 90 - Z */ CharacterIdentifierStart,

166 /* 91 - [ */ CharacterOther,

167 /* 92 - \ */ CharacterBackSlash,

168 /* 93 - ] */ CharacterOther,

169 /* 94 - ^ */ CharacterXor,

170 /* 95 - _ */ CharacterIdentifierStart,

171 /* 96 - ` */ CharacterOther,

172 /* 97 - a */ CharacterIdentifierStart,

173 /* 98 - b */ CharacterIdentifierStart,

174 /* 99 - c */ CharacterIdentifierStart,

175 /* 100 - d */ CharacterIdentifierStart,

176 /* 101 - e */ CharacterIdentifierStart,

177 /* 102 - f */ CharacterIdentifierStart,

178 /* 103 - g */ CharacterIdentifierStart,

179 /* 104 - h */ CharacterIdentifierStart,

180 /* 105 - i */ CharacterIdentifierStart,

181 /* 106 - j */ CharacterIdentifierStart,

182 /* 107 - k */ CharacterIdentifierStart,

183 /* 108 - l */ CharacterIdentifierStart,

184 /* 109 - m */ CharacterIdentifierStart,

185 /* 110 - n */ CharacterIdentifierStart,

186 /* 111 - o */ CharacterIdentifierStart,

187 /* 112 - p */ CharacterIdentifierStart,

188 /* 113 - q */ CharacterIdentifierStart,

189 /* 114 - r */ CharacterIdentifierStart,

190 /* 115 - s */ CharacterIdentifierStart,

191 /* 116 - t */ CharacterIdentifierStart,

192 /* 117 - u */ CharacterCaselessU,

193 /* 118 - v */ CharacterIdentifierStart,

194 /* 119 - w */ CharacterIdentifierStart,

195 /* 120 - x */ CharacterIdentifierStart,

196 /* 121 - y */ CharacterIdentifierStart,

197 /* 122 - z */ CharacterIdentifierStart,

198 /* 123 - { */ CharacterEndMediaQueryOrSupports,

199 /* 124 - \| */ CharacterVerticalBar,

200 /* 125 - } */ CharacterOther,

201 /* 126 - ~ */ CharacterTilde,

202 /* 127 - Delete */ CharacterOther,

203 };

204

205 // Utility functions for the CSS tokenizer.

206

207 template <typename CharacterType>

208 static inline bool isCSSLetter(CharacterType character)

209 {

210 return character >= 128 \|\| typesOfASCIICharacters[character] <= CharacterDas h;

211 }

212

// Returns true when |character| may follow a backslash in an escape sequence:
// anything from space upwards except DEL (127).
template <typename CharType>
static inline bool isCSSEscape(CharType character)
{
    if (character == 127)
        return false;
    return character >= ' ';
}

218

// Returns true when |character| may appear in an unquoted url(...) body:
// '!', the range '#'..'&', or anything from '*' upwards except DEL (127).
// This excludes whitespace, both quote characters and the parentheses.
template <typename CharType>
static inline bool isURILetter(CharType character)
{
    return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!';
}

224

// Returns true when the text at |currentCharacter| can begin an identifier,
// given that any leading '-' has already been skipped by the caller: an ASCII
// letter, '_', a code unit >= 128, or a backslash followed by a character
// accepted by isCSSEscape(). Reads currentCharacter[1] only after a backslash.
template <typename CharType>
static inline bool isIdentifierStartAfterDash(CharType* currentCharacter)
{
    return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128
        || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1]));
}

231

232 template <typename CharacterType>

233 static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString)

234 {

235 // Compare an character memory data with a zero terminated string.

236 do {

237 // The input must be part of an identifier if constantChar or constStrin g

238 // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'.

239 ASSERT((constantString >= 'a' && constantString <= 'z') \|\| *constantSt ring == '-');

240 ASSERT(constantString != '-' \|\| isCSSLetter(cssString));

241 if (toASCIILowerUnchecked(cssString++) != (constantString++))

242 return false;

243 } while (*constantString);

244 return true;

245 }

246

247 template <typename CharacterType>

248 static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, co nst char* constantString)

249 {

250 ASSERT(*constantString);

251

252 do {

253 if (string++ != constantString++)

254 return false;

255 } while (*constantString);

256 return true;

257 }

258

259 template <typename CharacterType>

260 static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter)

261 {

262 // Returns with 0, if escape check is failed. Otherwise

263 // it returns with the following character.

264 ASSERT(*currentCharacter == '\\');

265

266 ++currentCharacter;

267 if (!isCSSEscape(*currentCharacter))

268 return 0;

269

270 if (isASCIIHexDigit(*currentCharacter)) {

271 int length = 6;

272

273 do {

274 ++currentCharacter;

275 } while (isASCIIHexDigit(*currentCharacter) && --length);

276

277 // Optional space after the escape sequence.

278 if (isHTMLSpace<CharacterType>(*currentCharacter))

279 ++currentCharacter;

280 return currentCharacter;

281 }

282 return currentCharacter + 1;

283 }

284

285 template <typename CharacterType>

286 static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter)

287 {

288 while (isHTMLSpace<CharacterType>(*currentCharacter))

289 ++currentCharacter;

290 return currentCharacter;

291 }

292

293 // Main CSS tokenizer functions.

294

295 template <>

296 inline LChar*& CSSTokenizer::currentCharacter<LChar>()

297 {

298 return m_currentCharacter8;

299 }

300

301 template <>

302 inline UChar*& CSSTokenizer::currentCharacter<UChar>()

303 {

304 return m_currentCharacter16;

305 }

306

307 UChar* CSSTokenizer::allocateStringBuffer16(size_t len)

308 {

309 // Allocates and returns a CSSTokenizer owned buffer for storing

310 // UTF-16 data. Used to get a suitable life span for UTF-16

311 // strings, identifiers and URIs created by the tokenizer.

312 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);

313

314 UChar* bufferPtr = buffer.get();

315

316 m_cssStrings16.append(buffer.release());

317 return bufferPtr;

318 }

319

320 template <>

321 inline LChar* CSSTokenizer::dataStart<LChar>()

322 {

323 return m_dataStart8.get();

324 }

325

326 template <>

327 inline UChar* CSSTokenizer::dataStart<UChar>()

328 {

329 return m_dataStart16.get();

330 }

331

332 template <typename CharacterType>

333 inline CSSParserLocation CSSTokenizer::tokenLocation()

334 {

335 CSSParserLocation location;

336 location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterT ype>() - tokenStart<CharacterType>());

337 location.lineNumber = m_tokenStartLineNumber;

338 location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>();

339 return location;

340 }

341

342 CSSParserLocation CSSTokenizer::currentLocation()

343 {

344 if (is8BitSource())

345 return tokenLocation<LChar>();

346 return tokenLocation<UChar>();

347 }

348

349 template <typename CharacterType>

350 inline bool CSSTokenizer::isIdentifierStart()

351 {

352 // Check whether an identifier is started.

353 return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-' ) ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1);

354 }

355

// Selects how checkAndSkipString() reacts to invalid string content.
enum CheckStringValidationMode {
    // Stop and report failure (a null result) on invalid content.
    AbortIfInvalid,
    // Step over invalid content and keep scanning.
    SkipInvalid
};

360

361 template <typename CharacterType>

362 static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote, CheckStringValidationMode mode)

363 {

364 // If mode is AbortIfInvalid and the string check fails it returns

365 // with 0. Otherwise it returns with a pointer to the first

366 // character after the string.

367 while (true) {

368 if (UNLIKELY(*currentCharacter == quote)) {

369 // String parsing is successful.

370 return currentCharacter + 1;

371 }

372 if (UNLIKELY(!*currentCharacter)) {

373 // String parsing is successful up to end of input.

374 return currentCharacter;

375 }

376 if (mode == AbortIfInvalid && UNLIKELY(currentCharacter <= '\r' && (cu rrentCharacter == '\n' \|\| (*currentCharacter \| 0x1) == '\r'))) {

377 // String parsing is failed for character '\n', '\f' or '\r'.

378 return 0;

379 }

380

381 if (LIKELY(currentCharacter[0] != '\\')) {

382 ++currentCharacter;

383 } else if (currentCharacter[1] == '\n' \|\| currentCharacter[1] == '\f') {

384 currentCharacter += 2;

385 } else if (currentCharacter[1] == '\r') {

386 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;

387 } else {

388 CharacterType* next = checkAndSkipEscape(currentCharacter);

389 if (!next) {

390 if (mode == AbortIfInvalid)

391 return 0;

392 next = currentCharacter + 1;

393 }

394 currentCharacter = next;

395 }

396 }

397 }

398

399 template <typename CharacterType>

400 unsigned CSSTokenizer::parseEscape(CharacterType*& src)

401 {

402 ASSERT(*src == '\\' && isCSSEscape(src[1]));

403

404 unsigned unicode = 0;

405

406 ++src;

407 if (isASCIIHexDigit(*src)) {

408

409 int length = 6;

410

411 do {

412 unicode = (unicode << 4) + toASCIIHexValue(*src++);

413 } while (--length && isASCIIHexDigit(*src));

414

415 // Characters above 0x10ffff are not handled.

416 if (unicode > 0x10ffff)

417 unicode = 0xfffd;

418

419 // Optional space after the escape sequence.

420 if (isHTMLSpace<CharacterType>(*src))

421 ++src;

422

423 return unicode;

424 }

425

426 return *src++;

427 }

428

429 template <>

430 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode )

431 {

432 ASSERT(unicode <= 0xff);

433 *result = unicode;

434

435 ++result;

436 }

437

438 template <>

439 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode )

440 {

441 // Replace unicode with a surrogate pairs when it is bigger than 0xffff

442 if (U16_LENGTH(unicode) == 2) {

443 *result++ = U16_LEAD(unicode);

444 *result = U16_TRAIL(unicode);

445 } else {

446 *result = unicode;

447 }

448

449 ++result;

450 }

451

452 template <typename SrcCharacterType>

453 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)

454 {

455 // The decoded form of an identifier (after resolving escape

456 // sequences) will not contain more characters (ASCII or UTF-16

457 // codepoints) than the input. This code can therefore ignore

458 // escape sequences completely.

459 SrcCharacterType* start = src;

460 do {

461 if (LIKELY(*src != '\\'))

462 src++;

463 else

464 parseEscape<SrcCharacterType>(src);

465 } while (isCSSLetter(src[0]) \|\| (src[0] == '\\' && isCSSEscape(src[1])));

466

467 return src - start;

468 }

469

470 template <typename SrcCharacterType, typename DestCharacterType>

471 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType& src, DestCh aracterType& result, bool& hasEscape)

472 {

473 hasEscape = false;

474 do {

475 if (LIKELY(*src != '\\')) {

476 result++ = src++;

477 } else {

478 hasEscape = true;

479 SrcCharacterType* savedEscapeStart = src;

480 unsigned unicode = parseEscape<SrcCharacterType>(src);

481 if (unicode > 0xff && sizeof(DestCharacterType) == 1) {

482 src = savedEscapeStart;

483 return false;

484 }

485 UnicodeToChars(result, unicode);

486 }

487 } while (isCSSLetter(src[0]) \|\| (src[0] == '\\' && isCSSEscape(src[1])));

488

489 return true;

490 }

491

492 template <typename CharacterType>

493 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin g& resultString, bool& hasEscape)

494 {

495 // If a valid identifier start is found, we can safely

496 // parse the identifier until the next invalid character.

497 ASSERT(isIdentifierStart<CharacterType>());

498

499 CharacterType* start = currentCharacter<CharacterType>();

500 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res ult, hasEscape))) {

501 // Found an escape we couldn't handle with 8 bits, copy what has been re cognized and continue

502 ASSERT(is8BitSource());

503 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxIdent ifierLen(currentCharacter<CharacterType>()));

504 UChar* start16 = result16;

505 int i = 0;

506 for (; i < result - start; i++)

507 result16[i] = start[i];

508

509 result16 += i;

510

511 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has Escape);

512

513 resultString.init(start16, result16 - start16);

514

515 return;

516 }

517

518 resultString.init(start, result - start);

519 }

520

521 template <typename SrcCharacterType>

522 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)

523 {

524 // The decoded form of a CSS string (after resolving escape

525 // sequences) will not contain more characters (ASCII or UTF-16

526 // codepoints) than the input. This code can therefore ignore

527 // escape sequences completely and just return the length of the

528 // input string (possibly including terminating quote if any).

529 SrcCharacterType* end = checkAndSkipString(src, quote, SkipInvalid);

530 return end ? end - src : 0;

531 }

532

533 template <typename SrcCharacterType, typename DestCharacterType>

534 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType& src, DestCharac terType& result, UChar quote)

535 {

536 while (true) {

537 if (UNLIKELY(*src == quote)) {

538 // String parsing is done.

539 ++src;

540 return true;

541 }

542 if (UNLIKELY(!*src)) {

543 // String parsing is done, but don't advance pointer if at the end o f input.

544 return true;

545 }

546 if (LIKELY(src[0] != '\\')) {

547 result++ = src++;

548 } else if (src[1] == '\n' \|\| src[1] == '\f') {

549 src += 2;

550 } else if (src[1] == '\r') {

551 src += src[2] == '\n' ? 3 : 2;

552 } else {

553 SrcCharacterType* savedEscapeStart = src;

554 unsigned unicode = parseEscape<SrcCharacterType>(src);

555 if (unicode > 0xff && sizeof(DestCharacterType) == 1) {

556 src = savedEscapeStart;

557 return false;

558 }

559 UnicodeToChars(result, unicode);

560 }

561 }

562

563 return true;

564 }

565

566 template <typename CharacterType>

567 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r esultString, UChar quote)

568 {

569 CharacterType* start = currentCharacter<CharacterType>();

570

571 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {

572 // Found an escape we couldn't handle with 8 bits, copy what has been re cognized and continue

573 ASSERT(is8BitSource());

574 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxStrin gLen(currentCharacter<CharacterType>(), quote));

575 UChar* start16 = result16;

576 int i = 0;

577 for (; i < result - start; i++)

578 result16[i] = start[i];

579

580 result16 += i;

581

582 parseStringInternal(currentCharacter<CharacterType>(), result16, quote);

583

584 resultString.init(start16, result16 - start16);

585 return;

586 }

587

588 resultString.init(start, result - start);

589 }

590

591 template <typename CharacterType>

592 inline bool CSSTokenizer::findURI(CharacterType& start, CharacterType& end, UC har& quote)

593 {

594 start = skipWhiteSpace(currentCharacter<CharacterType>());

595

596 if (start == '"' \|\| start == '\'') {

597 quote = *start++;

598 end = checkAndSkipString(start, quote, AbortIfInvalid);

599 if (!end)

600 return false;

601 } else {

602 quote = 0;

603 end = start;

604 while (isURILetter(*end)) {

605 if (LIKELY(*end != '\\')) {

606 ++end;

607 } else {

608 end = checkAndSkipEscape(end);

609 if (!end)

610 return false;

611 }

612 }

613 }

614

615 end = skipWhiteSpace(end);

616 if (*end != ')')

617 return false;

618

619 return true;

620 }

621

622 template <typename SrcCharacterType>

623 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)

624 {

625 // The decoded form of a URI (after resolving escape sequences)

626 // will not contain more characters (ASCII or UTF-16 codepoints)

627 // than the input. This code can therefore ignore escape sequences

628 // completely.

629 SrcCharacterType* start = src;

630 if (quote) {

631 ASSERT(quote == '"' \|\| quote == '\'');

632 return peekMaxStringLen(src, quote);

633 }

634

635 while (isURILetter(*src)) {

636 if (LIKELY(*src != '\\'))

637 src++;

638 else

639 parseEscape<SrcCharacterType>(src);

640 }

641

642 return src - start;

643 }

644

645 template <typename SrcCharacterType, typename DestCharacterType>

646 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType& src, DestCharacter Type& dest, UChar quote)

647 {

648 if (quote) {

649 ASSERT(quote == '"' \|\| quote == '\'');

650 return parseStringInternal(src, dest, quote);

651 }

652

653 while (isURILetter(*src)) {

654 if (LIKELY(*src != '\\')) {

655 dest++ = src++;

656 } else {

657 unsigned unicode = parseEscape<SrcCharacterType>(src);

658 if (unicode > 0xff && sizeof(DestCharacterType) == 1)

659 return false;

660 UnicodeToChars(dest, unicode);

661 }

662 }

663

664 return true;

665 }

666

667 template <typename CharacterType>

668 inline void CSSTokenizer::parseURI(CSSParserString& string)

669 {

670 CharacterType* uriStart;

671 CharacterType* uriEnd;

672 UChar quote;

673 if (!findURI(uriStart, uriEnd, quote))

674 return;

675

676 CharacterType* dest = currentCharacter<CharacterType>() = uriStart;

677 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote)) ) {

678 string.init(uriStart, dest - uriStart);

679 } else {

680 // An escape sequence was encountered that can't be stored in 8 bits.

681 // Reset the current character to the start of the URI and re-parse with

682 // a 16-bit destination.

683 ASSERT(is8BitSource());

684 currentCharacter<CharacterType>() = uriStart;

685 UChar* result16 = allocateStringBuffer16(peekMaxURILen(currentCharacter< CharacterType>(), quote));

686 UChar* uriStart16 = result16;

687 bool result = parseURIInternal(currentCharacter<CharacterType>(), result 16, quote);

688 ASSERT_UNUSED(result, result);

689 string.init(uriStart16, result16 - uriStart16);

690 }

691

692 currentCharacter<CharacterType>() = uriEnd + 1;

693 m_token = URI;

694 }

695

696 template <typename CharacterType>

697 inline bool CSSTokenizer::parseUnicodeRange()

698 {

699 CharacterType* character = currentCharacter<CharacterType>() + 1;

700 int length = 6;

701 ASSERT(*currentCharacter<CharacterType>() == '+');

702

703 while (isASCIIHexDigit(*character) && length) {

704 ++character;

705 --length;

706 }

707

708 if (length && *character == '?') {

709 // At most 5 hex digit followed by a question mark.

710 do {

711 ++character;

712 --length;

713 } while (*character == '?' && length);

714 currentCharacter<CharacterType>() = character;

715 return true;

716 }

717

718 if (length < 6) {

719 // At least one hex digit.

720 if (character[0] == '-' && isASCIIHexDigit(character[1])) {

721 // Followed by a dash and a hex digit.

722 ++character;

723 length = 6;

724 do {

725 ++character;

726 } while (--length && isASCIIHexDigit(*character));

727 }

728 currentCharacter<CharacterType>() = character;

729 return true;

730 }

731 return false;

732 }

733

734 template <typename CharacterType>

735 bool CSSTokenizer::parseNthChild()

736 {

737 CharacterType* character = currentCharacter<CharacterType>();

738

739 while (isASCIIDigit(*character))

740 ++character;

741 if (isASCIIAlphaCaselessEqual(*character, 'n')) {

742 currentCharacter<CharacterType>() = character + 1;

743 return true;

744 }

745 return false;

746 }

747

748 template <typename CharacterType>

749 bool CSSTokenizer::parseNthChildExtra()

750 {

751 CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>()) ;

752 if (character != '+' && character != '-')

753 return false;

754

755 character = skipWhiteSpace(character + 1);

756 if (!isASCIIDigit(*character))

757 return false;

758

759 do {

760 ++character;

761 } while (isASCIIDigit(*character));

762

763 currentCharacter<CharacterType>() = character;

764 return true;

765 }

766

// Matches the |length| characters starting at tokenStart() against a fixed
// list of function names. For "not", "url", "cue", "calc", "host" and
// "host-context" it sets m_token; for the four "nth-*" names it switches
// m_parsingMode to NthChildMode instead. Returns true on a match and false
// otherwise.
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably they
// are expanded by a build step before compilation - confirm before
// reformatting anything inside the SWITCH.
767 template <typename CharacterType>

768 inline bool CSSTokenizer::detectFunctionTypeToken(int length)

769 {

770 ASSERT(length > 0);

771 CharacterType* name = tokenStart<CharacterType>();

772 SWITCH(name, length) {

773 CASE("not") {

774 m_token = NOTFUNCTION;

775 return true;

776 }

777 CASE("url") {

778 m_token = URI;

779 return true;

780 }

781 CASE("cue") {

782 m_token = CUEFUNCTION;

783 return true;

784 }

785 CASE("calc") {

786 m_token = CALCFUNCTION;

787 return true;

788 }

789 CASE("host") {

790 m_token = HOSTFUNCTION;

791 return true;

792 }

793 CASE("host-context") {

794 m_token = HOSTCONTEXTFUNCTION;

795 return true;

796 }

797 CASE("nth-child") {

798 m_parsingMode = NthChildMode;

799 return true;

800 }

801 CASE("nth-of-type") {

802 m_parsingMode = NthChildMode;

803 return true;

804 }

805 CASE("nth-last-child") {

806 m_parsingMode = NthChildMode;

807 return true;

808 }

809 CASE("nth-last-of-type") {

810 m_parsingMode = NthChildMode;

811 return true;

812 }

813 }

814 return false;

815 }

816

// Matches the |length| characters starting at tokenStart() against the media
// query keywords "and", "not", "only" and "or", and sets m_token to the
// corresponding MEDIA_* value on a match. m_token is left unchanged when
// nothing matches. Must only be called in MediaQueryMode (asserted).
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably they
// are expanded by a build step before compilation - confirm.
817 template <typename CharacterType>

818 inline void CSSTokenizer::detectMediaQueryToken(int length)

819 {

820 ASSERT(m_parsingMode == MediaQueryMode);

821 CharacterType* name = tokenStart<CharacterType>();

822

823 SWITCH(name, length) {

824 CASE("and") {

825 m_token = MEDIA_AND;

826 }

827 CASE("not") {

828 m_token = MEDIA_NOT;

829 }

830 CASE("only") {

831 m_token = MEDIA_ONLY;

832 }

833 CASE("or") {

834 m_token = MEDIA_OR;

835 }

836 }

837 }

838

// Matches the |length| characters starting at |type| (the unit suffix that
// follows a number) against the known unit names and sets m_token to the
// corresponding unit token on a match. m_token is left unchanged when the
// unit is not recognized. |length| must be positive (asserted).
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably they
// are expanded by a build step before compilation - confirm.
839 template <typename CharacterType>

840 inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length)

841 {

842 ASSERT(length > 0);

843

844 SWITCH(type, length) {

845 CASE("cm") {

846 m_token = CMS;

847 }

848 CASE("ch") {

849 m_token = CHS;

850 }

851 CASE("deg") {

852 m_token = DEGS;

853 }

854 CASE("dppx") {

855 // There is a discussion about the name of this unit on www-style.

856 // Keep this compile time guard in place until that is resolved.

857 // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html

// NOTE(review): no compile-time guard is visible around this case any more;
// the comment above may be stale.
858 m_token = DPPX;

859 }

860 CASE("dpcm") {

861 m_token = DPCM;

862 }

863 CASE("dpi") {

864 m_token = DPI;

865 }

866 CASE("em") {

867 m_token = EMS;

868 }

869 CASE("ex") {

870 m_token = EXS;

871 }

872 CASE("fr") {

873 m_token = FR;

874 }

875 CASE("grad") {

876 m_token = GRADS;

877 }

878 CASE("hz") {

879 m_token = HERTZ;

880 }

881 CASE("in") {

882 m_token = INS;

883 }

884 CASE("khz") {

885 m_token = KHERTZ;

886 }

887 CASE("mm") {

888 m_token = MMS;

889 }

890 CASE("ms") {

891 m_token = MSECS;

892 }

893 CASE("px") {

894 m_token = PXS;

895 }

896 CASE("pt") {

897 m_token = PTS;

898 }

899 CASE("pc") {

900 m_token = PCS;

901 }

902 CASE("rad") {

903 m_token = RADS;

904 }

905 CASE("rem") {

906 m_token = REMS;

907 }

908 CASE("s") {

909 m_token = SECS;

910 }

911 CASE("turn") {

912 m_token = TURNS;

913 }

914 CASE("vw") {

915 m_token = VW;

916 }

917 CASE("vh") {

918 m_token = VH;

919 }

920 CASE("vmin") {

921 m_token = VMIN;

922 }

923 CASE("vmax") {

924 m_token = VMAX;

925 }

926 CASE("__qem") {

927 m_token = QEMS;

928 }

929 }

930 }

931

// Matches the token text after its first character (the leading dash)
// against "webkit-any" and "webkit-calc", and sets m_token to ANYFUNCTION or
// CALCFUNCTION on a match. |length| counts the leading dash. m_token is left
// unchanged when nothing matches.
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably they
// are expanded by a build step before compilation - confirm.
932 template <typename CharacterType>

933 inline void CSSTokenizer::detectDashToken(int length)

934 {

935 CharacterType* name = tokenStart<CharacterType>();

936

937 // Ignore leading dash.

938 ++name;

939 --length;

940

941 SWITCH(name, length) {

942 CASE("webkit-any") {

943 m_token = ANYFUNCTION;

944 }

945 CASE("webkit-calc") {

946 m_token = CALCFUNCTION;

947 }

948 }

949 }

950

// Matches the at-keyword at tokenStart() (|length| includes the leading '@')
// against the known at-rule names and sets m_token on a match. "import",
// "media" and "-internal-medialist" also switch m_parsingMode to
// MediaQueryMode; "supports" and "-internal-supports-condition" switch it to
// SupportsMode.
// Conditions visible in the cases below:
//  - page-margin names (top-*, bottom-*, left-*, right-*) only match when
//    |hasEscape| is false;
//  - "charset" only matches at the very start of the input;
//  - "keyframes" only matches when the unprefixed animation runtime feature
//    is enabled;
//  - "-internal-*" names only match when m_internal is set.
// m_token is left unchanged when nothing matches.
// NOTE(review): SWITCH/CASE are not defined in this chunk; presumably they
// are expanded by a build step before compilation - confirm.
951 template <typename CharacterType>

952 inline void CSSTokenizer::detectAtToken(int length, bool hasEscape)

953 {

954 CharacterType* name = tokenStart<CharacterType>();

955 ASSERT(name[0] == '@' && length >= 2);

956

957 // Ignore leading @.

958 ++name;

959 --length;

960

961 // charset, font-face, import, media, namespace, page, supports,

962 // -webkit-keyframes, keyframes, and -webkit-mediaquery are not affected by hasEscape.

// NOTE(review): no "-webkit-mediaquery" case exists below, and "viewport" is
// also unaffected by hasEscape; the list above looks out of date.
963 SWITCH(name, length) {

964 CASE("bottom-left") {

965 if (LIKELY(!hasEscape))

966 m_token = BOTTOMLEFT_SYM;

967 }

968 CASE("bottom-right") {

969 if (LIKELY(!hasEscape))

970 m_token = BOTTOMRIGHT_SYM;

971 }

972 CASE("bottom-center") {

973 if (LIKELY(!hasEscape))

974 m_token = BOTTOMCENTER_SYM;

975 }

976 CASE("bottom-left-corner") {

977 if (LIKELY(!hasEscape))

978 m_token = BOTTOMLEFTCORNER_SYM;

979 }

980 CASE("bottom-right-corner") {

981 if (LIKELY(!hasEscape))

982 m_token = BOTTOMRIGHTCORNER_SYM;

983 }

984 CASE("charset") {

985 if (name - 1 == dataStart<CharacterType>())

986 m_token = CHARSET_SYM;

987 }

988 CASE("font-face") {

989 m_token = FONT_FACE_SYM;

990 }

991 CASE("import") {

992 m_parsingMode = MediaQueryMode;

993 m_token = IMPORT_SYM;

994 }

995 CASE("keyframes") {

996 if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled())

997 m_token = KEYFRAMES_SYM;

998 }

999 CASE("left-top") {

1000 if (LIKELY(!hasEscape))

1001 m_token = LEFTTOP_SYM;

1002 }

1003 CASE("left-middle") {

1004 if (LIKELY(!hasEscape))

1005 m_token = LEFTMIDDLE_SYM;

1006 }

1007 CASE("left-bottom") {

1008 if (LIKELY(!hasEscape))

1009 m_token = LEFTBOTTOM_SYM;

1010 }

1011 CASE("media") {

1012 m_parsingMode = MediaQueryMode;

1013 m_token = MEDIA_SYM;

1014 }

1015 CASE("namespace") {

1016 m_token = NAMESPACE_SYM;

1017 }

1018 CASE("page") {

1019 m_token = PAGE_SYM;

1020 }

1021 CASE("right-top") {

1022 if (LIKELY(!hasEscape))

1023 m_token = RIGHTTOP_SYM;

1024 }

1025 CASE("right-middle") {

1026 if (LIKELY(!hasEscape))

1027 m_token = RIGHTMIDDLE_SYM;

1028 }

1029 CASE("right-bottom") {

1030 if (LIKELY(!hasEscape))

1031 m_token = RIGHTBOTTOM_SYM;

1032 }

1033 CASE("supports") {

1034 m_parsingMode = SupportsMode;

1035 m_token = SUPPORTS_SYM;

1036 }

1037 CASE("top-left") {

1038 if (LIKELY(!hasEscape))

1039 m_token = TOPLEFT_SYM;

1040 }

1041 CASE("top-right") {

1042 if (LIKELY(!hasEscape))

1043 m_token = TOPRIGHT_SYM;

1044 }

1045 CASE("top-center") {

1046 if (LIKELY(!hasEscape))

1047 m_token = TOPCENTER_SYM;

1048 }

1049 CASE("top-left-corner") {

1050 if (LIKELY(!hasEscape))

1051 m_token = TOPLEFTCORNER_SYM;

1052 }

1053 CASE("top-right-corner") {

1054 if (LIKELY(!hasEscape))

1055 m_token = TOPRIGHTCORNER_SYM;

1056 }

1057 CASE("viewport") {

1058 m_token = VIEWPORT_RULE_SYM;

1059 }

1060 CASE("-internal-rule") {

1061 if (LIKELY(!hasEscape && m_internal))

1062 m_token = INTERNAL_RULE_SYM;

1063 }

1064 CASE("-internal-decls") {

1065 if (LIKELY(!hasEscape && m_internal))

1066 m_token = INTERNAL_DECLS_SYM;

1067 }

1068 CASE("-internal-value") {

1069 if (LIKELY(!hasEscape && m_internal))

1070 m_token = INTERNAL_VALUE_SYM;

1071 }

1072 CASE("-webkit-keyframes") {

1073 m_token = WEBKIT_KEYFRAMES_SYM;

1074 }

1075 CASE("-internal-selector") {

1076 if (LIKELY(!hasEscape && m_internal))

1077 m_token = INTERNAL_SELECTOR_SYM;

1078 }

1079 CASE("-internal-medialist") {

1080 if (!m_internal)

1081 return;

1082 m_parsingMode = MediaQueryMode;

1083 m_token = INTERNAL_MEDIALIST_SYM;

1084 }

1085 CASE("-internal-keyframe-rule") {

1086 if (LIKELY(!hasEscape && m_internal))

1087 m_token = INTERNAL_KEYFRAME_RULE_SYM;

1088 }

1089 CASE("-internal-keyframe-key-list") {

1090 if (!m_internal)

1091 return;

1092 m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM;

1093 }

1094 CASE("-internal-supports-condition") {

1095 if (!m_internal)

1096 return;

1097 m_parsingMode = SupportsMode;

1098 m_token = INTERNAL_SUPPORTS_CONDITION_SYM;

1099 }

1100 }

1101 }

1102

1103 template <typename CharacterType>

1104 inline void CSSTokenizer::detectSupportsToken(int length)

1105 {

1106 ASSERT(m_parsingMode == SupportsMode);

1107 CharacterType* name = tokenStart<CharacterType>();

1108

1109 SWITCH(name, length) {

1110 CASE("or") {

1111 m_token = SUPPORTS_OR;

1112 }

1113 CASE("and") {

1114 m_token = SUPPORTS_AND;

1115 }

1116 CASE("not") {

1117 m_token = SUPPORTS_NOT;

1118 }

1119 }

1120 }

1121

1122 template <typename SrcCharacterType>

1123 int CSSTokenizer::realLex(void* yylvalWithoutType)

1124 {

1125 YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType);

1126 // Write pointer for the next character.

1127 SrcCharacterType* result;

1128 CSSParserString resultString;

1129 bool hasEscape;

1130

1131 // The input buffer is terminated by a \0 character, so

1132 // it is safe to read one character ahead of a known non-null.

1133 #if ENABLE(ASSERT)

1134 // In debug we check with an ASSERT that the length is > 0 for string types.

1135 yylval->string.clear();

1136 #endif

1137

1138 restartAfterComment:

1139 result = currentCharacter<SrcCharacterType>();

1140 setTokenStart(result);

1141 m_tokenStartLineNumber = m_lineNumber;

1142 m_token = *currentCharacter<SrcCharacterType>();

1143 ++currentCharacter<SrcCharacterType>();

1144

1145 switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdenti fierStart) {

1146 case CharacterCaselessU:

1147 if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) {

1148 if (parseUnicodeRange<SrcCharacterType>()) {

1149 m_token = UNICODERANGE;

1150 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara cter<SrcCharacterType>() - tokenStart<SrcCharacterType>());

1151 break;

1152 }

1153 }

1154 // Fall through to CharacterIdentifierStart.

1155

1156 case CharacterIdentifierStart:

1157 --currentCharacter<SrcCharacterType>();

1158 parseIdentifier(result, yylval->string, hasEscape);

1159 m_token = IDENT;

1160

1161 if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) {

1162 if (m_parsingMode == SupportsMode && !hasEscape) {

1163 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCha racterType>());

1164 if (m_token != IDENT)

1165 break;

1166 }

1167

1168 m_token = FUNCTION;

1169 if (!hasEscape)

1170 detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<Sr cCharacterType>());

1171

1172 // Skip parenthesis

1173 ++currentCharacter<SrcCharacterType>();

1174 ++result;

1175 ++yylval->string.m_length;

1176

1177 if (m_token == URI) {

1178 m_token = FUNCTION;

1179 // Check whether it is really an URI.

1180 if (yylval->string.is8Bit())

1181 parseURI<LChar>(yylval->string);

1182 else

1183 parseURI<UChar>(yylval->string);

1184 }

1185 } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {

1186 if (m_parsingMode == MediaQueryMode) {

1187 detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcC haracterType>());

1188 } else if (m_parsingMode == SupportsMode) {

1189 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCha racterType>());

1190 } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqua l(tokenStart<SrcCharacterType>()[0], 'n')) {

1191 if (result - tokenStart<SrcCharacterType>() == 1) {

1192 // String "n" is IDENT but "n+1" is NTH.

1193 if (parseNthChildExtra<SrcCharacterType>()) {

1194 m_token = NTH;

1195 yylval->string.m_length = currentCharacter<SrcCharacterT ype>() - tokenStart<SrcCharacterType>();

1196 }

1197 } else if (result - tokenStart<SrcCharacterType>() >= 2 && token Start<SrcCharacterType>()[1] == '-') {

1198 // String "n-" is IDENT but "n-1" is NTH.

1199 // Set currentCharacter to '-' to continue parsing.

1200 SrcCharacterType* nextCharacter = result;

1201 currentCharacter<SrcCharacterType>() = tokenStart<SrcCharact erType>() + 1;

1202 if (parseNthChildExtra<SrcCharacterType>()) {

1203 m_token = NTH;

1204 yylval->string.setLength(currentCharacter<SrcCharacterTy pe>() - tokenStart<SrcCharacterType>());

1205 } else {

1206 // Revert the change to currentCharacter if unsuccessful .

1207 currentCharacter<SrcCharacterType>() = nextCharacter;

1208 }

1209 }

1210 }

1211 }

1212 break;

1213

1214 case CharacterDot:

1215 if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0]))

1216 break;

1217 // Fall through to CharacterNumber.

1218

1219 case CharacterNumber: {

1220 bool dotSeen = (m_token == '.');

1221

1222 while (true) {

1223 if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) {

1224 // Only one dot is allowed for a number,

1225 // and it must be followed by a digit.

1226 if (currentCharacter<SrcCharacterType>()[0] != '.' \|\| dotSeen \|\| !isASCIIDigit(currentCharacter<SrcCharacterType>()[1]))

1227 break;

1228 dotSeen = true;

1229 }

1230 ++currentCharacter<SrcCharacterType>();

1231 }

1232

1233 if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaC aselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) {

1234 // "[0-9]+n" is always an NthChild.

1235 ++currentCharacter<SrcCharacterType>();

1236 parseNthChildExtra<SrcCharacterType>();

1237 m_token = NTH;

1238 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter <SrcCharacterType>() - tokenStart<SrcCharacterType>());

1239 break;

1240 }

1241

1242 // Use SVG parser for numbers on SVG presentation attributes.

1243 if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) {

1244 // We need to take care of units like 'em' or 'ex'.

1245 SrcCharacterType* character = currentCharacter<SrcCharacterType>();

1246 if (isASCIIAlphaCaselessEqual(*character, 'e')) {

1247 ASSERT(character - tokenStart<SrcCharacterType>() > 0);

1248 ++character;

1249 if (character == '-' \|\| character == '+' \|\| isASCIIDigit(*char acter)) {

1250 ++character;

1251 while (isASCIIDigit(*character))

1252 ++character;

1253 // Use FLOATTOKEN if the string contains exponents.

1254 dotSeen = true;

1255 currentCharacter<SrcCharacterType>() = character;

1256 }

1257 }

1258 if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - toke nStart<SrcCharacterType>(), yylval->number))

1259 break;

1260 } else {

1261 yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());

1262 }

1263

1264 // Type of the function.

1265 if (isIdentifierStart<SrcCharacterType>()) {

1266 SrcCharacterType* type = currentCharacter<SrcCharacterType>();

1267 result = currentCharacter<SrcCharacterType>();

1268

1269 parseIdentifier(result, resultString, hasEscape);

1270

1271 m_token = DIMEN;

1272 if (!hasEscape)

1273 detectNumberToken(type, currentCharacter<SrcCharacterType>() - t ype);

1274

1275 if (m_token == DIMEN) {

1276 // The decoded number is overwritten, but this is intentional.

1277 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara cter<SrcCharacterType>() - tokenStart<SrcCharacterType>());

1278 }

1279 } else if (*currentCharacter<SrcCharacterType>() == '%') {

1280 // Although the CSS grammar says {num}% we follow

1281 // webkit at the moment which uses {num}%+.

1282 do {

1283 ++currentCharacter<SrcCharacterType>();

1284 } while (*currentCharacter<SrcCharacterType>() == '%');

1285 m_token = PERCENTAGE;

1286 } else {

1287 m_token = dotSeen ? FLOATTOKEN : INTEGER;

1288 }

1289 break;

1290 }

1291

1292 case CharacterDash:

1293 if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) {

1294 --currentCharacter<SrcCharacterType>();

1295 parseIdentifier(result, resultString, hasEscape);

1296 m_token = IDENT;

1297

1298 if (*currentCharacter<SrcCharacterType>() == '(') {

1299 m_token = FUNCTION;

1300 if (!hasEscape)

1301 detectDashToken<SrcCharacterType>(result - tokenStart<SrcCha racterType>());

1302 ++currentCharacter<SrcCharacterType>();

1303 ++result;

1304 } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) {

1305 if (result - tokenStart<SrcCharacterType>() == 2) {

1306 // String "-n" is IDENT but "-n+1" is NTH.

1307 if (parseNthChildExtra<SrcCharacterType>()) {

1308 m_token = NTH;

1309 result = currentCharacter<SrcCharacterType>();

1310 }

1311 } else if (result - tokenStart<SrcCharacterType>() >= 3 && token Start<SrcCharacterType>()[2] == '-') {

1312 // String "-n-" is IDENT but "-n-1" is NTH.

1313 // Set currentCharacter to second '-' of '-n-' to continue p arsing.

1314 SrcCharacterType* nextCharacter = result;

1315 currentCharacter<SrcCharacterType>() = tokenStart<SrcCharact erType>() + 2;

1316 if (parseNthChildExtra<SrcCharacterType>()) {

1317 m_token = NTH;

1318 result = currentCharacter<SrcCharacterType>();

1319 } else {

1320 // Revert the change to currentCharacter if unsuccessful .

1321 currentCharacter<SrcCharacterType>() = nextCharacter;

1322 }

1323 }

1324 }

1325 resultString.setLength(result - tokenStart<SrcCharacterType>());

1326 yylval->string = resultString;

1327 } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentChar acter<SrcCharacterType>()[1] == '>') {

1328 currentCharacter<SrcCharacterType>() += 2;

1329 m_token = SGML_CD;

1330 } else if (UNLIKELY(m_parsingMode == NthChildMode)) {

1331 // "-[0-9]+n" is always an NthChild.

1332 if (parseNthChild<SrcCharacterType>()) {

1333 parseNthChildExtra<SrcCharacterType>();

1334 m_token = NTH;

1335 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara cter<SrcCharacterType>() - tokenStart<SrcCharacterType>());

1336 }

1337 }

1338 break;

1339

1340 case CharacterOther:

1341 // m_token is simply the current character.

1342 break;

1343

1344 case CharacterNull:

1345 // Do not advance pointer at the end of input.

1346 --currentCharacter<SrcCharacterType>();

1347 break;

1348

1349 case CharacterWhiteSpace:

1350 m_token = WHITESPACE;

1351 // Might start with a '\n'.

1352 --currentCharacter<SrcCharacterType>();

1353 do {

1354 if (*currentCharacter<SrcCharacterType>() == '\n')

1355 ++m_lineNumber;

1356 ++currentCharacter<SrcCharacterType>();

1357 } while (currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICh aracters[currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace));

1358 break;

1359

1360 case CharacterEndMediaQueryOrSupports:

1361 if (m_parsingMode == MediaQueryMode \|\| m_parsingMode == SupportsMode)

1362 m_parsingMode = NormalMode;

1363 break;

1364

1365 case CharacterEndNthChild:

1366 if (m_parsingMode == NthChildMode)

1367 m_parsingMode = NormalMode;

1368 break;

1369

1370 case CharacterQuote:

1371 if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token, Ab ortIfInvalid)) {

1372 ++result;

1373 parseString<SrcCharacterType>(result, yylval->string, m_token);

1374 m_token = STRING;

1375 }

1376 break;

1377

1378 case CharacterExclamationMark: {

1379 SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterTy pe>());

1380 if (isEqualToCSSIdentifier(start, "important")) {

1381 m_token = IMPORTANT_SYM;

1382 currentCharacter<SrcCharacterType>() = start + 9;

1383 }

1384 break;

1385 }

1386

1387 case CharacterHashmark: {

1388 SrcCharacterType* start = currentCharacter<SrcCharacterType>();

1389 result = currentCharacter<SrcCharacterType>();

1390

1391 if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) {

1392 // This must be a valid hex number token.

1393 do {

1394 ++currentCharacter<SrcCharacterType>();

1395 } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>()));

1396 m_token = HEX;

1397 yylval->string.init(start, currentCharacter<SrcCharacterType>() - st art);

1398 } else if (isIdentifierStart<SrcCharacterType>()) {

1399 m_token = IDSEL;

1400 parseIdentifier(result, yylval->string, hasEscape);

1401 if (!hasEscape) {

1402 // Check whether the identifier is also a valid hex number.

1403 SrcCharacterType* current = start;

1404 m_token = HEX;

1405 do {

1406 if (!isASCIIHexDigit(*current)) {

1407 m_token = IDSEL;

1408 break;

1409 }

1410 ++current;

1411 } while (current < result);

1412 }

1413 }

1414 break;

1415 }

1416

1417 case CharacterSlash:

1418 // Ignore comments. They are not even considered as white spaces.

1419 if (currentCharacter<SrcCharacterType>() == '') {

1420 const CSSParserLocation startLocation = currentLocation();

1421 if (m_parser.m_observer) {

1422 unsigned startOffset = currentCharacter<SrcCharacterType>() - da taStart<SrcCharacterType>() - 1; // Start with a slash.

1423 m_parser.m_observer->startComment(startOffset - m_parsedTextPref ixLength);

1424 }

1425 ++currentCharacter<SrcCharacterType>();

1426 while (currentCharacter<SrcCharacterType>()[0] != '*' \|\| currentChar acter<SrcCharacterType>()[1] != '/') {

1427 if (*currentCharacter<SrcCharacterType>() == '\n')

1428 ++m_lineNumber;

1429 if (*currentCharacter<SrcCharacterType>() == '\0') {

1430 // Unterminated comments are simply ignored.

1431 currentCharacter<SrcCharacterType>() -= 2;

1432 m_parser.reportError(startLocation, UnterminatedCommentCSSEr ror);

1433 break;

1434 }

1435 ++currentCharacter<SrcCharacterType>();

1436 }

1437 currentCharacter<SrcCharacterType>() += 2;

1438 if (m_parser.m_observer) {

1439 unsigned endOffset = currentCharacter<SrcCharacterType>() - data Start<SrcCharacterType>();

1440 unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength);

1441 m_parser.m_observer->endComment(std::min(endOffset, userTextEndO ffset) - m_parsedTextPrefixLength);

1442 }

1443 goto restartAfterComment;

1444 }

1445 break;

1446

1447 case CharacterDollar:

1448 if (*currentCharacter<SrcCharacterType>() == '=') {

1449 ++currentCharacter<SrcCharacterType>();

1450 m_token = ENDSWITH;

1451 }

1452 break;

1453

1454 case CharacterAsterisk:

1455 if (*currentCharacter<SrcCharacterType>() == '=') {

1456 ++currentCharacter<SrcCharacterType>();

1457 m_token = CONTAINS;

1458 }

1459 break;

1460

1461 case CharacterPlus:

1462 if (UNLIKELY(m_parsingMode == NthChildMode)) {

1463 // Simplest case. "+[0-9]*n" is always NthChild.

1464 if (parseNthChild<SrcCharacterType>()) {

1465 parseNthChildExtra<SrcCharacterType>();

1466 m_token = NTH;

1467 yylval->string.init(tokenStart<SrcCharacterType>(), currentChara cter<SrcCharacterType>() - tokenStart<SrcCharacterType>());

1468 }

1469 }

1470 break;

1471

1472 case CharacterLess:

1473 if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<S rcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') {

1474 currentCharacter<SrcCharacterType>() += 3;

1475 m_token = SGML_CD;

1476 }

1477 break;

1478

1479 case CharacterAt:

1480 if (isIdentifierStart<SrcCharacterType>()) {

1481 m_token = ATKEYWORD;

1482 ++result;

1483 parseIdentifier(result, resultString, hasEscape);

1484 // The standard enables unicode escapes in at-rules. In this case on ly the resultString will contain the

1485 // correct identifier, hence we have to use it to determine its leng th instead of the usual pointer arithmetic.

1486 detectAtToken<SrcCharacterType>(resultString.length() + 1, hasEscape );

1487 }

1488 break;

1489

1490 case CharacterBackSlash:

1491 if (isCSSEscape(*currentCharacter<SrcCharacterType>())) {

1492 --currentCharacter<SrcCharacterType>();

1493 parseIdentifier(result, yylval->string, hasEscape);

1494 m_token = IDENT;

1495 }

1496 break;

1497

1498 case CharacterXor:

1499 if (*currentCharacter<SrcCharacterType>() == '=') {

1500 ++currentCharacter<SrcCharacterType>();

1501 m_token = BEGINSWITH;

1502 }

1503 break;

1504

1505 case CharacterVerticalBar:

1506 if (*currentCharacter<SrcCharacterType>() == '=') {

1507 ++currentCharacter<SrcCharacterType>();

1508 m_token = DASHMATCH;

1509 }

1510 break;

1511

1512 case CharacterTilde:

1513 if (*currentCharacter<SrcCharacterType>() == '=') {

1514 ++currentCharacter<SrcCharacterType>();

1515 m_token = INCLUDES;

1516 }

1517 break;

1518

1519 default:

1520 ASSERT_NOT_REACHED();

1521 break;

1522 }

1523

1524 return m_token;

1525 }

1526

1527 template <>

1528 inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart)

1529 {

1530 m_tokenStart.ptr8 = tokenStart;

1531 }

1532

1533 template <>

1534 inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart)

1535 {

1536 m_tokenStart.ptr16 = tokenStart;

1537 }

1538

1539 void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, con st String& string, const char* suffix, unsigned suffixLength)

1540 {

1541 m_parsedTextPrefixLength = prefixLength;

1542 m_parsedTextSuffixLength = suffixLength;

1543 unsigned stringLength = string.length();

1544 unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuff ixLength + 1;

1545 m_length = length;

1546

1547 if (!stringLength \|\| string.is8Bit()) {

1548 m_dataStart8 = adoptArrayPtr(new LChar[length]);

1549 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)

1550 m_dataStart8[i] = prefix[i];

1551

1552 if (stringLength)

1553 memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.charact ers8(), stringLength * sizeof(LChar));

1554

1555 unsigned start = m_parsedTextPrefixLength + stringLength;

1556 unsigned end = start + suffixLength;

1557 for (unsigned i = start; i < end; i++)

1558 m_dataStart8[i] = suffix[i - start];

1559

1560 m_dataStart8[length - 1] = 0;

1561

1562 m_is8BitSource = true;

1563 m_currentCharacter8 = m_dataStart8.get();

1564 m_currentCharacter16 = 0;

1565 setTokenStart<LChar>(m_currentCharacter8);

1566 m_lexFunc = &CSSTokenizer::realLex<LChar>;

1567 return;

1568 }

1569

1570 m_dataStart16 = adoptArrayPtr(new UChar[length]);

1571 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)

1572 m_dataStart16[i] = prefix[i];

1573

1574 ASSERT(stringLength);

1575 memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16() , stringLength * sizeof(UChar));

1576

1577 unsigned start = m_parsedTextPrefixLength + stringLength;

1578 unsigned end = start + suffixLength;

1579 for (unsigned i = start; i < end; i++)

1580 m_dataStart16[i] = suffix[i - start];

1581

1582 m_dataStart16[length - 1] = 0;

1583

1584 m_is8BitSource = false;

1585 m_currentCharacter8 = 0;

1586 m_currentCharacter16 = m_dataStart16.get();

1587 setTokenStart<UChar>(m_currentCharacter16);

1588 m_lexFunc = &CSSTokenizer::realLex<UChar>;

1589 }

1590

1591 } // namespace blink

OLD	NEW

« no previous file with comments | « Source/core/css/CSSTokenizer.h ('k') | Source/core/css/CSSValueList.cpp » ('j') | no next file with comments »