Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) | 2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) |
| 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) | 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) |
| 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved. | 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved. |
| 5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> | 5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> |
| 6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> | 6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> |
| 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) | 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
| 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. | 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. |
| 9 * Copyright (C) 2012 Intel Corporation. All rights reserved. | 9 * Copyright (C) 2012 Intel Corporation. All rights reserved. |
| 10 * | 10 * |
| (...skipping 286 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 297 { | 297 { |
| 298 return m_currentCharacter8; | 298 return m_currentCharacter8; |
| 299 } | 299 } |
| 300 | 300 |
| 301 template <> | 301 template <> |
| 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() | 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() |
| 303 { | 303 { |
| 304 return m_currentCharacter16; | 304 return m_currentCharacter16; |
| 305 } | 305 } |
| 306 | 306 |
| 307 UChar*& CSSTokenizer::currentCharacter16() | 307 UChar* CSSTokenizer::getStringBuffer16(size_t len) |
| 308 { | 308 { |
| 309 if (!m_currentCharacter16) { | 309 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]); |
| 310 m_dataStart16 = adoptArrayPtr(new UChar[m_length]); | |
| 311 m_currentCharacter16 = m_dataStart16.get(); | |
| 312 } | |
| 313 | 310 |
| 314 return m_currentCharacter16; | 311 UChar* bufferPtr = buffer.get(); |
| 312 | |
| 313 m_cssStrings16.append(buffer.release()); | |
| 314 return bufferPtr; | |
| 315 } | 315 } |
| 316 | 316 |
| 317 template <> | 317 template <> |
| 318 inline LChar* CSSTokenizer::dataStart<LChar>() | 318 inline LChar* CSSTokenizer::dataStart<LChar>() |
| 319 { | 319 { |
| 320 return m_dataStart8.get(); | 320 return m_dataStart8.get(); |
| 321 } | 321 } |
| 322 | 322 |
| 323 template <> | 323 template <> |
| 324 inline UChar* CSSTokenizer::dataStart<UChar>() | 324 inline UChar* CSSTokenizer::dataStart<UChar>() |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 405 if (unicode > 0x10ffff) | 405 if (unicode > 0x10ffff) |
| 406 unicode = 0xfffd; | 406 unicode = 0xfffd; |
| 407 | 407 |
| 408 // Optional space after the escape sequence. | 408 // Optional space after the escape sequence. |
| 409 if (isHTMLSpace<CharacterType>(*src)) | 409 if (isHTMLSpace<CharacterType>(*src)) |
| 410 ++src; | 410 ++src; |
| 411 | 411 |
| 412 return unicode; | 412 return unicode; |
| 413 } | 413 } |
| 414 | 414 |
| 415 return *currentCharacter<CharacterType>()++; | 415 return *src++; |
| 416 } | 416 } |
| 417 | 417 |
| 418 template <> | 418 template <> |
| 419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode) | 419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode) |
| 420 { | 420 { |
| 421 ASSERT(unicode <= 0xff); | 421 ASSERT(unicode <= 0xff); |
| 422 *result = unicode; | 422 *result = unicode; |
| 423 | 423 |
| 424 ++result; | 424 ++result; |
| 425 } | 425 } |
| 426 | 426 |
| 427 template <> | 427 template <> |
| 428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode) | 428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode) |
| 429 { | 429 { |
| 430 // Replace unicode with a surrogate pairs when it is bigger than 0xffff | 430 // Replace unicode with a surrogate pairs when it is bigger than 0xffff |
| 431 if (U16_LENGTH(unicode) == 2) { | 431 if (U16_LENGTH(unicode) == 2) { |
| 432 *result++ = U16_LEAD(unicode); | 432 *result++ = U16_LEAD(unicode); |
| 433 *result = U16_TRAIL(unicode); | 433 *result = U16_TRAIL(unicode); |
| 434 } else { | 434 } else { |
| 435 *result = unicode; | 435 *result = unicode; |
| 436 } | 436 } |
| 437 | 437 |
| 438 ++result; | 438 ++result; |
| 439 } | 439 } |
| 440 | 440 |
| 441 template <typename SrcCharacterType> | |
| 442 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src) | |
| 443 { | |
| 444 // The decoded form of an identifier (after resolving escape | |
| 445 // sequences) will not contain more characters (ASCII or UTF-16 | |
| 446 // codepoints) than the input. This code can therefore ignore | |
| 447 // escape sequences completely. | |
| 448 SrcCharacterType* start = src; | |
| 449 do { | |
| 450 if (LIKELY(*src != '\\')) | |
| 451 src++; | |
| 452 else | |
| 453 parseEscape<SrcCharacterType>(src); | |
| 454 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); | |
| 455 | |
| 456 return src - start; | |
| 457 } | |
| 458 | |
| 441 template <typename SrcCharacterType, typename DestCharacterType> | 459 template <typename SrcCharacterType, typename DestCharacterType> |
| 442 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape) | 460 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape) |
| 443 { | 461 { |
| 444 hasEscape = false; | 462 hasEscape = false; |
| 445 do { | 463 do { |
| 446 if (LIKELY(*src != '\\')) { | 464 if (LIKELY(*src != '\\')) { |
| 447 *result++ = *src++; | 465 *result++ = *src++; |
| 448 } else { | 466 } else { |
| 449 hasEscape = true; | 467 hasEscape = true; |
| 450 SrcCharacterType* savedEscapeStart = src; | 468 SrcCharacterType* savedEscapeStart = src; |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 464 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape) | 482 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape) |
| 465 { | 483 { |
| 466 // If a valid identifier start is found, we can safely | 484 // If a valid identifier start is found, we can safely |
| 467 // parse the identifier until the next invalid character. | 485 // parse the identifier until the next invalid character. |
| 468 ASSERT(isIdentifierStart<CharacterType>()); | 486 ASSERT(isIdentifierStart<CharacterType>()); |
| 469 | 487 |
| 470 CharacterType* start = currentCharacter<CharacterType>(); | 488 CharacterType* start = currentCharacter<CharacterType>(); |
| 471 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) { | 489 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) { |
| 472 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue | 490 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue |
| 473 ASSERT(is8BitSource()); | 491 ASSERT(is8BitSource()); |
| 474 UChar*& result16 = currentCharacter16(); | 492 UChar* result16 = getStringBuffer16((result - start) + peekMaxIdentifierLen(result)); |
| 475 UChar* start16 = result16; | 493 UChar* start16 = result16; |
| 476 int i = 0; | 494 int i = 0; |
| 477 for (; i < result - start; i++) | 495 for (; i < result - start; i++) |
| 478 result16[i] = start[i]; | 496 result16[i] = start[i]; |
| 479 | 497 |
| 480 result16 += i; | 498 result16 += i; |
| 481 | 499 |
| 482 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape); | 500 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape); |
| 483 | 501 |
| 484 resultString.init(start16, result16 - start16); | 502 resultString.init(start16, result16 - start16); |
| 485 | 503 |
| 486 return; | 504 return; |
| 487 } | 505 } |
| 488 | 506 |
| 489 resultString.init(start, result - start); | 507 resultString.init(start, result - start); |
| 490 } | 508 } |
| 491 | 509 |
| 510 template <typename SrcCharacterType> | |
| 511 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote) | |
| 512 { | |
| 513 // A string can't be longer than the ASCII characters used to write | |
| 514 // it down so use that as the upper limit. | |
|
rune
2014/03/17 20:18:35
I just noticed now this comment has the same issue
Daniel Bratell
2014/03/17 20:25:17
Done.
| |
| 515 SrcCharacterType* start = src; | |
| 516 while (true) { | |
| 517 if (UNLIKELY(*src == quote)) { | |
| 518 // String parsing is done. | |
| 519 ++src; | |
| 520 break; | |
| 521 } | |
| 522 if (UNLIKELY(!*src)) { | |
| 523 // String parsing is done, but don't advance pointer if at the end of input. | |
| 524 break; | |
| 525 } | |
| 526 ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v'); | |
| 527 | |
| 528 if (LIKELY(src[0] != '\\')) | |
| 529 src++; | |
| 530 else if (src[1] == '\n' || src[1] == '\f') | |
| 531 src += 2; | |
| 532 else if (src[1] == '\r') | |
| 533 src += src[2] == '\n' ? 3 : 2; | |
| 534 else | |
| 535 parseEscape<SrcCharacterType>(src); | |
| 536 } | |
| 537 | |
| 538 return src - start; | |
| 539 } | |
| 540 | |
| 492 template <typename SrcCharacterType, typename DestCharacterType> | 541 template <typename SrcCharacterType, typename DestCharacterType> |
| 493 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote) | 542 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote) |
| 494 { | 543 { |
| 495 while (true) { | 544 while (true) { |
| 496 if (UNLIKELY(*src == quote)) { | 545 if (UNLIKELY(*src == quote)) { |
| 497 // String parsing is done. | 546 // String parsing is done. |
| 498 ++src; | 547 ++src; |
| 499 return true; | 548 return true; |
| 500 } | 549 } |
| 501 if (UNLIKELY(!*src)) { | 550 if (UNLIKELY(!*src)) { |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 525 } | 574 } |
| 526 | 575 |
| 527 template <typename CharacterType> | 576 template <typename CharacterType> |
| 528 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote) | 577 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote) |
| 529 { | 578 { |
| 530 CharacterType* start = currentCharacter<CharacterType>(); | 579 CharacterType* start = currentCharacter<CharacterType>(); |
| 531 | 580 |
| 532 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) { | 581 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) { |
| 533 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue | 582 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue |
| 534 ASSERT(is8BitSource()); | 583 ASSERT(is8BitSource()); |
| 535 UChar*& result16 = currentCharacter16(); | 584 UChar* result16 = getStringBuffer16((result - start) + peekMaxStringLen(result, quote)); |
| 536 UChar* start16 = result16; | 585 UChar* start16 = result16; |
| 537 int i = 0; | 586 int i = 0; |
| 538 for (; i < result - start; i++) | 587 for (; i < result - start; i++) |
| 539 result16[i] = start[i]; | 588 result16[i] = start[i]; |
| 540 | 589 |
| 541 result16 += i; | 590 result16 += i; |
| 542 | 591 |
| 543 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); | 592 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); |
| 544 | 593 |
| 545 resultString.init(start16, result16 - start16); | 594 resultString.init(start16, result16 - start16); |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 573 } | 622 } |
| 574 } | 623 } |
| 575 | 624 |
| 576 end = skipWhiteSpace(end); | 625 end = skipWhiteSpace(end); |
| 577 if (*end != ')') | 626 if (*end != ')') |
| 578 return false; | 627 return false; |
| 579 | 628 |
| 580 return true; | 629 return true; |
| 581 } | 630 } |
| 582 | 631 |
| 632 template <typename SrcCharacterType> | |
| 633 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote) | |
| 634 { | |
| 635 // A URI can't be longer than the ASCII characters used to write | |
| 636 // it down so use that as the upper limit. | |
|
rune
2014/03/17 20:18:35
I just noticed now this comment has the same issue
Daniel Bratell
2014/03/17 20:25:17
Done.
| |
| 637 SrcCharacterType* start = src; | |
| 638 if (quote) { | |
| 639 ASSERT(quote == '"' || quote == '\''); | |
| 640 return peekMaxStringLen(src, quote); | |
| 641 } | |
| 642 | |
| 643 while (isURILetter(*src)) { | |
| 644 if (LIKELY(*src != '\\')) | |
| 645 src++; | |
| 646 else | |
| 647 parseEscape<SrcCharacterType>(src); | |
| 648 } | |
| 649 | |
| 650 return src - start; | |
| 651 } | |
| 652 | |
| 583 template <typename SrcCharacterType, typename DestCharacterType> | 653 template <typename SrcCharacterType, typename DestCharacterType> |
| 584 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote) | 654 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote) |
| 585 { | 655 { |
| 586 if (quote) { | 656 if (quote) { |
| 587 ASSERT(quote == '"' || quote == '\''); | 657 ASSERT(quote == '"' || quote == '\''); |
| 588 return parseStringInternal(src, dest, quote); | 658 return parseStringInternal(src, dest, quote); |
| 589 } | 659 } |
| 590 | 660 |
| 591 while (isURILetter(*src)) { | 661 while (isURILetter(*src)) { |
| 592 if (LIKELY(*src != '\\')) { | 662 if (LIKELY(*src != '\\')) { |
| 593 *dest++ = *src++; | 663 *dest++ = *src++; |
| 594 } else { | 664 } else { |
| 595 unsigned unicode = parseEscape<SrcCharacterType>(src); | 665 unsigned unicode = parseEscape<SrcCharacterType>(src); |
| 596 if (unicode > 0xff && sizeof(SrcCharacterType) == 1) | 666 if (unicode > 0xff && sizeof(DestCharacterType) == 1) |
|
Daniel Bratell
2014/03/17 16:01:07
Note: This is an unrelated bug fix. Before this fi
Daniel Bratell
2014/03/17 20:15:36
Done.
| |
| 597 return false; | 667 return false; |
| 598 UnicodeToChars(dest, unicode); | 668 UnicodeToChars(dest, unicode); |
| 599 } | 669 } |
| 600 } | 670 } |
| 601 | 671 |
| 602 return true; | 672 return true; |
| 603 } | 673 } |
| 604 | 674 |
| 605 template <typename CharacterType> | 675 template <typename CharacterType> |
| 606 inline void CSSTokenizer::parseURI(CSSParserString& string) | 676 inline void CSSTokenizer::parseURI(CSSParserString& string) |
| 607 { | 677 { |
| 608 CharacterType* uriStart; | 678 CharacterType* uriStart; |
| 609 CharacterType* uriEnd; | 679 CharacterType* uriEnd; |
| 610 UChar quote; | 680 UChar quote; |
| 611 if (!findURI(uriStart, uriEnd, quote)) | 681 if (!findURI(uriStart, uriEnd, quote)) |
| 612 return; | 682 return; |
| 613 | 683 |
| 614 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; | 684 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; |
| 615 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) { | 685 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) { |
| 616 string.init(uriStart, dest - uriStart); | 686 string.init(uriStart, dest - uriStart); |
| 617 } else { | 687 } else { |
| 618 // An escape sequence was encountered that can't be stored in 8 bits. | 688 // An escape sequence was encountered that can't be stored in 8 bits. |
| 619 // Reset the current character to the start of the URI and re-parse with | 689 // Reset the current character to the start of the URI and re-parse with |
| 620 // a 16-bit destination. | 690 // a 16-bit destination. |
| 621 ASSERT(is8BitSource()); | 691 ASSERT(is8BitSource()); |
| 622 UChar* uriStart16 = currentCharacter16(); | 692 UChar* result16 = getStringBuffer16(peekMaxURILen(uriStart, quote)); |
| 693 UChar* uriStart16 = result16; | |
| 623 currentCharacter<CharacterType>() = uriStart; | 694 currentCharacter<CharacterType>() = uriStart; |
| 624 bool result = parseURIInternal(currentCharacter<CharacterType>(), currentCharacter16(), quote); | 695 bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote); |
| 625 ASSERT_UNUSED(result, result); | 696 ASSERT_UNUSED(result, result); |
| 626 string.init(uriStart16, currentCharacter16() - uriStart16); | 697 string.init(uriStart16, result16 - uriStart16); |
| 627 } | 698 } |
| 628 | 699 |
| 629 currentCharacter<CharacterType>() = uriEnd + 1; | 700 currentCharacter<CharacterType>() = uriEnd + 1; |
| 630 m_token = URI; | 701 m_token = URI; |
| 631 } | 702 } |
| 632 | 703 |
| 633 template <typename CharacterType> | 704 template <typename CharacterType> |
| 634 inline bool CSSTokenizer::parseUnicodeRange() | 705 inline bool CSSTokenizer::parseUnicodeRange() |
| 635 { | 706 { |
| 636 CharacterType* character = currentCharacter<CharacterType>() + 1; | 707 CharacterType* character = currentCharacter<CharacterType>() + 1; |
| (...skipping 889 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1526 m_dataStart16[length - 1] = 0; | 1597 m_dataStart16[length - 1] = 0; |
| 1527 | 1598 |
| 1528 m_is8BitSource = false; | 1599 m_is8BitSource = false; |
| 1529 m_currentCharacter8 = 0; | 1600 m_currentCharacter8 = 0; |
| 1530 m_currentCharacter16 = m_dataStart16.get(); | 1601 m_currentCharacter16 = m_dataStart16.get(); |
| 1531 setTokenStart<UChar>(m_currentCharacter16); | 1602 setTokenStart<UChar>(m_currentCharacter16); |
| 1532 m_lexFunc = &CSSTokenizer::realLex<UChar>; | 1603 m_lexFunc = &CSSTokenizer::realLex<UChar>; |
| 1533 } | 1604 } |
| 1534 | 1605 |
| 1535 } // namespace WebCore | 1606 } // namespace WebCore |
| OLD | NEW |