OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) | 2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) |
3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) | 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) |
4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc.
All rights reserved. | 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc.
All rights reserved. |
5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> | 5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> |
6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> | 6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> |
7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmo
bile.com/) | 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmo
bile.com/) |
8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. | 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. |
9 * Copyright (C) 2012 Intel Corporation. All rights reserved. | 9 * Copyright (C) 2012 Intel Corporation. All rights reserved. |
10 * | 10 * |
(...skipping 286 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
297 { | 297 { |
298 return m_currentCharacter8; | 298 return m_currentCharacter8; |
299 } | 299 } |
300 | 300 |
// Explicit specialization of currentCharacter<>() for 16-bit input: hands back
// a mutable reference to m_currentCharacter16, so callers can advance the
// tokenizer's read cursor through it. Unchanged between OLD and NEW.
301 template <> | 301 template <> |
302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() | 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() |
303 { | 303 { |
304 return m_currentCharacter16; | 304 return m_currentCharacter16; |
305 } | 305 } |
306 | 306 |
// OLD: currentCharacter16() lazily allocated one m_length-sized UChar array
// into m_dataStart16 on first use and returned a reference to
// m_currentCharacter16.
// NEW: allocateStringBuffer16(len) replaces it. Every call allocates a fresh
// array of `len` UChars, appends the owning pointer to m_cssStrings16, and
// returns the raw pointer to the start of that array.
// The raw pointer is read via get() before release() hands ownership to
// m_cssStrings16, so the returned value stays valid after the local OwnPtr
// is emptied.
// NOTE(review): the buffer presumably lives until m_cssStrings16 is cleared
// or destroyed; that code is not visible in this diff - confirm.
307 UChar*& CSSTokenizer::currentCharacter16() | 307 UChar* CSSTokenizer::allocateStringBuffer16(size_t len) |
308 { | 308 { |
309 if (!m_currentCharacter16) { | 309 // Allocates and returns a CSSTokenizer owned buffer for storing |
310 m_dataStart16 = adoptArrayPtr(new UChar[m_length]); | 310 // UTF-16 data. Used to get a suitable life span for UTF-16 |
311 m_currentCharacter16 = m_dataStart16.get(); | 311 // strings, identifiers and URIs created by the tokenizer. |
312 } | 312 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]); |
313 | 313 |
314 return m_currentCharacter16; | 314 UChar* bufferPtr = buffer.get(); |
| 315 |
| 316 m_cssStrings16.append(buffer.release()); |
| 317 return bufferPtr; |
315 } | 318 } |
316 | 319 |
// Explicit specialization of dataStart<>() for 8-bit input: returns the raw
// pointer held by m_dataStart8. Unchanged between OLD and NEW.
317 template <> | 320 template <> |
318 inline LChar* CSSTokenizer::dataStart<LChar>() | 321 inline LChar* CSSTokenizer::dataStart<LChar>() |
319 { | 322 { |
320 return m_dataStart8.get(); | 323 return m_dataStart8.get(); |
321 } | 324 } |
322 | 325 |
323 template <> | 326 template <> |
324 inline UChar* CSSTokenizer::dataStart<UChar>() | 327 inline UChar* CSSTokenizer::dataStart<UChar>() |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
405 if (unicode > 0x10ffff) | 408 if (unicode > 0x10ffff) |
406 unicode = 0xfffd; | 409 unicode = 0xfffd; |
407 | 410 |
408 // Optional space after the escape sequence. | 411 // Optional space after the escape sequence. |
409 if (isHTMLSpace<CharacterType>(*src)) | 412 if (isHTMLSpace<CharacterType>(*src)) |
410 ++src; | 413 ++src; |
411 | 414 |
412 return unicode; | 415 return unicode; |
413 } | 416 } |
414 | 417 |
415 return *currentCharacter<CharacterType>()++; | 418 return *src++; |
416 } | 419 } |
417 | 420 |
// 8-bit specialization of UnicodeToChars: stores `unicode` as a single LChar
// at *result and advances `result` by one. The value must fit in 8 bits; this
// is only checked by ASSERT, so callers are expected to filter out code
// points above 0xff before calling. Unchanged between OLD and NEW.
418 template <> | 421 template <> |
419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode
) | 422 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode
) |
420 { | 423 { |
421 ASSERT(unicode <= 0xff); | 424 ASSERT(unicode <= 0xff); |
422 *result = unicode; | 425 *result = unicode; |
423 | 426 |
424 ++result; | 427 ++result; |
425 } | 428 } |
426 | 429 |
// 16-bit specialization of UnicodeToChars: writes `unicode` at *result as
// UTF-16 and leaves `result` just past what was written. That is one UChar
// normally, or two (lead then trail surrogate) when U16_LENGTH reports 2.
// Destination buffers therefore need room for up to two UChars per call.
// Unchanged between OLD and NEW.
427 template <> | 430 template <> |
428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode
) | 431 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode
) |
429 { | 432 { |
430 // Replace unicode with a surrogate pair when it is bigger than 0xffff | 433 // Replace unicode with a surrogate pair when it is bigger than 0xffff |
431 if (U16_LENGTH(unicode) == 2) { | 434 if (U16_LENGTH(unicode) == 2) { |
432 *result++ = U16_LEAD(unicode); | 435 *result++ = U16_LEAD(unicode); |
433 *result = U16_TRAIL(unicode); | 436 *result = U16_TRAIL(unicode); |
434 } else { | 437 } else { |
435 *result = unicode; | 438 *result = unicode; |
436 } | 439 } |
437 | 440 |
438 ++result; | 441 ++result; |
439 } | 442 } |
440 | 443 |
// NEW only. Measures how many source characters the identifier starting at
// `src` spans, without writing anything. `src` is taken by value, so the
// caller's pointer is not moved. The result is used as an upper bound on the
// decoded length when sizing a 16-bit buffer.
// The do/while always consumes at least one character (or one escape) before
// testing whether the identifier continues.
// NOTE(review): the bound only holds if `src` points at not-yet-decoded input.
// Passing an in-place write cursor that lags the read cursor would scan
// stale bytes and can end the scan early - check each call site.
// NOTE(review): the return value of parseEscape() is discarded here; it is
// called for its advance of `src` (presumably taken by reference - its
// signature is not visible in this diff).
| 444 template <typename SrcCharacterType> |
| 445 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src) |
| 446 { |
| 447 // The decoded form of an identifier (after resolving escape |
| 448 // sequences) will not contain more characters (ASCII or UTF-16 |
| 449 // codepoints) than the input. This code can therefore ignore |
| 450 // escape sequences completely. |
| 451 SrcCharacterType* start = src; |
| 452 do { |
| 453 if (LIKELY(*src != '\\')) |
| 454 src++; |
| 455 else |
| 456 parseEscape<SrcCharacterType>(src); |
| 457 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); |
| 458 |
| 459 return src - start; |
| 460 } |
| 461 |
441 template <typename SrcCharacterType, typename DestCharacterType> | 462 template <typename SrcCharacterType, typename DestCharacterType> |
442 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh
aracterType*& result, bool& hasEscape) | 463 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh
aracterType*& result, bool& hasEscape) |
443 { | 464 { |
444 hasEscape = false; | 465 hasEscape = false; |
445 do { | 466 do { |
446 if (LIKELY(*src != '\\')) { | 467 if (LIKELY(*src != '\\')) { |
447 *result++ = *src++; | 468 *result++ = *src++; |
448 } else { | 469 } else { |
449 hasEscape = true; | 470 hasEscape = true; |
450 SrcCharacterType* savedEscapeStart = src; | 471 SrcCharacterType* savedEscapeStart = src; |
(...skipping 13 matching lines...) Expand all Loading... |
// Parses the identifier at the current read position and stores it in
// `resultString`.
//   result       - in/out write cursor; the fast path decodes in place, so on
//                  return `result` is one past the last character written.
//   resultString - receives (pointer, length) of the decoded identifier.
//   hasEscape    - set by parseIdentifierInternal when an escape was seen.
// Fast path: parseIdentifierInternal decodes in place and the result aliases
// the source buffer. Slow path (8-bit source only): an escape decoded to a
// code point that does not fit in 8 bits, so the characters decoded so far
// are widened into a fresh 16-bit buffer and parsing resumes from the
// current read position into that buffer.
// FIX (NEW column): the 16-bit buffer is sized from the read cursor
// (currentCharacter<CharacterType>()), not from `result`. `result` is the
// in-place write cursor and can lag the read cursor after earlier escapes;
// scanning from it walks stale, partially overwritten bytes (e.g. the "62 "
// left behind by "\62 "), can stop early, and under-allocates the buffer the
// retry then writes into. The retry reads from currentCharacter, so that is
// the position the remaining length must be measured from.
464 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin
g& resultString, bool& hasEscape) | 485 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin
g& resultString, bool& hasEscape) |
465 { | 486 { |
466 // If a valid identifier start is found, we can safely | 487 // If a valid identifier start is found, we can safely |
467 // parse the identifier until the next invalid character. | 488 // parse the identifier until the next invalid character. |
468 ASSERT(isIdentifierStart<CharacterType>()); | 489 ASSERT(isIdentifierStart<CharacterType>()); |
469 | 490 |
470 CharacterType* start = currentCharacter<CharacterType>(); | 491 CharacterType* start = currentCharacter<CharacterType>(); |
471 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res
ult, hasEscape))) { | 492 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res
ult, hasEscape))) { |
472 // Found an escape we couldn't handle with 8 bits, copy what has been re
cognized and continue | 493 // Found an escape we couldn't handle with 8 bits, copy what has been re
cognized and continue |
473 ASSERT(is8BitSource()); | 494 ASSERT(is8BitSource()); |
474 UChar*& result16 = currentCharacter16(); | 495 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxIdent
ifierLen(currentCharacter<CharacterType>())); |
475 UChar* start16 = result16; | 496 UChar* start16 = result16; |
476 int i = 0; | 497 int i = 0; |
477 for (; i < result - start; i++) | 498 for (; i < result - start; i++) |
478 result16[i] = start[i]; | 499 result16[i] = start[i]; |
479 | 500 |
480 result16 += i; | 501 result16 += i; |
481 | 502 |
482 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has
Escape); | 503 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has
Escape); |
483 | 504 |
484 resultString.init(start16, result16 - start16); | 505 resultString.init(start16, result16 - start16); |
485 | 506 |
486 return; | 507 return; |
487 } | 508 } |
488 | 509 |
489 resultString.init(start, result - start); | 510 resultString.init(start, result - start); |
490 } | 511 } |
491 | 512 |
// NEW only. Returns the number of source characters between `src` and the
// position reported by checkAndSkipString(src, quote), or 0 when that helper
// returns a null pointer. Nothing is written and the caller's pointer is not
// moved (`src` is passed by value). Used as an upper bound when sizing a
// 16-bit destination buffer.
// NOTE(review): checkAndSkipString is not visible in this diff; presumably it
// returns the position after the closing quote, or null when the string is
// not well-formed - confirm. In the null case this reports 0, so the caller
// allocates no room for the remainder.
// NOTE(review): the bound only holds if `src` points at not-yet-decoded input.
// Scanning from an in-place write cursor that lags the read cursor can hit
// a stale quote character and end the scan early - check each call site.
| 513 template <typename SrcCharacterType> |
| 514 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote) |
| 515 { |
| 516 // The decoded form of a CSS string (after resolving escape |
| 517 // sequences) will not contain more characters (ASCII or UTF-16 |
| 518 // codepoints) than the input. This code can therefore ignore |
| 519 // escape sequences completely and just return the length of the |
| 520 // input string (possibly including terminating quote if any). |
| 521 SrcCharacterType* end = checkAndSkipString(src, quote); |
| 522 return end ? end - src : 0; |
| 523 } |
| 524 |
492 template <typename SrcCharacterType, typename DestCharacterType> | 525 template <typename SrcCharacterType, typename DestCharacterType> |
493 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac
terType*& result, UChar quote) | 526 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac
terType*& result, UChar quote) |
494 { | 527 { |
495 while (true) { | 528 while (true) { |
496 if (UNLIKELY(*src == quote)) { | 529 if (UNLIKELY(*src == quote)) { |
497 // String parsing is done. | 530 // String parsing is done. |
498 ++src; | 531 ++src; |
499 return true; | 532 return true; |
500 } | 533 } |
501 if (UNLIKELY(!*src)) { | 534 if (UNLIKELY(!*src)) { |
(...skipping 23 matching lines...) Expand all Loading... |
525 } | 558 } |
526 | 559 |
527 template <typename CharacterType> | 560 template <typename CharacterType> |
528 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r
esultString, UChar quote) | 561 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r
esultString, UChar quote) |
529 { | 562 { |
530 CharacterType* start = currentCharacter<CharacterType>(); | 563 CharacterType* start = currentCharacter<CharacterType>(); |
531 | 564 |
532 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result,
quote))) { | 565 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result,
quote))) { |
533 // Found an escape we couldn't handle with 8 bits, copy what has been re
cognized and continue | 566 // Found an escape we couldn't handle with 8 bits, copy what has been re
cognized and continue |
534 ASSERT(is8BitSource()); | 567 ASSERT(is8BitSource()); |
535 UChar*& result16 = currentCharacter16(); | 568 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxStrin
gLen(result, quote)); |
536 UChar* start16 = result16; | 569 UChar* start16 = result16; |
537 int i = 0; | 570 int i = 0; |
538 for (; i < result - start; i++) | 571 for (; i < result - start; i++) |
539 result16[i] = start[i]; | 572 result16[i] = start[i]; |
540 | 573 |
541 result16 += i; | 574 result16 += i; |
542 | 575 |
543 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); | 576 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); |
544 | 577 |
545 resultString.init(start16, result16 - start16); | 578 resultString.init(start16, result16 - start16); |
(...skipping 27 matching lines...) Expand all Loading... |
573 } | 606 } |
574 } | 607 } |
575 | 608 |
576 end = skipWhiteSpace(end); | 609 end = skipWhiteSpace(end); |
577 if (*end != ')') | 610 if (*end != ')') |
578 return false; | 611 return false; |
579 | 612 |
580 return true; | 613 return true; |
581 } | 614 } |
582 | 615 |
// NEW only. Measures how many source characters the URI body starting at
// `src` spans, for sizing a 16-bit destination buffer. Nothing is written and
// the caller's pointer is not moved (`src` is passed by value).
//   quote != 0 : the URI is a quoted string; defer to peekMaxStringLen.
//   quote == 0 : walk the unquoted URI with the same loop shape that
//                parseURIInternal uses, skipping over escapes without
//                decoding them into a destination.
// NOTE(review): the return value of parseEscape() is discarded; it is called
// for its advance of `src` (signature not visible in this diff).
| 616 template <typename SrcCharacterType> |
| 617 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote) |
| 618 { |
| 619 // The decoded form of a URI (after resolving escape sequences) |
| 620 // will not contain more characters (ASCII or UTF-16 codepoints) |
| 621 // than the input. This code can therefore ignore escape sequences |
| 622 // completely. |
| 623 SrcCharacterType* start = src; |
| 624 if (quote) { |
| 625 ASSERT(quote == '"' || quote == '\''); |
| 626 return peekMaxStringLen(src, quote); |
| 627 } |
| 628 |
| 629 while (isURILetter(*src)) { |
| 630 if (LIKELY(*src != '\\')) |
| 631 src++; |
| 632 else |
| 633 parseEscape<SrcCharacterType>(src); |
| 634 } |
| 635 |
| 636 return src - start; |
| 637 } |
| 638 |
// Decodes a URI body from `src` into `dest`, advancing both cursors.
//   quote != 0 : the body is a quoted string; delegate to parseStringInternal.
//   quote == 0 : copy characters while isURILetter() accepts them, decoding
//                backslash escapes through parseEscape/UnicodeToChars.
// Returns false when an escape decodes to a code point above 0xff and cannot
// be stored; otherwise returns true (or whatever parseStringInternal returns
// for the quoted case).
// Change in this patch: the "does not fit" test now keys on the DESTINATION
// character width (sizeof(DestCharacterType) == 1) instead of the source
// width. With an 8-bit source and a 16-bit destination the OLD test still
// bailed out on code points above 0xff; the NEW test lets that combination
// store them.
583 template <typename SrcCharacterType, typename DestCharacterType> | 639 template <typename SrcCharacterType, typename DestCharacterType> |
584 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter
Type*& dest, UChar quote) | 640 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter
Type*& dest, UChar quote) |
585 { | 641 { |
586 if (quote) { | 642 if (quote) { |
587 ASSERT(quote == '"' || quote == '\''); | 643 ASSERT(quote == '"' || quote == '\''); |
588 return parseStringInternal(src, dest, quote); | 644 return parseStringInternal(src, dest, quote); |
589 } | 645 } |
590 | 646 |
591 while (isURILetter(*src)) { | 647 while (isURILetter(*src)) { |
592 if (LIKELY(*src != '\\')) { | 648 if (LIKELY(*src != '\\')) { |
593 *dest++ = *src++; | 649 *dest++ = *src++; |
594 } else { | 650 } else { |
595 unsigned unicode = parseEscape<SrcCharacterType>(src); | 651 unsigned unicode = parseEscape<SrcCharacterType>(src); |
596 if (unicode > 0xff && sizeof(SrcCharacterType) == 1) | 652 if (unicode > 0xff && sizeof(DestCharacterType) == 1) |
597 return false; | 653 return false; |
598 UnicodeToChars(dest, unicode); | 654 UnicodeToChars(dest, unicode); |
599 } | 655 } |
600 } | 656 } |
601 | 657 |
602 return true; | 658 return true; |
603 } | 659 } |
604 | 660 |
// Parses a URI token. findURI() locates the body (uriStart..uriEnd) and the
// quote character (0 when unquoted); if it fails, the function returns
// without setting a token. Otherwise the body is decoded into `string`, the
// read cursor is moved to uriEnd + 1, and m_token is set to URI.
// Decoding is tried in place first (dest aliases uriStart). If that fails
// because an escape does not fit in 8 bits, the read cursor is rewound to
// uriStart and the body is decoded again into a 16-bit buffer.
// Change in this patch: the 16-bit buffer now comes from
// allocateStringBuffer16(), sized by peekMaxURILen(), and is tracked through
// the local cursor `result16` instead of the shared currentCharacter16().
// NOTE(review): the first pass writes decoded characters over the source
// starting at uriStart before it fails. The retry (and peekMaxURILen) then
// re-read from uriStart, so if an earlier escape was already decoded in
// place they appear to see the overwritten bytes rather than the original
// text. This is the same in OLD and NEW - confirm whether it is reachable.
605 template <typename CharacterType> | 661 template <typename CharacterType> |
606 inline void CSSTokenizer::parseURI(CSSParserString& string) | 662 inline void CSSTokenizer::parseURI(CSSParserString& string) |
607 { | 663 { |
608 CharacterType* uriStart; | 664 CharacterType* uriStart; |
609 CharacterType* uriEnd; | 665 CharacterType* uriEnd; |
610 UChar quote; | 666 UChar quote; |
611 if (!findURI(uriStart, uriEnd, quote)) | 667 if (!findURI(uriStart, uriEnd, quote)) |
612 return; | 668 return; |
613 | 669 |
614 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; | 670 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; |
615 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))
) { | 671 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))
) { |
616 string.init(uriStart, dest - uriStart); | 672 string.init(uriStart, dest - uriStart); |
617 } else { | 673 } else { |
618 // An escape sequence was encountered that can't be stored in 8 bits. | 674 // An escape sequence was encountered that can't be stored in 8 bits. |
619 // Reset the current character to the start of the URI and re-parse with | 675 // Reset the current character to the start of the URI and re-parse with |
620 // a 16-bit destination. | 676 // a 16-bit destination. |
621 ASSERT(is8BitSource()); | 677 ASSERT(is8BitSource()); |
622 UChar* uriStart16 = currentCharacter16(); | 678 UChar* result16 = allocateStringBuffer16(peekMaxURILen(uriStart, quote))
; |
| 679 UChar* uriStart16 = result16; |
623 currentCharacter<CharacterType>() = uriStart; | 680 currentCharacter<CharacterType>() = uriStart; |
624 bool result = parseURIInternal(currentCharacter<CharacterType>(), curren
tCharacter16(), quote); | 681 bool result = parseURIInternal(currentCharacter<CharacterType>(), result
16, quote); |
625 ASSERT_UNUSED(result, result); | 682 ASSERT_UNUSED(result, result); |
626 string.init(uriStart16, currentCharacter16() - uriStart16); | 683 string.init(uriStart16, result16 - uriStart16); |
627 } | 684 } |
628 | 685 |
629 currentCharacter<CharacterType>() = uriEnd + 1; | 686 currentCharacter<CharacterType>() = uriEnd + 1; |
630 m_token = URI; | 687 m_token = URI; |
631 } | 688 } |
632 | 689 |
633 template <typename CharacterType> | 690 template <typename CharacterType> |
634 inline bool CSSTokenizer::parseUnicodeRange() | 691 inline bool CSSTokenizer::parseUnicodeRange() |
635 { | 692 { |
636 CharacterType* character = currentCharacter<CharacterType>() + 1; | 693 CharacterType* character = currentCharacter<CharacterType>() + 1; |
(...skipping 886 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1523 m_dataStart16[length - 1] = 0; | 1580 m_dataStart16[length - 1] = 0; |
1524 | 1581 |
1525 m_is8BitSource = false; | 1582 m_is8BitSource = false; |
1526 m_currentCharacter8 = 0; | 1583 m_currentCharacter8 = 0; |
1527 m_currentCharacter16 = m_dataStart16.get(); | 1584 m_currentCharacter16 = m_dataStart16.get(); |
1528 setTokenStart<UChar>(m_currentCharacter16); | 1585 setTokenStart<UChar>(m_currentCharacter16); |
1529 m_lexFunc = &CSSTokenizer::realLex<UChar>; | 1586 m_lexFunc = &CSSTokenizer::realLex<UChar>; |
1530 } | 1587 } |
1531 | 1588 |
1532 } // namespace WebCore | 1589 } // namespace WebCore |
OLD | NEW |