OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) | 2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) |
3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) | 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) |
4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved. | 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved. |
5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> | 5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> |
6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> | 6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> |
7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) | 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) |
8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. | 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. |
9 * Copyright (C) 2012 Intel Corporation. All rights reserved. | 9 * Copyright (C) 2012 Intel Corporation. All rights reserved. |
10 * | 10 * |
(...skipping 286 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
297 { | 297 { |
298 return m_currentCharacter8; | 298 return m_currentCharacter8; |
299 } | 299 } |
300 | 300 |
301 template <> | 301 template <> |
302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() | 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() |
303 { | 303 { |
304 return m_currentCharacter16; | 304 return m_currentCharacter16; |
305 } | 305 } |
306 | 306 |
307 UChar*& CSSTokenizer::currentCharacter16() | 307 UChar* CSSTokenizer::getStringBuffer16(size_t len) |
Julien - ping for review
2014/03/20 20:57:39
We usually don't put the word "get" on getters as
Daniel Bratell
2014/03/21 15:14:39
Done.
| |
308 { | 308 { |
309 if (!m_currentCharacter16) { | 309 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]); |
310 m_dataStart16 = adoptArrayPtr(new UChar[m_length]); | |
311 m_currentCharacter16 = m_dataStart16.get(); | |
312 } | |
313 | 310 |
314 return m_currentCharacter16; | 311 UChar* bufferPtr = buffer.get(); |
312 | |
313 m_cssStrings16.append(buffer.release()); | |
314 return bufferPtr; | |
315 } | 315 } |
316 | 316 |
317 template <> | 317 template <> |
318 inline LChar* CSSTokenizer::dataStart<LChar>() | 318 inline LChar* CSSTokenizer::dataStart<LChar>() |
319 { | 319 { |
320 return m_dataStart8.get(); | 320 return m_dataStart8.get(); |
321 } | 321 } |
322 | 322 |
323 template <> | 323 template <> |
324 inline UChar* CSSTokenizer::dataStart<UChar>() | 324 inline UChar* CSSTokenizer::dataStart<UChar>() |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
379 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2; | 379 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2; |
380 } else { | 380 } else { |
381 currentCharacter = checkAndSkipEscape(currentCharacter); | 381 currentCharacter = checkAndSkipEscape(currentCharacter); |
382 if (!currentCharacter) | 382 if (!currentCharacter) |
383 return 0; | 383 return 0; |
384 } | 384 } |
385 } | 385 } |
386 } | 386 } |
387 | 387 |
388 template <typename CharacterType> | 388 template <typename CharacterType> |
389 unsigned CSSTokenizer::parseEscape(CharacterType*& src) | 389 unsigned CSSTokenizer::parseEscape(CharacterType*& src) |
Julien - ping for review
2014/03/20 20:57:39
This function should now have static linkage.
Daniel Bratell
2014/03/21 15:14:39
Hmm, not sure I understand. A function can have in
Julien - ping for review
2014/03/21 17:56:26
I was talking about file level internal function a
| |
390 { | 390 { |
391 ASSERT(*src == '\\' && isCSSEscape(src[1])); | 391 ASSERT(*src == '\\' && isCSSEscape(src[1])); |
392 | 392 |
393 unsigned unicode = 0; | 393 unsigned unicode = 0; |
394 | 394 |
395 ++src; | 395 ++src; |
396 if (isASCIIHexDigit(*src)) { | 396 if (isASCIIHexDigit(*src)) { |
397 | 397 |
398 int length = 6; | 398 int length = 6; |
399 | 399 |
400 do { | 400 do { |
401 unicode = (unicode << 4) + toASCIIHexValue(*src++); | 401 unicode = (unicode << 4) + toASCIIHexValue(*src++); |
402 } while (--length && isASCIIHexDigit(*src)); | 402 } while (--length && isASCIIHexDigit(*src)); |
403 | 403 |
404 // Characters above 0x10ffff are not handled. | 404 // Characters above 0x10ffff are not handled. |
405 if (unicode > 0x10ffff) | 405 if (unicode > 0x10ffff) |
406 unicode = 0xfffd; | 406 unicode = 0xfffd; |
407 | 407 |
408 // Optional space after the escape sequence. | 408 // Optional space after the escape sequence. |
409 if (isHTMLSpace<CharacterType>(*src)) | 409 if (isHTMLSpace<CharacterType>(*src)) |
410 ++src; | 410 ++src; |
411 | 411 |
412 return unicode; | 412 return unicode; |
413 } | 413 } |
414 | 414 |
415 return *currentCharacter<CharacterType>()++; | 415 return *src++; |
416 } | 416 } |
417 | 417 |
418 template <> | 418 template <> |
419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode ) | 419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode ) |
420 { | 420 { |
421 ASSERT(unicode <= 0xff); | 421 ASSERT(unicode <= 0xff); |
422 *result = unicode; | 422 *result = unicode; |
423 | 423 |
424 ++result; | 424 ++result; |
425 } | 425 } |
426 | 426 |
427 template <> | 427 template <> |
428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode ) | 428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode ) |
429 { | 429 { |
430 // Replace unicode with a surrogate pair when it is bigger than 0xffff | 430 // Replace unicode with a surrogate pair when it is bigger than 0xffff |
431 if (U16_LENGTH(unicode) == 2) { | 431 if (U16_LENGTH(unicode) == 2) { |
432 *result++ = U16_LEAD(unicode); | 432 *result++ = U16_LEAD(unicode); |
433 *result = U16_TRAIL(unicode); | 433 *result = U16_TRAIL(unicode); |
434 } else { | 434 } else { |
435 *result = unicode; | 435 *result = unicode; |
436 } | 436 } |
437 | 437 |
438 ++result; | 438 ++result; |
439 } | 439 } |
440 | 440 |
441 template <typename SrcCharacterType> | |
442 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src) | |
443 { | |
444 // The decoded form of an identifier (after resolving escape | |
445 // sequences) will not contain more characters (ASCII or UTF-16 | |
446 // codepoints) than the input. This code can therefore ignore | |
447 // escape sequences completely. | |
448 SrcCharacterType* start = src; | |
449 do { | |
450 if (LIKELY(*src != '\\')) | |
451 src++; | |
452 else | |
453 parseEscape<SrcCharacterType>(src); | |
454 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); | |
455 | |
456 return src - start; | |
457 } | |
458 | |
441 template <typename SrcCharacterType, typename DestCharacterType> | 459 template <typename SrcCharacterType, typename DestCharacterType> |
442 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh aracterType*& result, bool& hasEscape) | 460 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh aracterType*& result, bool& hasEscape) |
443 { | 461 { |
444 hasEscape = false; | 462 hasEscape = false; |
445 do { | 463 do { |
446 if (LIKELY(*src != '\\')) { | 464 if (LIKELY(*src != '\\')) { |
447 *result++ = *src++; | 465 *result++ = *src++; |
448 } else { | 466 } else { |
449 hasEscape = true; | 467 hasEscape = true; |
450 SrcCharacterType* savedEscapeStart = src; | 468 SrcCharacterType* savedEscapeStart = src; |
(...skipping 13 matching lines...) Expand all Loading... | |
464 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin g& resultString, bool& hasEscape) | 482 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin g& resultString, bool& hasEscape) |
465 { | 483 { |
466 // If a valid identifier start is found, we can safely | 484 // If a valid identifier start is found, we can safely |
467 // parse the identifier until the next invalid character. | 485 // parse the identifier until the next invalid character. |
468 ASSERT(isIdentifierStart<CharacterType>()); | 486 ASSERT(isIdentifierStart<CharacterType>()); |
469 | 487 |
470 CharacterType* start = currentCharacter<CharacterType>(); | 488 CharacterType* start = currentCharacter<CharacterType>(); |
471 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res ult, hasEscape))) { | 489 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res ult, hasEscape))) { |
472 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue | 490 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue |
473 ASSERT(is8BitSource()); | 491 ASSERT(is8BitSource()); |
474 UChar*& result16 = currentCharacter16(); | 492 UChar* result16 = getStringBuffer16((result - start) + peekMaxIdentifier Len(result)); |
475 UChar* start16 = result16; | 493 UChar* start16 = result16; |
476 int i = 0; | 494 int i = 0; |
477 for (; i < result - start; i++) | 495 for (; i < result - start; i++) |
478 result16[i] = start[i]; | 496 result16[i] = start[i]; |
479 | 497 |
480 result16 += i; | 498 result16 += i; |
481 | 499 |
482 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has Escape); | 500 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has Escape); |
483 | 501 |
484 resultString.init(start16, result16 - start16); | 502 resultString.init(start16, result16 - start16); |
485 | 503 |
486 return; | 504 return; |
487 } | 505 } |
488 | 506 |
489 resultString.init(start, result - start); | 507 resultString.init(start, result - start); |
490 } | 508 } |
491 | 509 |
510 template <typename SrcCharacterType> | |
511 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote) | |
512 { | |
513 // The decoded form of a CSS string (after resolving escape | |
514 // sequences) will not contain more characters (ASCII or UTF-16 | |
515 // codepoints) than the input. This code can therefore ignore | |
516 // escape sequences completely. | |
517 SrcCharacterType* start = src; | |
518 while (true) { | |
519 if (UNLIKELY(*src == quote)) { | |
520 // String parsing is done. | |
521 ++src; | |
522 break; | |
523 } | |
524 if (UNLIKELY(!*src)) { | |
525 // String parsing is done, but don't advance pointer if at the end of input. | |
526 break; | |
527 } | |
528 ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v'); | |
529 | |
530 if (LIKELY(src[0] != '\\')) | |
531 src++; | |
532 else if (src[1] == '\n' || src[1] == '\f') | |
533 src += 2; | |
534 else if (src[1] == '\r') | |
535 src += src[2] == '\n' ? 3 : 2; | |
Julien - ping for review
2014/03/20 20:57:39
This looks awfully like checkAndSkipString, maybe
| |
536 else | |
537 parseEscape<SrcCharacterType>(src); | |
538 } | |
539 | |
540 return src - start; | |
541 } | |
542 | |
492 template <typename SrcCharacterType, typename DestCharacterType> | 543 template <typename SrcCharacterType, typename DestCharacterType> |
493 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac terType*& result, UChar quote) | 544 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac terType*& result, UChar quote) |
494 { | 545 { |
495 while (true) { | 546 while (true) { |
496 if (UNLIKELY(*src == quote)) { | 547 if (UNLIKELY(*src == quote)) { |
497 // String parsing is done. | 548 // String parsing is done. |
498 ++src; | 549 ++src; |
499 return true; | 550 return true; |
500 } | 551 } |
501 if (UNLIKELY(!*src)) { | 552 if (UNLIKELY(!*src)) { |
(...skipping 23 matching lines...) Expand all Loading... | |
525 } | 576 } |
526 | 577 |
527 template <typename CharacterType> | 578 template <typename CharacterType> |
528 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r esultString, UChar quote) | 579 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r esultString, UChar quote) |
529 { | 580 { |
530 CharacterType* start = currentCharacter<CharacterType>(); | 581 CharacterType* start = currentCharacter<CharacterType>(); |
531 | 582 |
532 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) { | 583 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) { |
533 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue | 584 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue |
534 ASSERT(is8BitSource()); | 585 ASSERT(is8BitSource()); |
535 UChar*& result16 = currentCharacter16(); | 586 UChar* result16 = getStringBuffer16((result - start) + peekMaxStringLen( result, quote)); |
536 UChar* start16 = result16; | 587 UChar* start16 = result16; |
537 int i = 0; | 588 int i = 0; |
538 for (; i < result - start; i++) | 589 for (; i < result - start; i++) |
539 result16[i] = start[i]; | 590 result16[i] = start[i]; |
540 | 591 |
541 result16 += i; | 592 result16 += i; |
542 | 593 |
543 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); | 594 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); |
544 | 595 |
545 resultString.init(start16, result16 - start16); | 596 resultString.init(start16, result16 - start16); |
(...skipping 27 matching lines...) Expand all Loading... | |
573 } | 624 } |
574 } | 625 } |
575 | 626 |
576 end = skipWhiteSpace(end); | 627 end = skipWhiteSpace(end); |
577 if (*end != ')') | 628 if (*end != ')') |
578 return false; | 629 return false; |
579 | 630 |
580 return true; | 631 return true; |
581 } | 632 } |
582 | 633 |
634 template <typename SrcCharacterType> | |
635 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote) | |
636 { | |
637 // The decoded form of a URI (after resolving escape sequences) | |
638 // will not contain more characters (ASCII or UTF-16 codepoints) | |
639 // than the input. This code can therefore ignore escape sequences | |
640 // completely. | |
641 SrcCharacterType* start = src; | |
642 if (quote) { | |
643 ASSERT(quote == '"' || quote == '\''); | |
644 return peekMaxStringLen(src, quote); | |
645 } | |
646 | |
647 while (isURILetter(*src)) { | |
648 if (LIKELY(*src != '\\')) | |
649 src++; | |
650 else | |
651 parseEscape<SrcCharacterType>(src); | |
652 } | |
653 | |
654 return src - start; | |
655 } | |
656 | |
583 template <typename SrcCharacterType, typename DestCharacterType> | 657 template <typename SrcCharacterType, typename DestCharacterType> |
584 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter Type*& dest, UChar quote) | 658 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter Type*& dest, UChar quote) |
585 { | 659 { |
586 if (quote) { | 660 if (quote) { |
587 ASSERT(quote == '"' || quote == '\''); | 661 ASSERT(quote == '"' || quote == '\''); |
588 return parseStringInternal(src, dest, quote); | 662 return parseStringInternal(src, dest, quote); |
589 } | 663 } |
590 | 664 |
591 while (isURILetter(*src)) { | 665 while (isURILetter(*src)) { |
592 if (LIKELY(*src != '\\')) { | 666 if (LIKELY(*src != '\\')) { |
593 *dest++ = *src++; | 667 *dest++ = *src++; |
594 } else { | 668 } else { |
595 unsigned unicode = parseEscape<SrcCharacterType>(src); | 669 unsigned unicode = parseEscape<SrcCharacterType>(src); |
596 if (unicode > 0xff && sizeof(SrcCharacterType) == 1) | 670 if (unicode > 0xff && sizeof(DestCharacterType) == 1) |
597 return false; | 671 return false; |
598 UnicodeToChars(dest, unicode); | 672 UnicodeToChars(dest, unicode); |
599 } | 673 } |
600 } | 674 } |
601 | 675 |
602 return true; | 676 return true; |
603 } | 677 } |
604 | 678 |
605 template <typename CharacterType> | 679 template <typename CharacterType> |
606 inline void CSSTokenizer::parseURI(CSSParserString& string) | 680 inline void CSSTokenizer::parseURI(CSSParserString& string) |
607 { | 681 { |
608 CharacterType* uriStart; | 682 CharacterType* uriStart; |
609 CharacterType* uriEnd; | 683 CharacterType* uriEnd; |
610 UChar quote; | 684 UChar quote; |
611 if (!findURI(uriStart, uriEnd, quote)) | 685 if (!findURI(uriStart, uriEnd, quote)) |
612 return; | 686 return; |
613 | 687 |
614 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; | 688 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; |
615 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote)) ) { | 689 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote)) ) { |
616 string.init(uriStart, dest - uriStart); | 690 string.init(uriStart, dest - uriStart); |
617 } else { | 691 } else { |
618 // An escape sequence was encountered that can't be stored in 8 bits. | 692 // An escape sequence was encountered that can't be stored in 8 bits. |
619 // Reset the current character to the start of the URI and re-parse with | 693 // Reset the current character to the start of the URI and re-parse with |
620 // a 16-bit destination. | 694 // a 16-bit destination. |
621 ASSERT(is8BitSource()); | 695 ASSERT(is8BitSource()); |
622 UChar* uriStart16 = currentCharacter16(); | 696 UChar* result16 = getStringBuffer16(peekMaxURILen(uriStart, quote)); |
697 UChar* uriStart16 = result16; | |
623 currentCharacter<CharacterType>() = uriStart; | 698 currentCharacter<CharacterType>() = uriStart; |
624 bool result = parseURIInternal(currentCharacter<CharacterType>(), curren tCharacter16(), quote); | 699 bool result = parseURIInternal(currentCharacter<CharacterType>(), result 16, quote); |
625 ASSERT_UNUSED(result, result); | 700 ASSERT_UNUSED(result, result); |
626 string.init(uriStart16, currentCharacter16() - uriStart16); | 701 string.init(uriStart16, result16 - uriStart16); |
627 } | 702 } |
628 | 703 |
629 currentCharacter<CharacterType>() = uriEnd + 1; | 704 currentCharacter<CharacterType>() = uriEnd + 1; |
630 m_token = URI; | 705 m_token = URI; |
631 } | 706 } |
632 | 707 |
633 template <typename CharacterType> | 708 template <typename CharacterType> |
634 inline bool CSSTokenizer::parseUnicodeRange() | 709 inline bool CSSTokenizer::parseUnicodeRange() |
635 { | 710 { |
636 CharacterType* character = currentCharacter<CharacterType>() + 1; | 711 CharacterType* character = currentCharacter<CharacterType>() + 1; |
(...skipping 889 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1526 m_dataStart16[length - 1] = 0; | 1601 m_dataStart16[length - 1] = 0; |
1527 | 1602 |
1528 m_is8BitSource = false; | 1603 m_is8BitSource = false; |
1529 m_currentCharacter8 = 0; | 1604 m_currentCharacter8 = 0; |
1530 m_currentCharacter16 = m_dataStart16.get(); | 1605 m_currentCharacter16 = m_dataStart16.get(); |
1531 setTokenStart<UChar>(m_currentCharacter16); | 1606 setTokenStart<UChar>(m_currentCharacter16); |
1532 m_lexFunc = &CSSTokenizer::realLex<UChar>; | 1607 m_lexFunc = &CSSTokenizer::realLex<UChar>; |
1533 } | 1608 } |
1534 | 1609 |
1535 } // namespace WebCore | 1610 } // namespace WebCore |
OLD | NEW |