Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(205)

Side by Side Diff: Source/wtf/text/StringImpl.cpp

Issue 1135003004: Make small-caps work correctly with tr locale (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Patch for landing Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« Source/wtf/text/StringImpl.h ('K') | « Source/wtf/text/StringImpl.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org) 3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) 4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 * 7 *
8 * This library is free software; you can redistribute it and/or 8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public 9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
(...skipping 512 matching lines...) Expand 10 before | Expand all | Expand 10 after
523 523
524 if (!(ored & ~0x7F)) { 524 if (!(ored & ~0x7F)) {
525 for (int32_t i = 0; i < length; ++i) 525 for (int32_t i = 0; i < length; ++i)
526 data8[i] = toASCIILower(characters8()[i]); 526 data8[i] = toASCIILower(characters8()[i]);
527 527
528 return newImpl.release(); 528 return newImpl.release();
529 } 529 }
530 530
531 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 531 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
532 for (int32_t i = 0; i < length; ++i) 532 for (int32_t i = 0; i < length; ++i)
533 data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); 533 data8[i] = static_cast<LChar>(toLower(characters8()[i]));
tkent 2015/05/28 00:38:26 I prefer keeping Unicode::. We have Unicode::toLo
rwlbuis 2015/05/28 23:03:07 Yes, I should not have included it. My idea was to
534 534
535 return newImpl.release(); 535 return newImpl.release();
536 } 536 }
537 537
538 const UChar* end = characters16() + m_length; 538 const UChar* end = characters16() + m_length;
539 for (const UChar* chp = characters16(); chp != end; ++chp) { 539 for (const UChar* chp = characters16(); chp != end; ++chp) {
540 if (UNLIKELY(isASCIIUpper(*chp))) 540 if (UNLIKELY(isASCIIUpper(*chp)))
541 noUpper = false; 541 noUpper = false;
542 ored |= *chp; 542 ored |= *chp;
543 } 543 }
(...skipping 13 matching lines...) Expand all
557 data16[i] = toASCIILower(c); 557 data16[i] = toASCIILower(c);
558 } 558 }
559 return newImpl.release(); 559 return newImpl.release();
560 } 560 }
561 561
562 // Do a slower implementation for cases that include non-ASCII characters. 562 // Do a slower implementation for cases that include non-ASCII characters.
563 UChar* data16; 563 UChar* data16;
564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
565 565
566 bool error; 566 bool error;
567 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error); 567 int32_t realLength = toLower(data16, length, characters16(), m_length, &error);
568 if (!error && realLength == length) 568 if (!error && realLength == length)
569 return newImpl.release(); 569 return newImpl.release();
570 570
571 newImpl = createUninitialized(realLength, data16); 571 newImpl = createUninitialized(realLength, data16);
572 Unicode::toLower(data16, realLength, characters16(), m_length, &error); 572 toLower(data16, realLength, characters16(), m_length, &error);
573 if (error) 573 if (error)
574 return this; 574 return this;
575 return newImpl.release(); 575 return newImpl.release();
576 } 576 }
577 577
578 PassRefPtr<StringImpl> StringImpl::upper() 578 PassRefPtr<StringImpl> StringImpl::upper()
579 { 579 {
580 // This function could be optimized for no-op cases the way lower() is, 580 // This function could be optimized for no-op cases the way lower() is,
581 // but in empirical testing, few actual calls to upper() are no-ops, so 581 // but in empirical testing, few actual calls to upper() are no-ops, so
582 // it wouldn't be worth the extra time for pre-scanning. 582 // it wouldn't be worth the extra time for pre-scanning.
(...skipping 18 matching lines...) Expand all
601 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 601 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
602 int numberSharpSCharacters = 0; 602 int numberSharpSCharacters = 0;
603 603
604 // There are two special cases. 604 // There are two special cases.
605 // 1. latin-1 characters when converted to upper case are 16 bit characters. 605 // 1. latin-1 characters when converted to upper case are 16 bit characters.
606 // 2. Lower case sharp-S converts to "SS" (two characters) 606 // 2. Lower case sharp-S converts to "SS" (two characters)
607 for (int32_t i = 0; i < length; ++i) { 607 for (int32_t i = 0; i < length; ++i) {
608 LChar c = characters8()[i]; 608 LChar c = characters8()[i];
609 if (UNLIKELY(c == smallLetterSharpSCharacter)) 609 if (UNLIKELY(c == smallLetterSharpSCharacter))
610 ++numberSharpSCharacters; 610 ++numberSharpSCharacters;
611 UChar upper = static_cast<UChar>(Unicode::toUpper(c)); 611 UChar upper = static_cast<UChar>(toUpper(c));
612 if (UNLIKELY(upper > 0xff)) { 612 if (UNLIKELY(upper > 0xff)) {
613 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path. 613 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
614 goto upconvert; 614 goto upconvert;
615 } 615 }
616 data8[i] = static_cast<LChar>(upper); 616 data8[i] = static_cast<LChar>(upper);
617 } 617 }
618 618
619 if (!numberSharpSCharacters) 619 if (!numberSharpSCharacters)
620 return newImpl.release(); 620 return newImpl.release();
621 621
622 // We have numberSSCharacters sharp-s characters, but none of the other special characters. 622 // We have numberSSCharacters sharp-s characters, but none of the other special characters.
623 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); 623 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
624 624
625 LChar* dest = data8; 625 LChar* dest = data8;
626 626
627 for (int32_t i = 0; i < length; ++i) { 627 for (int32_t i = 0; i < length; ++i) {
628 LChar c = characters8()[i]; 628 LChar c = characters8()[i];
629 if (c == smallLetterSharpSCharacter) { 629 if (c == smallLetterSharpSCharacter) {
630 *dest++ = 'S'; 630 *dest++ = 'S';
631 *dest++ = 'S'; 631 *dest++ = 'S';
632 } else 632 } else {
633 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); 633 *dest++ = static_cast<LChar>(toUpper(c));
634 }
634 } 635 }
635 636
636 return newImpl.release(); 637 return newImpl.release();
637 } 638 }
638 639
639 upconvert: 640 upconvert:
640 RefPtr<StringImpl> upconverted = upconvertedString(); 641 RefPtr<StringImpl> upconverted = upconvertedString();
641 const UChar* source16 = upconverted->characters16(); 642 const UChar* source16 = upconverted->characters16();
642 643
643 UChar* data16; 644 UChar* data16;
644 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 645 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
645 646
646 // Do a faster loop for the case where all the characters are ASCII. 647 // Do a faster loop for the case where all the characters are ASCII.
647 UChar ored = 0; 648 UChar ored = 0;
648 for (int i = 0; i < length; ++i) { 649 for (int i = 0; i < length; ++i) {
649 UChar c = source16[i]; 650 UChar c = source16[i];
650 ored |= c; 651 ored |= c;
651 data16[i] = toASCIIUpper(c); 652 data16[i] = toASCIIUpper(c);
652 } 653 }
653 if (!(ored & ~0x7F)) 654 if (!(ored & ~0x7F))
654 return newImpl.release(); 655 return newImpl.release();
655 656
656 // Do a slower implementation for cases that include non-ASCII characters. 657 // Do a slower implementation for cases that include non-ASCII characters.
657 bool error; 658 bool error;
658 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error); 659 int32_t realLength = toUpper(data16, length, source16, m_length, &error);
659 if (!error && realLength == length) 660 if (!error && realLength == length)
660 return newImpl; 661 return newImpl;
661 newImpl = createUninitialized(realLength, data16); 662 newImpl = createUninitialized(realLength, data16);
662 Unicode::toUpper(data16, realLength, source16, m_length, &error); 663 toUpper(data16, realLength, source16, m_length, &error);
663 if (error) 664 if (error)
664 return this; 665 return this;
665 return newImpl.release(); 666 return newImpl.release();
666 } 667 }
667 668
668 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang) 669 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
669 { 670 {
670 if (equalIgnoringCase(localeId, lang)) 671 if (equalIgnoringCase(localeId, lang))
671 return true; 672 return true;
672 static char localeIdPrefix[4]; 673 static char localeIdPrefix[4];
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after
802 LChar c = characters8()[i]; 803 LChar c = characters8()[i];
803 data[i] = toASCIILower(c); 804 data[i] = toASCIILower(c);
804 ored |= c; 805 ored |= c;
805 } 806 }
806 807
807 if (!(ored & ~0x7F)) 808 if (!(ored & ~0x7F))
808 return newImpl.release(); 809 return newImpl.release();
809 810
810 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 811 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
811 for (int32_t i = 0; i < length; ++i) 812 for (int32_t i = 0; i < length; ++i)
812 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); 813 data[i] = static_cast<LChar>(toLower(characters8()[i]));
813 814
814 return newImpl.release(); 815 return newImpl.release();
815 } 816 }
816 817
817 // Do a faster loop for the case where all the characters are ASCII. 818 // Do a faster loop for the case where all the characters are ASCII.
818 UChar* data; 819 UChar* data;
819 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 820 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
820 UChar ored = 0; 821 UChar ored = 0;
821 for (int32_t i = 0; i < length; ++i) { 822 for (int32_t i = 0; i < length; ++i) {
822 UChar c = characters16()[i]; 823 UChar c = characters16()[i];
(...skipping 1264 matching lines...) Expand 10 before | Expand all | Expand 10 after
2087 } 2088 }
2088 2089
2089 size_t StringImpl::sizeInBytes() const 2090 size_t StringImpl::sizeInBytes() const
2090 { 2091 {
2091 size_t size = length(); 2092 size_t size = length();
2092 if (!is8Bit()) 2093 if (!is8Bit())
2093 size *= 2; 2094 size *= 2;
2094 return size + sizeof(*this); 2095 return size + sizeof(*this);
2095 } 2096 }
2096 2097
2098 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier)
2099 {
2100 if (!localeIdentifier.isNull()) {
2101 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az")) {
2102 if (c == 'i')
2103 return 0x130; // Latin capital letter i with dot above
tkent 2015/05/28 00:38:26 Please add latinCapitalLetterIWithDotAbove to wtf/
rwlbuis 2015/05/28 23:03:08 Done.
2104 if (c == 0x131) // Latin small letter dotless i
tkent 2015/05/28 00:38:26 Please add latinSmallLetterDotlessI to wtf/unicode
rwlbuis 2015/05/28 23:03:07 Done.
2105 return 'I';
2106 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {
2107 // FIXME
tkent 2015/05/28 00:38:26 FIXME -> TODO(rob.buis): blah blah
rwlbuis 2015/05/28 23:03:07 In this case, I don't think I want to put my name
tkent 2015/05/29 05:49:50 Please add TODO(rob.buis). It doesn't mean you'll
2108 }
2109 }
2110
2111 return toUpper(c);
2112 }
2113
2097 } // namespace WTF 2114 } // namespace WTF
OLDNEW
« Source/wtf/text/StringImpl.h ('K') | « Source/wtf/text/StringImpl.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698