Source/wtf/text/StringImpl.cpp - Issue 1135003004: Make small-caps work correctly with tr locale

Side by Side Diff: Source/wtf/text/StringImpl.cpp

Issue 1135003004: Make small-caps work correctly with tr locale (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Patch for landing Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)	2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)

3 * (C) 1999 Antti Koivisto (koivisto@kde.org)	3 * (C) 1999 Antti Koivisto (koivisto@kde.org)

4 * (C) 2001 Dirk Mueller ( mueller@kde.org )	4 * (C) 2001 Dirk Mueller ( mueller@kde.org )

5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.	5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.

6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)	6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)

7 *	7 *

8 * This library is free software; you can redistribute it and/or	8 * This library is free software; you can redistribute it and/or

9 * modify it under the terms of the GNU Library General Public	9 * modify it under the terms of the GNU Library General Public

10 * License as published by the Free Software Foundation; either	10 * License as published by the Free Software Foundation; either

(...skipping 512 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
523	523

524 if (!(ored & ~0x7F)) {	524 if (!(ored & ~0x7F)) {

525 for (int32_t i = 0; i < length; ++i)	525 for (int32_t i = 0; i < length; ++i)

526 data8[i] = toASCIILower(characters8()[i]);	526 data8[i] = toASCIILower(characters8()[i]);

527	527

528 return newImpl.release();	528 return newImpl.release();

529 }	529 }

530	530

531 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.	531 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.

532 for (int32_t i = 0; i < length; ++i)	532 for (int32_t i = 0; i < length; ++i)

533 data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));	533 data8[i] = static_cast<LChar>(toLower(characters8()[i]));
tkent 2015/05/28 00:38:26 I prefer keeping Unicode::. We have Unicode::toLower, StringImpl::lower, Unicode::toUpper, StringImpl::upper, StringImpl::toUpper. They are confusing. rwlbuis 2015/05/28 23:03:07 On 2015/05/28 00:38:26, tkent wrote: > I prefer keeping Unicode::. We have Unicode::toLower, StringImpl::lower, > Unicode::toUpper, StringImpl::upper, StringImpl::toUpper. They are confusing. Yes, I should not have included it. My idea was to make it consistent. Fixed.
534	534

535 return newImpl.release();	535 return newImpl.release();

536 }	536 }

537	537

538 const UChar* end = characters16() + m_length;	538 const UChar* end = characters16() + m_length;

539 for (const UChar* chp = characters16(); chp != end; ++chp) {	539 for (const UChar* chp = characters16(); chp != end; ++chp) {

540 if (UNLIKELY(isASCIIUpper(*chp)))	540 if (UNLIKELY(isASCIIUpper(*chp)))

541 noUpper = false;	541 noUpper = false;

542 ored \|= *chp;	542 ored \|= *chp;

543 }	543 }

(...skipping 13 matching lines...) Expand all Loading...
557 data16[i] = toASCIILower(c);	557 data16[i] = toASCIILower(c);

558 }	558 }

559 return newImpl.release();	559 return newImpl.release();

560 }	560 }

561	561

562 // Do a slower implementation for cases that include non-ASCII characters.	562 // Do a slower implementation for cases that include non-ASCII characters.

563 UChar* data16;	563 UChar* data16;

564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);	564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

565	565

566 bool error;	566 bool error;

567 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);	567 int32_t realLength = toLower(data16, length, characters16(), m_length, &error);

568 if (!error && realLength == length)	568 if (!error && realLength == length)

569 return newImpl.release();	569 return newImpl.release();

570	570

571 newImpl = createUninitialized(realLength, data16);	571 newImpl = createUninitialized(realLength, data16);

572 Unicode::toLower(data16, realLength, characters16(), m_length, &error);	572 toLower(data16, realLength, characters16(), m_length, &error);

573 if (error)	573 if (error)

574 return this;	574 return this;

575 return newImpl.release();	575 return newImpl.release();

576 }	576 }

577	577

578 PassRefPtr<StringImpl> StringImpl::upper()	578 PassRefPtr<StringImpl> StringImpl::upper()

579 {	579 {

580 // This function could be optimized for no-op cases the way lower() is,	580 // This function could be optimized for no-op cases the way lower() is,

581 // but in empirical testing, few actual calls to upper() are no-ops, so	581 // but in empirical testing, few actual calls to upper() are no-ops, so

582 // it wouldn't be worth the extra time for pre-scanning.	582 // it wouldn't be worth the extra time for pre-scanning.

(...skipping 18 matching lines...) Expand all Loading...
601 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.	601 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.

602 int numberSharpSCharacters = 0;	602 int numberSharpSCharacters = 0;

603	603

604 // There are two special cases.	604 // There are two special cases.

605 // 1. latin-1 characters when converted to upper case are 16 bit characters.	605 // 1. latin-1 characters when converted to upper case are 16 bit characters.

606 // 2. Lower case sharp-S converts to "SS" (two characters)	606 // 2. Lower case sharp-S converts to "SS" (two characters)

607 for (int32_t i = 0; i < length; ++i) {	607 for (int32_t i = 0; i < length; ++i) {

608 LChar c = characters8()[i];	608 LChar c = characters8()[i];

609 if (UNLIKELY(c == smallLetterSharpSCharacter))	609 if (UNLIKELY(c == smallLetterSharpSCharacter))

610 ++numberSharpSCharacters;	610 ++numberSharpSCharacters;

611 UChar upper = static_cast<UChar>(Unicode::toUpper(c));	611 UChar upper = static_cast<UChar>(toUpper(c));

612 if (UNLIKELY(upper > 0xff)) {	612 if (UNLIKELY(upper > 0xff)) {

613 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.	613 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.

614 goto upconvert;	614 goto upconvert;

615 }	615 }

616 data8[i] = static_cast<LChar>(upper);	616 data8[i] = static_cast<LChar>(upper);

617 }	617 }

618	618

619 if (!numberSharpSCharacters)	619 if (!numberSharpSCharacters)

620 return newImpl.release();	620 return newImpl.release();

621	621

622 // We have numberSSCharacters sharp-s characters, but none of the other special characters.	622 // We have numberSSCharacters sharp-s characters, but none of the other special characters.

623 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);	623 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);

624	624

625 LChar* dest = data8;	625 LChar* dest = data8;

626	626

627 for (int32_t i = 0; i < length; ++i) {	627 for (int32_t i = 0; i < length; ++i) {

628 LChar c = characters8()[i];	628 LChar c = characters8()[i];

629 if (c == smallLetterSharpSCharacter) {	629 if (c == smallLetterSharpSCharacter) {

630 *dest++ = 'S';	630 *dest++ = 'S';

631 *dest++ = 'S';	631 *dest++ = 'S';

632 } else	632 } else {

633 *dest++ = static_cast<LChar>(Unicode::toUpper(c));	633 *dest++ = static_cast<LChar>(toUpper(c));

	634 }

634 }	635 }

635	636

636 return newImpl.release();	637 return newImpl.release();

637 }	638 }

638	639

639 upconvert:	640 upconvert:

640 RefPtr<StringImpl> upconverted = upconvertedString();	641 RefPtr<StringImpl> upconverted = upconvertedString();

641 const UChar* source16 = upconverted->characters16();	642 const UChar* source16 = upconverted->characters16();

642	643

643 UChar* data16;	644 UChar* data16;

644 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);	645 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);

645	646

646 // Do a faster loop for the case where all the characters are ASCII.	647 // Do a faster loop for the case where all the characters are ASCII.

647 UChar ored = 0;	648 UChar ored = 0;

648 for (int i = 0; i < length; ++i) {	649 for (int i = 0; i < length; ++i) {

649 UChar c = source16[i];	650 UChar c = source16[i];

650 ored \|= c;	651 ored \|= c;

651 data16[i] = toASCIIUpper(c);	652 data16[i] = toASCIIUpper(c);

652 }	653 }

653 if (!(ored & ~0x7F))	654 if (!(ored & ~0x7F))

654 return newImpl.release();	655 return newImpl.release();

655	656

656 // Do a slower implementation for cases that include non-ASCII characters.	657 // Do a slower implementation for cases that include non-ASCII characters.

657 bool error;	658 bool error;

658 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);	659 int32_t realLength = toUpper(data16, length, source16, m_length, &error);

659 if (!error && realLength == length)	660 if (!error && realLength == length)

660 return newImpl;	661 return newImpl;

661 newImpl = createUninitialized(realLength, data16);	662 newImpl = createUninitialized(realLength, data16);

662 Unicode::toUpper(data16, realLength, source16, m_length, &error);	663 toUpper(data16, realLength, source16, m_length, &error);

663 if (error)	664 if (error)

664 return this;	665 return this;

665 return newImpl.release();	666 return newImpl.release();

666 }	667 }

667	668

668 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)	669 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)

669 {	670 {

670 if (equalIgnoringCase(localeId, lang))	671 if (equalIgnoringCase(localeId, lang))

671 return true;	672 return true;

672 static char localeIdPrefix[4];	673 static char localeIdPrefix[4];

(...skipping 129 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
802 LChar c = characters8()[i];	803 LChar c = characters8()[i];

803 data[i] = toASCIILower(c);	804 data[i] = toASCIILower(c);

804 ored \|= c;	805 ored \|= c;

805 }	806 }

806	807

807 if (!(ored & ~0x7F))	808 if (!(ored & ~0x7F))

808 return newImpl.release();	809 return newImpl.release();

809	810

810 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.	811 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.

811 for (int32_t i = 0; i < length; ++i)	812 for (int32_t i = 0; i < length; ++i)

812 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));	813 data[i] = static_cast<LChar>(toLower(characters8()[i]));

813	814

814 return newImpl.release();	815 return newImpl.release();

815 }	816 }

816	817

817 // Do a faster loop for the case where all the characters are ASCII.	818 // Do a faster loop for the case where all the characters are ASCII.

818 UChar* data;	819 UChar* data;

819 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);	820 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);

820 UChar ored = 0;	821 UChar ored = 0;

821 for (int32_t i = 0; i < length; ++i) {	822 for (int32_t i = 0; i < length; ++i) {

822 UChar c = characters16()[i];	823 UChar c = characters16()[i];

(...skipping 1264 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2087 }	2088 }

2088	2089

2089 size_t StringImpl::sizeInBytes() const	2090 size_t StringImpl::sizeInBytes() const

2090 {	2091 {

2091 size_t size = length();	2092 size_t size = length();

2092 if (!is8Bit())	2093 if (!is8Bit())

2093 size *= 2;	2094 size *= 2;

2094 return size + sizeof(*this);	2095 return size + sizeof(*this);

2095 }	2096 }

2096	2097

	2098 UChar32 toUpper(UChar32 c, const AtomicString& localeIdentifier)

	2099 {

	2100 if (!localeIdentifier.isNull()) {

	2101 if (localeIdMatchesLang(localeIdentifier, "tr") \|\| localeIdMatchesLang(localeIdentifier, "az")) {

	2102 if (c == 'i')

	2103 return 0x130; // Latin capital letter i with dot above
	tkent 2015/05/28 00:38:26 Please add latinCapitalLetterIWithDotAbove to wtf/unicode/CharacterNames.h. rwlbuis 2015/05/28 23:03:08 On 2015/05/28 00:38:26, tkent wrote: > Please add latinCapitalLetterIWithDotAbove to wtf/unicode/CharacterNames.h. Done.
	2104 if (c == 0x131) // Latin small letter dotless i
	tkent 2015/05/28 00:38:26 Please add latinSmallLetterDotlessI to wtf/unicode/CharacterNames.h. rwlbuis 2015/05/28 23:03:07 On 2015/05/28 00:38:26, tkent wrote: > Please add latinSmallLetterDotlessI to wtf/unicode/CharacterNames.h. Done.
	2105 return 'I';

	2106 } else if (localeIdMatchesLang(localeIdentifier, "lt")) {

	2107 // FIXME
	tkent 2015/05/28 00:38:26 FIXME -> TODO(rob.buis): blah blah rwlbuis 2015/05/28 23:03:07 On 2015/05/28 00:38:26, tkent wrote: > FIXME -> TODO(rob.buis): blah blah In this case, I don't think I want to put my name there. I would not mind working on it, but I don't know the language and don't know of a bug/testcase for it. It is more a reminder that we'll have to do something based on StringImpl::upper. tkent 2015/05/29 05:49:50 On 2015/05/28 23:03:07, rwlbuis wrote: > On 2015/05/28 00:38:26, tkent wrote: > > FIXME -> TODO(rob.buis): blah blah > > In this case, I don't think I want to put my name there. I would not mind > working on it, but I don't know the language and don't know of a bug/testcase > for it. It is more a reminder that we'll have to do something based on > StringImpl::upper. Please add TODO(rob.buis). It doesn't mean you'll fix it. Please refer to the discussion on blink-dev.
	2108 }

	2109 }

	2110

	2111 return toUpper(c);

	2112 }

	2113

2097 } // namespace WTF	2114 } // namespace WTF

OLD	NEW

« Source/wtf/text/StringImpl.h ('K') | « Source/wtf/text/StringImpl.h ('k') | no next file » | no next file with comments »