Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(448)

Unified Diff: Source/wtf/text/StringImpl.cpp

Issue 313993002: Bindings: Add ScalarValueString support (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Incorporate review feedback Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: Source/wtf/text/StringImpl.cpp
diff --git a/Source/wtf/text/StringImpl.cpp b/Source/wtf/text/StringImpl.cpp
index 838b6715fcdce7c992b8a90e4fcfaab69c36ac77..b29005523e2d40f46ee94b2741cf4b43bc5babea 100644
--- a/Source/wtf/text/StringImpl.cpp
+++ b/Source/wtf/text/StringImpl.cpp
@@ -1893,6 +1893,120 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
return newImpl.release();
}
+// Returns true if this string contains at least one surrogate code unit that
+// is not part of a well-formed (lead, trail) surrogate pair: a trail surrogate
+// with no lead directly before it, a lead surrogate that is the last code
+// unit, or a lead surrogate followed by anything other than a trail surrogate.
+// Returns false otherwise, including for every 8-bit string.
+bool StringImpl::hasUnmatchedSurrogates() const
+{
+ // By definition, 8-bit strings are confined to the Latin-1 code page and
+ // have no surrogates, matched or otherwise.
+ if (is8Bit())
+ return false;
+
+ const UChar* characters = characters16();
+ const unsigned length = m_length;
+
+ // Scan code unit by code unit; a matched pair is consumed as two units.
+ for (unsigned i = 0; i < length; ++i) {
+ UChar c = characters[i];
+ if (c < 0xD800 || c > 0xDFFF) {
tkent 2014/06/16 07:51:16 !U16_IS_SURROGATE(c)
jsbell 2014/06/17 21:39:50 Done - used U16_XXX macros throughout. Also, tight
+ // Non-surrogate
+ continue;
+ }
+ if (0xDC00 <= c && c <= 0xDFFF) {
tkent 2014/06/16 07:51:16 U16_IS_TRAIL(c)
+ // Unmatched trail surrogate.
+ return true;
+ }
Nils Barth (inactive) 2014/06/16 07:08:28 Want to add something like: // Lead surrogate. //
+ // From here on c is a lead surrogate (0xD800..0xDBFF): both other ranges
+ // were handled above. It is matched only if a trail surrogate follows.
+ if (i == length - 1) {
+ // Unmatched lead surrogate at EOF.
+ return true;
+ }
+ UChar d = characters[i + 1];
+ if (0xDC00 <= d && d <= 0xDFFF) {
tkent 2014/06/16 07:51:16 U16_IS_TRAIL(d)
+ // Matching trail surrogate.
+ // Skip the trail here; the loop's own ++i then moves past the pair.
+ ++i;
+ continue;
+ }
+ // Unmatched lead.
+ return true;
+ }
+ // Every surrogate seen was part of a well-formed pair.
+ return false;
+}
+
+// Returns a string in which every unmatched surrogate code unit of this
+// string is replaced by U+FFFD REPLACEMENT CHARACTER. Well-formed surrogate
+// pairs and all non-surrogate code units are copied through unchanged, so the
+// result has the same number of code units as this string. When
+// hasUnmatchedSurrogates() reports nothing to replace, |this| is returned and
+// no copy is made.
+PassRefPtr<StringImpl> StringImpl::replaceUnmatchedSurrogates()
+{
+    // This roughly implements http://heycam.github.io/webidl/#dfn-obtain-unicode
+    // but the output is still a sequence of 16-bit code units, effectively
+    // re-encoding to UTF-16 after performing the replacements.
+
+    // The concepts of surrogate pairs are explained at:
+    // http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf#G2630
+
+    // Blink-specific optimization to avoid making an unnecessary copy.
+    if (!hasUnmatchedSurrogates())
+        return this;
+    ASSERT(!is8Bit());
+
+    // 1. Let S be the DOMString value.
+    const UChar* s = characters16();
+
+    // 2. Let n be the length of S.
+    const unsigned n = m_length;
+
+    // 3. Initialize i to 0.
+    unsigned i = 0;
+
+    // 4. Initialize U to be an empty sequence of Unicode characters.
+    // (Blink: we just use an array of UTF-16 code units. Every replacement is
+    // one code unit for one code unit, so U has exactly n entries and each
+    // u[i] below is written exactly once.)
+    UChar* u;
+    RefPtr<StringImpl> newImpl = createUninitialized(n, u);
+
+    // 5. While i < n:
+    while (i < n) {
+        // 1. Let c be the code unit in S at index i.
+        UChar c = s[i];
+        // 2. Depending on the value of c:
+        if (c < 0xD800 || c > 0xDFFF) {
+            // c < 0xD800 or c > 0xDFFF
+            // Append to U the Unicode character with code point c.
+            u[i] = c;
+        } else if (0xDC00 <= c && c <= 0xDFFF) {
+            // 0xDC00 <= c <= 0xDFFF
+            // Append to U a U+FFFD REPLACEMENT CHARACTER.
+            u[i] = Unicode::replacementCharacter;
+        } else {
+            // 0xD800 <= c <= 0xDBFF
+            ASSERT(0xD800 <= c && c <= 0xDBFF);
+            if (i == n - 1) {
+                // 1. If i = n-1, then append to U a U+FFFD REPLACEMENT CHARACTER.
+                u[i] = Unicode::replacementCharacter;
+            } else {
+                // 2. Otherwise, i < n-1:
+                ASSERT(i < n - 1);
+                // ..1. Let d be the code unit in S at index i+1.
+                UChar d = s[i + 1];
+                if (0xDC00 <= d && d <= 0xDFFF) {
+                    // 2. If 0xDC00 <= d <= 0xDFFF, then:
+                    // ..1. Let a be c & 0x3FF.
+                    // ..2. Let b be d & 0x3FF.
+                    // ..3. Append to U the Unicode character with code point 2^16+2^10*a+b.
+                    // (Blink: Just pass through the UTF-16 code units rather than
+                    // decoding to a Unicode scalar value then re-encoding.)
+                    u[i] = c;
+                    u[i + 1] = d;
+                    // ..4. Set i to i+1.
+                    ++i;
+                } else {
+                    // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a U+FFFD REPLACEMENT CHARACTER.
+                    // Note that d may itself be a lead surrogate (0xD800..0xDBFF),
+                    // e.g. two leads in a row; only c is replaced here and d is
+                    // examined on the next iteration. The assertion therefore
+                    // only excludes trail surrogates.
+                    ASSERT(d < 0xDC00 || d > 0xDFFF);
+                    u[i] = Unicode::replacementCharacter;
+                }
+            }
+        }
+        // 3. Set i to i+1.
+        ++i;
+    }
+
+    // 6. Return U.
+    return newImpl.release();
+}
+
PassRefPtr<StringImpl> StringImpl::upconvertedString()
{
if (is8Bit())

Powered by Google App Engine
This is Rietveld 408576698