Index: Source/wtf/text/StringImpl.cpp |
diff --git a/Source/wtf/text/StringImpl.cpp b/Source/wtf/text/StringImpl.cpp |
index 838b6715fcdce7c992b8a90e4fcfaab69c36ac77..8ecf0face368396acbd9e95aff9ebab5bddf7e43 100644 |
--- a/Source/wtf/text/StringImpl.cpp |
+++ b/Source/wtf/text/StringImpl.cpp |
@@ -1893,6 +1893,74 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl |
return newImpl.release(); |
} |
+PassRefPtr<StringImpl> StringImpl::replaceUnpairedSurrogates() |
Nils Barth (inactive)
2014/06/11 03:54:26
Could you link to spec, and quote spec in comments
jsbell
2014/06/12 17:45:55
Done.
|
+{ |
+ if (is8Bit()) |
+ return this; |
+ |
+ const UChar* characters = characters16(); |
+ const unsigned length = m_length; |
+ |
+ // First pass: scan for any unpaired surrogate. If none is found, |this| is returned as-is and no new string is built. |
Nils Barth (inactive)
2014/06/11 03:54:26
Could you comment that this is a Blink-specific op
jsbell
2014/06/12 17:45:55
Done.
|
+ bool unmatched = false; |
Nils Barth (inactive)
2014/06/11 03:54:26
This block would be a bit simpler as a helper func
jsbell
2014/06/12 17:45:55
Done.
|
+ for (unsigned i = 0; i < length; ++i) { |
+ UChar c = characters[i]; |
+ if (c < 0xD800 || c > 0xDFFF) { |
Nils Barth (inactive)
2014/06/11 03:54:26
Understand "want to match below", though style-wis
jsbell
2014/06/12 17:45:56
Done.
|
+ // Non-surrogate (outside 0xD800-0xDFFF) - no-op. |
+ } else if (0xDC00 <= c && c <= 0xDFFF) { |
+ // Trail surrogate (0xDC00-0xDFFF) reached without a preceding lead (paired trails are skipped below): unmatched. |
+ unmatched = true; |
+ break; |
+ } else if (i == length - 1) { |
+ // Lead surrogate (0xD800-0xDBFF) in the last code unit of the string: unmatched. |
+ unmatched = true; |
+ break; |
+ } else { |
+ UChar d = characters[i + 1]; |
+ if (0xDC00 <= d && d <= 0xDFFF) { |
+ // Lead followed by a trail surrogate: valid pair, so step over the trail as well. |
+ ++i; |
+ continue; |
+ } |
+ // Lead surrogate followed by a non-trail code unit: unmatched. |
+ unmatched = true; |
+ break; |
+ } |
+ } |
+ if (!unmatched) |
+ return this; |
+ |
+ UChar* data; |
+ RefPtr<StringImpl> newImpl = createUninitialized(length, data); |
+ for (unsigned i = 0; i < length; ++i) { |
+ UChar c = characters[i]; |
+ if (c < 0xD800 || c > 0xDFFF) { |
+ // Non-surrogate: copied through unchanged. |
+ data[i] = c; |
+ } else if (0xDC00 <= c && c <= 0xDFFF) { |
+ // Unmatched trail surrogate: written as Unicode::replacementCharacter. |
+ data[i] = Unicode::replacementCharacter; |
+ } else if (i == length - 1) { |
Nils Barth (inactive)
2014/06/11 03:54:26
Could you add a comment stating:
// 0xD800 <= c &&
jsbell
2014/06/12 17:45:56
I added ASSERTs instead.
|
+ // Unmatched lead surrogate in the last code unit: written as Unicode::replacementCharacter. |
+ data[i] = Unicode::replacementCharacter; |
+ } else { |
+ UChar d = characters[i + 1]; |
+ if (0xDC00 <= d && d <= 0xDFFF) { |
+ // Valid lead/trail pair: both code units are copied as-is and the trail is skipped. |
+ data[i] = c; |
Nils Barth (inactive)
2014/06/11 03:54:26
Could you comment that we're not translating the s
jsbell
2014/06/12 17:45:55
Done.
|
+ data[i + 1] = d; |
+ ++i; |
+ } else { |
+ // Unmatched lead: replaced; the following code unit |d| is examined on the next iteration. |
+ data[i] = Unicode::replacementCharacter; |
+ } |
+ } |
+ } |
+ |
+ return newImpl.release(); |
+} |
+ |
Nils Barth (inactive)
2014/06/11 03:54:26
nit: one blank line, right?
jsbell
2014/06/12 17:45:55
Done.
|
+ |
PassRefPtr<StringImpl> StringImpl::upconvertedString() |
{ |
if (is8Bit()) |