Chromium Code Reviews| Index: Source/bindings/v8/V8Binding.cpp |
| diff --git a/Source/bindings/v8/V8Binding.cpp b/Source/bindings/v8/V8Binding.cpp |
| index 88192b78c1754962653549e4a784cd6ae0fe0b84..c4f3d9bba3d9babafbc3fd2e11adc3d22f39e49a 100644 |
| --- a/Source/bindings/v8/V8Binding.cpp |
| +++ b/Source/bindings/v8/V8Binding.cpp |
| @@ -67,6 +67,8 @@ |
| #include "wtf/text/StringBuffer.h" |
| #include "wtf/text/StringHash.h" |
| #include "wtf/text/WTFString.h" |
| +#include "wtf/unicode/CharacterNames.h" |
| +#include "wtf/unicode/Unicode.h" |
| namespace WebCore { |
| @@ -499,6 +501,8 @@ String toByteString(v8::Handle<v8::Value> value, ExceptionState& exceptionState) |
| return String(); |
| // From the Web IDL spec: http://heycam.github.io/webidl/#es-ByteString |
| + if (value.IsEmpty()) |
| + return String(); |
| // 1. Let x be ToString(v) |
| TONATIVE_DEFAULT_EXCEPTIONSTATE(v8::Local<v8::String>, stringObject, value->ToString(), exceptionState, String()); |
| @@ -517,6 +521,124 @@ String toByteString(v8::Handle<v8::Value> value, ExceptionState& exceptionState) |
| return x; |
| } |
| +// Returns true when 'string' holds a UTF-16 surrogate code unit that is not |
| +// part of a well-formed (lead, trail) pair. |
| +static bool hasUnmatchedSurrogates(const String& string) |
| +{ |
| +    // 8-bit strings hold only Latin-1 code units, so they cannot contain any |
| +    // surrogate at all, paired or not. |
| +    if (string.is8Bit()) |
| +        return false; |
| + |
| +    const UChar* codeUnits = string.characters16(); |
| +    const unsigned count = string.length(); |
| + |
| +    unsigned index = 0; |
| +    while (index < count) { |
| +        const UChar current = codeUnits[index]; |
| +        if (U16_IS_SINGLE(current)) { |
| +            ++index; |
| +            continue; |
| +        } |
| +        // A trail surrogate reached here was not consumed as the second half |
| +        // of a pair, so it is unmatched. |
| +        if (U16_IS_TRAIL(current)) |
| +            return true; |
| +        ASSERT(U16_IS_LEAD(current)); |
| +        // A lead surrogate must be immediately followed by a trail surrogate. |
| +        const unsigned next = index + 1; |
| +        if (next == count || !U16_IS_TRAIL(codeUnits[next])) |
| +            return true; |
| +        // Step over both halves of the matched pair. |
| +        index = next + 1; |
| +    } |
| +    return false; |
| +} |
| + |
| +// Returns 'string' with every unmatched surrogate code unit replaced by |
| +// U+FFFD REPLACEMENT CHARACTER. A string that has no unmatched surrogates is |
| +// returned as-is, without building a copy. |
| +static String convertDOMStringToSequenceOfUnicodeCharacters(const String& string) |

Nils Barth (inactive)
2014/06/18 03:25:26
I admire your adherence to spec, but you're right,
haraken
2014/06/18 04:57:33
I don't have a strong opinion here, since this met
jsbell
2014/06/18 17:25:19
I'll go back to 'replaceUnmatchedSurrogates' and a
|
| +{ |
| +    // This roughly implements http://heycam.github.io/webidl/#dfn-obtain-unicode |
| +    // but the output is still a sequence of 16-bit code units, effectively |

Nils Barth (inactive)
2014/06/18 03:25:26
Is this still true, now that you're using StringBu
jsbell
2014/06/18 17:25:19
Yes. Blink strings are 16-bit internally, this jus
Nils Barth (inactive)
2014/06/20 00:57:54
Thanks for explanation!
On 2014/06/18 17:25:19, j
|
| +    // re-encoding to UTF-16 after performing the replacements. |
| + |
| +    // The concepts of surrogate pairs are explained at: |
| +    // http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf#G2630 |
| + |
| +    // Blink-specific optimization to avoid making an unnecessary copy. |
| +    if (!hasUnmatchedSurrogates(string)) |
| +        return string; |
| +    // hasUnmatchedSurrogates() returns false for every 8-bit string, so only |
| +    // 16-bit strings can reach this point. |
| +    ASSERT(!string.is8Bit()); |
| + |
| +    // 1. Let S be the DOMString value. |
| +    const UChar* s = string.characters16(); |
| + |
| +    // 2. Let n be the length of S. |
| +    const unsigned n = string.length(); |
| + |
| +    // 3. Initialize i to 0. |
| +    unsigned i = 0; |
| + |
| +    // 4. Initialize U to be an empty sequence of Unicode characters. |
| +    StringBuilder u; |
| +    u.reserveCapacity(n); |
| + |
| +    // 5. While i < n: |
| +    while (i < n) { |
| +        // 1. Let c be the code unit in S at index i. |
| +        UChar c = s[i]; |
| +        // 2. Depending on the value of c: |
| +        if (U16_IS_SINGLE(c)) { |
| +            // c < 0xD800 or c > 0xDFFF |
| +            // Append to U the Unicode character with code point c. |
| +            u.append(c); |
| +        } else if (U16_IS_TRAIL(c)) { |
| +            // 0xDC00 <= c <= 0xDFFF |
| +            // Append to U a U+FFFD REPLACEMENT CHARACTER. |
| +            u.append(WTF::Unicode::replacementCharacter); |
| +        } else { |
| +            // 0xD800 <= c <= 0xDBFF |
| +            ASSERT(U16_IS_LEAD(c)); |
| +            if (i == n - 1) { |
| +                // 1. If i = n−1, then append to U a U+FFFD REPLACEMENT CHARACTER. |
| +                u.append(WTF::Unicode::replacementCharacter); |
| +            } else { |
| +                // 2. Otherwise, i < n−1: |
| +                ASSERT(i < n - 1); |
| +                // 1. Let d be the code unit in S at index i+1. |
| +                UChar d = s[i + 1]; |
| +                if (U16_IS_TRAIL(d)) { |
| +                    // 2. If 0xDC00 <= d <= 0xDFFF, then: |
| +                    // ..1. Let a be c & 0x3FF. |
| +                    // ..2. Let b be d & 0x3FF. |
| +                    // ..3. Append to U the Unicode character with code point 2^16+2^10*a+b. |
| +                    // U16_GET_SUPPLEMENTARY(c, d) yields exactly that code point, |
| +                    // so a and b need not be computed by hand. |
| +                    u.append(static_cast<UChar32>(U16_GET_SUPPLEMENTARY(c, d))); |

jsbell
2014/06/17 21:39:50
Since this is using StringBuilder, it's now "per s
tkent
2014/06/17 23:38:54
U16_GET_SUPPLEMENTARY(c, d) ?
Nils Barth (inactive)
2014/06/18 03:25:26
+1!
jsbell
2014/06/18 17:25:19
Will do!
|
| +                    // Blink: This is equivalent to u.append(c); u.append(d); |
| +                    // Skip the trail surrogate that was just consumed. |
| +                    ++i; |
| +                } else { |
| +                    // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a U+FFFD REPLACEMENT CHARACTER. |
| +                    u.append(WTF::Unicode::replacementCharacter); |
| +                } |
| +            } |
| +        } |
| +        // 3. Set i to i+1. |
| +        ++i; |
| +    } |
| + |
| +    // 6. Return U. |
| +    // Every input code unit is either kept or swapped one-for-one, so the |
| +    // result is expected to have the same length as the input. |
| +    ASSERT(u.length() == string.length()); |
| +    return u.toString(); |
| +} |
| + |
| +String toScalarValueString(v8::Handle<v8::Value> value, ExceptionState& exceptionState) |
| +{ |
| +    // ScalarValueString comes from the Encoding standard, which carries a |
| +    // TODO to move the type into Web IDL: |
| +    // http://encoding.spec.whatwg.org/#type-scalarvaluestring |
| +    if (value.IsEmpty()) |
| +        return String(); |
| +    // Convert the value with ToString(); String() is the default handed to |
| +    // the macro alongside exceptionState. |
| +    TONATIVE_DEFAULT_EXCEPTIONSTATE(v8::Local<v8::String>, stringObject, value->ToString(), exceptionState, String()); |
| + |
| +    // The conversion is the same as for DOMString, with one extra step: the |
| +    // result is then run through "convert a DOMString to a sequence of |
| +    // Unicode characters". |
| +    return convertDOMStringToSequenceOfUnicodeCharacters(toCoreString(stringObject)); |
| +} |
| + |
| PassRefPtrWillBeRawPtr<XPathNSResolver> toXPathNSResolver(v8::Handle<v8::Value> value, v8::Isolate* isolate) |
| { |
| RefPtrWillBeRawPtr<XPathNSResolver> resolver = nullptr; |