Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) |
| 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) | 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) |
| 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) | 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) |
| 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. | 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. |
| 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) | 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) |
| 7 * | 7 * |
| 8 * This library is free software; you can redistribute it and/or | 8 * This library is free software; you can redistribute it and/or |
| 9 * modify it under the terms of the GNU Library General Public | 9 * modify it under the terms of the GNU Library General Public |
| 10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
| (...skipping 1875 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1886 } else { | 1886 } else { |
| 1887 // Cases 2 & 4. | 1887 // Cases 2 & 4. |
| 1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); | 1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); |
| 1889 } | 1889 } |
| 1890 | 1890 |
| 1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); | 1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); |
| 1892 | 1892 |
| 1893 return newImpl.release(); | 1893 return newImpl.release(); |
| 1894 } | 1894 } |
| 1895 | 1895 |
| 1896 bool StringImpl::hasUnmatchedSurrogates() const | |
| 1897 { | |
| 1898 // By definition, 8-bit strings are confined to the Latin-1 code page and | |
| 1899 // have no surrogates, matched or otherwise. | |
| 1900 if (is8Bit()) | |
| 1901 return false; | |
| 1902 | |
| 1903 const UChar* characters = characters16(); | |
| 1904 const unsigned length = m_length; | |
| 1905 | |
| 1906 for (unsigned i = 0; i < length; ++i) { | |
| 1907 UChar c = characters[i]; | |
| 1908 if (c < 0xD800 || c > 0xDFFF) { | |
|
tkent
2014/06/16 07:51:16
!U16_IS_SURROGATE(c)
jsbell
2014/06/17 21:39:50
Done - used U16_XXX macros throughout. Also, tight
| |
| 1909 // Non-surrogate | |
| 1910 continue; | |
| 1911 } | |
| 1912 if (0xDC00 <= c && c <= 0xDFFF) { | |
|
tkent
2014/06/16 07:51:16
U16_IS_TRAIL(c)
| |
| 1913 // Unmatched trail surrogate. | |
| 1914 return true; | |
| 1915 } | |
|
Nils Barth (inactive)
2014/06/16 07:08:28
Want to add something like:
// Lead surrogate.
//
| |
| 1916 if (i == length - 1) { | |
| 1917 // Unmatched lead surrogate at EOF. | |
| 1918 return true; | |
| 1919 } | |
| 1920 UChar d = characters[i + 1]; | |
| 1921 if (0xDC00 <= d && d <= 0xDFFF) { | |
|
tkent
2014/06/16 07:51:16
U16_IS_TRAIL(d)
| |
| 1922 // Matching trail surrogate. | |
| 1923 ++i; | |
| 1924 continue; | |
| 1925 } | |
| 1926 // Unmatched lead. | |
| 1927 return true; | |
| 1928 } | |
| 1929 return false; | |
| 1930 } | |
| 1931 | |
| 1932 PassRefPtr<StringImpl> StringImpl::replaceUnmatchedSurrogates() | |
| 1933 { | |
| 1934 // This roughly implements http://heycam.github.io/webidl/#dfn-obtain-unicode | |
| 1935 // but the output is still a sequence of 16-bit code units, effectively | |
| 1936 // re-encoding to UTF-16 after performing the replacements. | |
| 1937 | |
| 1938 // The concepts of surrogate pairs are explained at: | |
| 1939 // http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf#G2630 | |
| 1940 | |
| 1941 // Blink-specific optimization to avoid making an unnecessary copy. | |
| 1942 if (!hasUnmatchedSurrogates()) | |
| 1943 return this; | |
| 1944 ASSERT(!is8Bit()); | |
| 1945 | |
| 1946 // 1. Let S be the DOMString value. | |
| 1947 const UChar* s = characters16(); | |
| 1948 | |
| 1949 // 2. Let n be the length of S. | |
| 1950 const unsigned n = m_length; | |
| 1951 | |
| 1952 // 3. Initialize i to 0. | |
| 1953 unsigned i = 0; | |
| 1954 | |
| 1955 // 4. Initialize U to be an empty sequence of Unicode characters. | |
| 1956 // (Blink: we just use an array of UTF-16 code units.) | |
| 1957 UChar* u; | |
| 1958 RefPtr<StringImpl> newImpl = createUninitialized(n, u); | |
| 1959 | |
| 1960 // 5. While i < n: | |
| 1961 while (i < n) { | |
| 1962 // 1. Let c be the code unit in S at index i. | |
| 1963 UChar c = s[i]; | |
| 1964 // 2. Depending on the value of c: | |
| 1965 if (c < 0xD800 || c > 0xDFFF) { | |
| 1966 // c < 0xD800 or c > 0xDFFF | |
| 1967 // Append to U the Unicode character with code point c. | |
| 1968 u[i] = c; | |
| 1969 } else if (0xDC00 <= c && c <= 0xDFFF) { | |
| 1970 // 0xDC00 <= c <= 0xDFFF | |
| 1971 // Append to U a U+FFFD REPLACEMENT CHARACTER. | |
| 1972 u[i] = Unicode::replacementCharacter; | |
| 1973 } else { | |
| 1974 // 0xD800 <= c <= 0xDBFF | |
| 1975 ASSERT(0xD800 <= c && c <= 0xDBFF); | |
| 1976 if (i == n - 1) { | |
| 1977 // 1. If i = n−1, then append to U a U+FFFD REPLACEMENT CHARACTER. | |
| 1978 u[i] = Unicode::replacementCharacter; | |
| 1979 } else { | |
| 1980 // 2. Otherwise, i < n−1: | |
| 1981 ASSERT(i < n - 1); | |
| 1982 // ..1. Let d be the code unit in S at index i+1. | |
| 1983 UChar d = s[i + 1]; | |
| 1984 if (0xDC00 <= d && d <= 0xDFFF) { | |
| 1985 // 2. If 0xDC00 ≤ d ≤ 0xDFFF, then: | |
| 1986 // ..1. Let a be c & 0x3FF. | |
| 1987 // ..2. Let b be d & 0x3FF. | |
| 1988 // ..3. Append to U the Unicode character with code point 2^16+2^10*a+b. | |
| 1989 // (Blink: Just pass through the UTF-16 code units rather than | |
| 1990 // decoding to a Unicode scalar value then re-encoding.) | |
| 1991 u[i] = c; | |
| 1992 u[i + 1] = d; | |
| 1993 // ..4. Set i to i+1. | |
| 1994 ++i; | |
| 1995 } else { | |
| 1996 // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a U+FFFD REPLACEMENT CHARACTER. | |
| 1997 ASSERT(d < 0xD800 || d > 0xDFFF); | |
| 1998 u[i] = Unicode::replacementCharacter; | |
| 1999 } | |
| 2000 } | |
| 2001 } | |
| 2002 // 3. Set i to i+1. | |
| 2003 ++i; | |
| 2004 } | |
| 2005 | |
| 2006 // 6. Return U. | |
| 2007 return newImpl.release(); | |
| 2008 } | |
| 2009 | |
| 1896 PassRefPtr<StringImpl> StringImpl::upconvertedString() | 2010 PassRefPtr<StringImpl> StringImpl::upconvertedString() |
| 1897 { | 2011 { |
| 1898 if (is8Bit()) | 2012 if (is8Bit()) |
| 1899 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl(); | 2013 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl(); |
| 1900 return this; | 2014 return this; |
| 1901 } | 2015 } |
| 1902 | 2016 |
| 1903 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) | 2017 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) |
| 1904 { | 2018 { |
| 1905 unsigned aLength = a->length(); | 2019 unsigned aLength = a->length(); |
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2099 | 2213 |
| 2100 size_t StringImpl::sizeInBytes() const | 2214 size_t StringImpl::sizeInBytes() const |
| 2101 { | 2215 { |
| 2102 size_t size = length(); | 2216 size_t size = length(); |
| 2103 if (!is8Bit()) | 2217 if (!is8Bit()) |
| 2104 size *= 2; | 2218 size *= 2; |
| 2105 return size + sizeof(*this); | 2219 return size + sizeof(*this); |
| 2106 } | 2220 } |
| 2107 | 2221 |
| 2108 } // namespace WTF | 2222 } // namespace WTF |
| OLD | NEW |