Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(297)

Side by Side Diff: Source/wtf/text/StringImpl.cpp

Issue 313993002: Bindings: Add ScalarValueString support (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Incorporate review feedback Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org) 3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) 4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 * 7 *
8 * This library is free software; you can redistribute it and/or 8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public 9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
(...skipping 1875 matching lines...) Expand 10 before | Expand all | Expand 10 after
1886 } else { 1886 } else {
1887 // Cases 2 & 4. 1887 // Cases 2 & 4.
1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen gth * sizeof(UChar)); 1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLen gth * sizeof(UChar));
1889 } 1889 }
1890 1890
1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1892 1892
1893 return newImpl.release(); 1893 return newImpl.release();
1894 } 1894 }
1895 1895
1896 bool StringImpl::hasUnmatchedSurrogates() const
1897 {
1898 // By definition, 8-bit strings are confined to the Latin-1 code page and
1899 // have no surrogates, matched or otherwise.
1900 if (is8Bit())
1901 return false;
1902
1903 const UChar* characters = characters16();
1904 const unsigned length = m_length;
1905
1906 for (unsigned i = 0; i < length; ++i) {
1907 UChar c = characters[i];
1908 if (c < 0xD800 || c > 0xDFFF) {
tkent 2014/06/16 07:51:16 !U16_IS_SURROGATE(c)
jsbell 2014/06/17 21:39:50 Done - used U16_XXX macros throughout. Also, tight
1909 // Non-surrogate
1910 continue;
1911 }
1912 if (0xDC00 <= c && c <= 0xDFFF) {
tkent 2014/06/16 07:51:16 U16_IS_TRAIL(c)
1913 // Unmatched trail surrogate.
1914 return true;
1915 }
Nils Barth (inactive) 2014/06/16 07:08:28 Want to add something like: // Lead surrogate. //
1916 if (i == length - 1) {
1917 // Unmatched lead surrogate at EOF.
1918 return true;
1919 }
1920 UChar d = characters[i + 1];
1921 if (0xDC00 <= d && d <= 0xDFFF) {
tkent 2014/06/16 07:51:16 U16_IS_TRAIL(d)
1922 // Matching trail surrogate.
1923 ++i;
1924 continue;
1925 }
1926 // Unmatched lead.
1927 return true;
1928 }
1929 return false;
1930 }
1931
1932 PassRefPtr<StringImpl> StringImpl::replaceUnmatchedSurrogates()
1933 {
1934 // This roughly implements http://heycam.github.io/webidl/#dfn-obtain-unicod e
1935 // but the output is still a sequence of 16-bit code units, effectively
1936 // re-encoding to UTF-16 after performing the replacements.
1937
1938 // The concepts of surrogate pairs are explained at:
1939 // http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf#G2630
1940
1941 // Blink-specific optimization to avoid making an unnecessary copy.
1942 if (!hasUnmatchedSurrogates())
1943 return this;
1944 ASSERT(!is8Bit());
1945
1946 // 1. Let S be the DOMString value.
1947 const UChar* s = characters16();
1948
1949 // 2. Let n be the length of S.
1950 const unsigned n = m_length;
1951
1952 // 3. Initialize i to 0.
1953 unsigned i = 0;
1954
1955 // 4. Initialize U to be an empty sequence of Unicode characters.
1956 // (Blink: we just use an array of UTF-16 code units.)
1957 UChar* u;
1958 RefPtr<StringImpl> newImpl = createUninitialized(n, u);
1959
1960 // 5. While i < n:
1961 while (i < n) {
1962 // 1. Let c be the code unit in S at index i.
1963 UChar c = s[i];
1964 // 2. Depending on the value of c:
1965 if (c < 0xD800 || c > 0xDFFF) {
1966 // c < 0xD800 or c > 0xDFFF
1967 // Append to U the Unicode character with code point c.
1968 u[i] = c;
1969 } else if (0xDC00 <= c && c <= 0xDFFF) {
1970 // 0xDC00 <= c <= 0xDFFF
1971 // Append to U a U+FFFD REPLACEMENT CHARACTER.
1972 u[i] = Unicode::replacementCharacter;
1973 } else {
1974 // 0xD800 <= c <= 0xDBFF
1975 ASSERT(0xD800 <= c && c <= 0xDBFF);
1976 if (i == n - 1) {
1977 // 1. If i = n−1, then append to U a U+FFFD REPLACEMENT CHARACTE R.
1978 u[i] = Unicode::replacementCharacter;
1979 } else {
1980 // 2. Otherwise, i < n−1:
1981 ASSERT(i < n - 1);
1982 // ..1. Let d be the code unit in S at index i+1.
1983 UChar d = s[i + 1];
1984 if (0xDC00 <= d && d <= 0xDFFF) {
1985 // 2. If 0xDC00 ≤ d ≤ 0xDFFF, then:
1986 // ..1. Let a be c & 0x3FF.
1987 // ..2. Let b be d & 0x3FF.
1988 // ..3. Append to U the Unicode character with code point 2^ 16+2^10*a+b.
1989 // (Blink: Just pass through the UTF-16 code units rather th an
1990 // decoding to a Unicode scalar value then re-encoding.)
1991 u[i] = c;
1992 u[i + 1] = d;
1993 // ..4. Set i to i+1.
1994 ++i;
1995 } else {
1996 // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a U+F FFD REPLACEMENT CHARACTER.
1997 ASSERT(d < 0xD800 || d > 0xDFFF);
1998 u[i] = Unicode::replacementCharacter;
1999 }
2000 }
2001 }
2002 // 3. Set i to i+1.
2003 ++i;
2004 }
2005
2006 // 6. Return U.
2007 return newImpl.release();
2008 }
2009
1896 PassRefPtr<StringImpl> StringImpl::upconvertedString() 2010 PassRefPtr<StringImpl> StringImpl::upconvertedString()
1897 { 2011 {
1898 if (is8Bit()) 2012 if (is8Bit())
1899 return String::make16BitFrom8BitSource(characters8(), m_length).releaseI mpl(); 2013 return String::make16BitFrom8BitSource(characters8(), m_length).releaseI mpl();
1900 return this; 2014 return this;
1901 } 2015 }
1902 2016
1903 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) 2017 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1904 { 2018 {
1905 unsigned aLength = a->length(); 2019 unsigned aLength = a->length();
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
2099 2213
2100 size_t StringImpl::sizeInBytes() const 2214 size_t StringImpl::sizeInBytes() const
2101 { 2215 {
2102 size_t size = length(); 2216 size_t size = length();
2103 if (!is8Bit()) 2217 if (!is8Bit())
2104 size *= 2; 2218 size *= 2;
2105 return size + sizeof(*this); 2219 return size + sizeof(*this);
2106 } 2220 }
2107 2221
2108 } // namespace WTF 2222 } // namespace WTF
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698