Source/wtf/text/StringImpl.cpp - Issue 313993002: Bindings: Add ScalarValueString support

Side by Side Diff: Source/wtf/text/StringImpl.cpp

Issue 313993002: Bindings: Add ScalarValueString support (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Incorporate review feedback Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)	2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)

3 * (C) 1999 Antti Koivisto (koivisto@kde.org)	3 * (C) 1999 Antti Koivisto (koivisto@kde.org)

4 * (C) 2001 Dirk Mueller ( mueller@kde.org )	4 * (C) 2001 Dirk Mueller ( mueller@kde.org )

5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.	5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.

6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)	6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)

7 *	7 *

8 * This library is free software; you can redistribute it and/or	8 * This library is free software; you can redistribute it and/or

9 * modify it under the terms of the GNU Library General Public	9 * modify it under the terms of the GNU Library General Public

10 * License as published by the Free Software Foundation; either	10 * License as published by the Free Software Foundation; either

(...skipping 1875 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1886 } else {	1886 } else {

1887 // Cases 2 & 4.	1887 // Cases 2 & 4.

1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));	1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));

1889 }	1889 }

1890	1890

1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length());	1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length());

1892	1892

1893 return newImpl.release();	1893 return newImpl.release();

1894 }	1894 }

1895	1895

	1896 bool StringImpl::hasUnmatchedSurrogates() const

	1897 {

	1898 // By definition, 8-bit strings are confined to the Latin-1 code page and

	1899 // have no surrogates, matched or otherwise.

	1900 if (is8Bit())

	1901 return false;

	1902

	1903 const UChar* characters = characters16();

	1904 const unsigned length = m_length;

	1905

	1906 for (unsigned i = 0; i < length; ++i) {

	1907 UChar c = characters[i];

	1908 if (c < 0xD800 || c > 0xDFFF) {
	tkent 2014/06/16 07:51:16: !U16_IS_SURROGATE(c)
	jsbell 2014/06/17 21:39:50: On 2014/06/16 07:51:16, tkent wrote: > !U16_IS_SURROGATE(c) Done - used U16_XXX macros throughout. Also, tightened the hasUnmatchedSurrogates() function up since the macros are self-documenting.
	1909 // Non-surrogate

	1910 continue;

	1911 }

	1912 if (0xDC00 <= c && c <= 0xDFFF) {
	tkent 2014/06/16 07:51:16: U16_IS_TRAIL(c)
	1913 // Unmatched trail surrogate.

	1914 return true;

	1915 }
	Nils Barth (inactive) 2014/06/16 07:08:28: Want to add something like: // Lead surrogate. // 0xD800 <= c <= 0xDBFF ASSERT(0xD800 <= c && c <= 0xDBFF):
	1916 if (i == length - 1) {

	1917 // Unmatched lead surrogate at EOF.

	1918 return true;

	1919 }

	1920 UChar d = characters[i + 1];

	1921 if (0xDC00 <= d && d <= 0xDFFF) {
	tkent 2014/06/16 07:51:16: U16_IS_TRAIL(d)
	1922 // Matching trail surrogate.

	1923 ++i;

	1924 continue;

	1925 }

	1926 // Unmatched lead.

	1927 return true;

	1928 }

	1929 return false;

	1930 }

	1931

	1932 PassRefPtr<StringImpl> StringImpl::replaceUnmatchedSurrogates()

	1933 {

	1934 // This roughly implements http://heycam.github.io/webidl/#dfn-obtain-unicode

	1935 // but the output is still a sequence of 16-bit code units, effectively

	1936 // re-encoding to UTF-16 after performing the replacements.

	1937

	1938 // The concepts of surrogate pairs are explained at:

	1939 // http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf#G2630

	1940

	1941 // Blink-specific optimization to avoid making an unnecessary copy.

	1942 if (!hasUnmatchedSurrogates())

	1943 return this;

	1944 ASSERT(!is8Bit());

	1945

	1946 // 1. Let S be the DOMString value.

	1947 const UChar* s = characters16();

	1948

	1949 // 2. Let n be the length of S.

	1950 const unsigned n = m_length;

	1951

	1952 // 3. Initialize i to 0.

	1953 unsigned i = 0;

	1954

	1955 // 4. Initialize U to be an empty sequence of Unicode characters.

	1956 // (Blink: we just use an array of UTF-16 code units.)

	1957 UChar* u;

	1958 RefPtr<StringImpl> newImpl = createUninitialized(n, u);

	1959

	1960 // 5. While i < n:

	1961 while (i < n) {

	1962 // 1. Let c be the code unit in S at index i.

	1963 UChar c = s[i];

	1964 // 2. Depending on the value of c:

	1965 if (c < 0xD800 || c > 0xDFFF) {

	1966 // c < 0xD800 or c > 0xDFFF

	1967 // Append to U the Unicode character with code point c.

	1968 u[i] = c;

	1969 } else if (0xDC00 <= c && c <= 0xDFFF) {

	1970 // 0xDC00 <= c <= 0xDFFF

	1971 // Append to U a U+FFFD REPLACEMENT CHARACTER.

	1972 u[i] = Unicode::replacementCharacter;

	1973 } else {

	1974 // 0xD800 <= c <= 0xDBFF

	1975 ASSERT(0xD800 <= c && c <= 0xDBFF);

	1976 if (i == n - 1) {

	1977 // 1. If i = n−1, then append to U a U+FFFD REPLACEMENT CHARACTER.

	1978 u[i] = Unicode::replacementCharacter;

	1979 } else {

	1980 // 2. Otherwise, i < n−1:

	1981 ASSERT(i < n - 1);

	1982 // ..1. Let d be the code unit in S at index i+1.

	1983 UChar d = s[i + 1];

	1984 if (0xDC00 <= d && d <= 0xDFFF) {

	1985 // 2. If 0xDC00 ≤ d ≤ 0xDFFF, then:

	1986 // ..1. Let a be c & 0x3FF.

	1987 // ..2. Let b be d & 0x3FF.

	1988 // ..3. Append to U the Unicode character with code point 2^16+2^10*a+b.

	1989 // (Blink: Just pass through the UTF-16 code units rather than

	1990 // decoding to a Unicode scalar value then re-encoding.)

	1991 u[i] = c;

	1992 u[i + 1] = d;

	1993 // ..4. Set i to i+1.

	1994 ++i;

	1995 } else {

	1996 // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a U+FFFD REPLACEMENT CHARACTER.

	1997 ASSERT(d < 0xD800 || d > 0xDFFF);

	1998 u[i] = Unicode::replacementCharacter;

	1999 }

	2000 }

	2001 }

	2002 // 3. Set i to i+1.

	2003 ++i;

	2004 }

	2005

	2006 // 6. Return U.

	2007 return newImpl.release();

	2008 }

	2009

1896 PassRefPtr<StringImpl> StringImpl::upconvertedString()	2010 PassRefPtr<StringImpl> StringImpl::upconvertedString()

1897 {	2011 {

1898 if (is8Bit())	2012 if (is8Bit())

1899 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();	2013 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();

1900 return this;	2014 return this;

1901 }	2015 }

1902	2016

1903 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)	2017 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)

1904 {	2018 {

1905 unsigned aLength = a->length();	2019 unsigned aLength = a->length();

(...skipping 193 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2099	2213

2100 size_t StringImpl::sizeInBytes() const	2214 size_t StringImpl::sizeInBytes() const

2101 {	2215 {

2102 size_t size = length();	2216 size_t size = length();

2103 if (!is8Bit())	2217 if (!is8Bit())

2104 size *= 2;	2218 size *= 2;

2105 return size + sizeof(*this);	2219 return size + sizeof(*this);

2106 }	2220 }

2107	2221

2108 } // namespace WTF	2222 } // namespace WTF

OLD	NEW

« Source/wtf/text/StringImpl.h ('K') | « Source/wtf/text/StringImpl.h ('k') | Source/wtf/text/WTFString.h » ('j') | Source/wtf/text/WTFString.h » ('J')