Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(209)

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextEncoding.cpp

Issue 1424303002: Remove unused support for NFC normalization during text encoding (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@nfc-remove
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextEncoding.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * Copyright (C) 2007-2009 Torch Mobile, Inc. 4 * Copyright (C) 2007-2009 Torch Mobile, Inc.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
(...skipping 16 matching lines...) Expand all
27 27
28 #include "config.h" 28 #include "config.h"
29 #include "wtf/text/TextEncoding.h" 29 #include "wtf/text/TextEncoding.h"
30 30
31 #include "wtf/OwnPtr.h" 31 #include "wtf/OwnPtr.h"
32 #include "wtf/StdLibExtras.h" 32 #include "wtf/StdLibExtras.h"
33 #include "wtf/Threading.h" 33 #include "wtf/Threading.h"
34 #include "wtf/text/CString.h" 34 #include "wtf/text/CString.h"
35 #include "wtf/text/TextEncodingRegistry.h" 35 #include "wtf/text/TextEncodingRegistry.h"
36 #include "wtf/text/WTFString.h" 36 #include "wtf/text/WTFString.h"
37 #include <unicode/unorm.h>
38 37
39 namespace WTF { 38 namespace WTF {
40 39
41 static const TextEncoding& UTF7Encoding() 40 static const TextEncoding& UTF7Encoding()
42 { 41 {
43 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF7Encoding, new TextEncoding("UTF-7")); 42 AtomicallyInitializedStaticReference(const TextEncoding, globalUTF7Encoding, new TextEncoding("UTF-7"));
44 return globalUTF7Encoding; 43 return globalUTF7Encoding;
45 } 44 }
46 45
47 TextEncoding::TextEncoding(const char* name) 46 TextEncoding::TextEncoding(const char* name)
(...skipping 30 matching lines...) Expand all
78 77
79 OwnPtr<TextCodec> textCodec = newTextCodec(*this); 78 OwnPtr<TextCodec> textCodec = newTextCodec(*this);
80 CString encodedString; 79 CString encodedString;
81 if (string.is8Bit()) 80 if (string.is8Bit())
82 encodedString = textCodec->encode(string.characters8(), string.length(), handling); 81 encodedString = textCodec->encode(string.characters8(), string.length(), handling);
83 else 82 else
84 encodedString = textCodec->encode(string.characters16(), string.length(), handling); 83 encodedString = textCodec->encode(string.characters16(), string.length(), handling);
85 return encodedString; 84 return encodedString;
86 } 85 }
87 86
88 CString TextEncoding::normalizeAndEncode(const String& string, UnencodableHandling handling) const
89 {
90 if (!m_name)
91 return CString();
92
93 if (string.isEmpty())
94 return "";
95
96 // Text exclusively containing Latin-1 characters (U+0000..U+00FF) is left
97 // unaffected by NFC. This is effectively the same as saying that all
98 // Latin-1 text is already normalized to NFC.
99 // Source: http://unicode.org/reports/tr15/
100 if (string.is8Bit())
101 return newTextCodec(*this)->encode(string.characters8(), string.length(), handling);
102
103 const UChar* source = string.characters16();
104 size_t length = string.length();
105
106 Vector<UChar> normalizedCharacters;
107
108 UErrorCode err = U_ZERO_ERROR;
109 if (unorm_quickCheck(source, length, UNORM_NFC, &err) != UNORM_YES) {
110 // First try using the length of the original string, since normalization to NFC rarely increases length.
111 normalizedCharacters.grow(length);
112 int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
113 if (err == U_BUFFER_OVERFLOW_ERROR) {
114 err = U_ZERO_ERROR;
115 normalizedCharacters.resize(normalizedLength);
116 normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
117 }
118 ASSERT(U_SUCCESS(err));
119
120 source = normalizedCharacters.data();
121 length = normalizedLength;
122 }
123
124 return newTextCodec(*this)->encode(source, length, handling);
125 }
126
127 bool TextEncoding::usesVisualOrdering() const 87 bool TextEncoding::usesVisualOrdering() const
128 { 88 {
129 if (noExtendedTextEncodingNameUsed()) 89 if (noExtendedTextEncodingNameUsed())
130 return false; 90 return false;
131 91
132 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8"); 92 static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8");
133 return m_name == a; 93 return m_name == a;
134 } 94 }
135 95
136 bool TextEncoding::isNonByteBasedEncoding() const 96 bool TextEncoding::isNonByteBasedEncoding() const
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
216 return globalUTF8Encoding; 176 return globalUTF8Encoding;
217 } 177 }
218 178
219 const TextEncoding& WindowsLatin1Encoding() 179 const TextEncoding& WindowsLatin1Encoding()
220 { 180 {
221 AtomicallyInitializedStaticReference(const TextEncoding, globalWindowsLatin1Encoding, new TextEncoding("WinLatin1")); 181 AtomicallyInitializedStaticReference(const TextEncoding, globalWindowsLatin1Encoding, new TextEncoding("WinLatin1"));
222 return globalWindowsLatin1Encoding; 182 return globalWindowsLatin1Encoding;
223 } 183 }
224 184
225 } // namespace WTF 185 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextEncoding.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698