Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(65)

Side by Side Diff: url/url_canon_icu.cc

Issue 2747973002: Roll ICU to ICU-59-to-be (97b9daaf8) (Closed)
Patch Set: Use icu::IDNA instead of uidna in url_canon Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « DEPS ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // ICU integration functions. 5 // ICU integration functions.
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 #include <stdlib.h> 8 #include <stdlib.h>
9 #include <string.h> 9 #include <string.h>
10 10
11 #include "base/lazy_instance.h" 11 #include "base/lazy_instance.h"
12 #include "base/logging.h" 12 #include "base/logging.h"
13 #include "third_party/icu/source/common/unicode/char16ptr.h"
14 #include "third_party/icu/source/common/unicode/idna.h"
13 #include "third_party/icu/source/common/unicode/ucnv.h" 15 #include "third_party/icu/source/common/unicode/ucnv.h"
14 #include "third_party/icu/source/common/unicode/ucnv_cb.h" 16 #include "third_party/icu/source/common/unicode/ucnv_cb.h"
15 #include "third_party/icu/source/common/unicode/uidna.h"
16 #include "url/url_canon_icu.h" 17 #include "url/url_canon_icu.h"
17 #include "url/url_canon_internal.h" // for _itoa_s 18 #include "url/url_canon_internal.h" // for _itoa_s
18 19
19 namespace url { 20 namespace url {
20 21
21 namespace { 22 namespace {
22 23
23 // Called when converting a character that can not be represented, this will 24 // Called when converting a character that can not be represented, this will
24 // append an escaped version of the numerical character reference for that code 25 // append an escaped version of the numerical character reference for that code
25 // point. It is of the form "&#1234;" and we will escape the non-digits to 26 // point. It is of the form "&#1234;" and we will escape the non-digits to
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
66 ucnv_setFromUCallBack(converter_, old_callback_, old_context_, 0, 0, &err); 67 ucnv_setFromUCallBack(converter_, old_callback_, old_context_, 0, 0, &err);
67 } 68 }
68 69
69 private: 70 private:
70 UConverter* converter_; 71 UConverter* converter_;
71 72
72 UConverterFromUCallback old_callback_; 73 UConverterFromUCallback old_callback_;
73 const void* old_context_; 74 const void* old_context_;
74 }; 75 };
75 76
76 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to 77 // A wrapper to use LazyInstance<>::Leaky with icu::IDNA (UTS46/IDNA 2008
77 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). 78 // handling class).
78 // 79 //
79 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned 80 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned
80 // code points allowed) to IDNA 2008 with 81 // code points allowed) to IDNA 2008 with
81 // backward compatibility in mind. What it does: 82 // backward compatibility in mind. What it does:
82 // 83 //
83 // 1. Use the up-to-date Unicode data. 84 // 1. Use the up-to-date Unicode data.
84 // 2. Define a case folding/mapping with the up-to-date Unicode data as 85 // 2. Define a case folding/mapping with the up-to-date Unicode data as
85 // in IDNA 2003. 86 // in IDNA 2003.
86 // 3. Use transitional mechanism for 4 deviation characters (sharp-s, 87 // 3. Use transitional mechanism for 4 deviation characters (sharp-s,
87 // final sigma, ZWJ and ZWNJ) for now. 88 // final sigma, ZWJ and ZWNJ) for now.
88 // 4. Continue to allow symbols and punctuations. 89 // 4. Continue to allow symbols and punctuations.
89 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules. 90 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules.
90 // 6. Do not apply STD3 rules 91 // 6. Do not apply STD3 rules
91 // 7. Do not allow unassigned code points. 92 // 7. Do not allow unassigned code points.
92 // 93 //
93 // It also closely matches what IE 10 does except for the BiDi check ( 94 // It also closely matches what IE 10 does except for the BiDi check (
94 // http://goo.gl/3XBhqw ). 95 // http://goo.gl/3XBhqw ).
95 // See http://unicode.org/reports/tr46/ and references therein 96 // See http://unicode.org/reports/tr46/ and references therein
96 // for more details. 97 // for more details.
97 struct UIDNAWrapper { 98 struct IDNAWrapper {
98 UIDNAWrapper() { 99 IDNAWrapper() {
99 UErrorCode err = U_ZERO_ERROR; 100 UErrorCode err = U_ZERO_ERROR;
100 // TODO(jungshik): Change options as different parties (browsers, 101 // TODO(jungshik): Change options as different parties (browsers,
101 // registrars, search engines) converge toward a consensus. 102 // registrars, search engines) converge toward a consensus.
102 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); 103 value = icu::IDNA::createUTS46Instance(UIDNA_CHECK_BIDI, err);
103 if (U_FAILURE(err)) { 104 if (U_FAILURE(err)) {
104 CHECK(false) << "failed to open UTS46 data with error: " << err; 105 CHECK(false) << "failed to open UTS46 data with error: " << err;
105 value = NULL; 106 value = nullptr;
106 } 107 }
107 } 108 }
108 109
109 UIDNA* value; 110 icu::IDNA* value;
110 }; 111 };
111 112
112 } // namespace 113 } // namespace
113 114
114 ICUCharsetConverter::ICUCharsetConverter(UConverter* converter) 115 ICUCharsetConverter::ICUCharsetConverter(UConverter* converter)
115 : converter_(converter) { 116 : converter_(converter) {
116 } 117 }
117 118
118 ICUCharsetConverter::~ICUCharsetConverter() { 119 ICUCharsetConverter::~ICUCharsetConverter() {
119 } 120 }
(...skipping 18 matching lines...) Expand all
138 output->set_length(begin_offset + required_capacity); 139 output->set_length(begin_offset + required_capacity);
139 return; 140 return;
140 } 141 }
141 142
142 // Output didn't fit, expand 143 // Output didn't fit, expand
143 dest_capacity = required_capacity; 144 dest_capacity = required_capacity;
144 output->Resize(begin_offset + dest_capacity); 145 output->Resize(begin_offset + dest_capacity);
145 } while (true); 146 } while (true);
146 } 147 }
147 148
148 static base::LazyInstance<UIDNAWrapper>::Leaky 149 static base::LazyInstance<IDNAWrapper>::Leaky g_idna =
149 g_uidna = LAZY_INSTANCE_INITIALIZER; 150 LAZY_INSTANCE_INITIALIZER;
150 151
151 // Converts the Unicode input representing a hostname to ASCII using IDN rules. 152 // Converts the Unicode input representing a hostname to ASCII using IDN rules.
152 // The output must be ASCII, but is represented as wide characters. 153 // The output must be ASCII, but is represented as wide characters.
153 // 154 //
154 // On success, the output will be filled with the ASCII host name and it will 155 // On success, the output will be filled with the ASCII host name and it will
155 // return true. Unlike most other canonicalization functions, this assumes that 156 // return true. Unlike most other canonicalization functions, this assumes that
156 // the output is empty. The beginning of the host will be at offset 0, and 157 // the output is empty. The beginning of the host will be at offset 0, and
157 // the length of the output will be set to the length of the new host name. 158 // the length of the output will be set to the length of the new host name.
158 // 159 //
159 // On error, this will return false. The output in this case is undefined. 160 // On error, this will return false. The output in this case is undefined.
160 // TODO(jungshik): use UTF-8/ASCII version of nameToASCII. 161 // TODO(jungshik): use UTF-8/ASCII version of nameToASCII.
161 // Change the function signature and callers accordingly to avoid unnecessary 162 // Change the function signature and callers accordingly to avoid unnecessary
162 // conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII 163 // conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
163 // version with StringByteSink. That way, we can avoid C wrappers and additional 164 // version with StringByteSink. That way, we can avoid C wrappers and additional
164 // string conversion. 165 // string conversion.
165 bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) { 166 bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {
166 DCHECK(output->length() == 0); // Output buffer is assumed empty. 167 DCHECK(output->length() == 0); // Output buffer is assumed empty.
167 168
168 UIDNA* uidna = g_uidna.Get().value; 169 icu::IDNA* idna = g_idna.Get().value;
169 DCHECK(uidna != NULL); 170 DCHECK(idna != NULL);
170 while (true) { 171 icu::UnicodeString ascii;
171 UErrorCode err = U_ZERO_ERROR; 172 icu::IDNAInfo info;
172 UIDNAInfo info = UIDNA_INFO_INITIALIZER; 173 UErrorCode err = U_ZERO_ERROR;
173 int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(), 174 idna->nameToASCII(icu::UnicodeString(FALSE, src, src_len), ascii, info, err);
174 output->capacity(), &info, &err); 175 if (U_SUCCESS(err) && !info.hasErrors()) {
175 if (U_SUCCESS(err) && info.errors == 0) { 176 int output_length = ascii.length();
176 output->set_length(output_length); 177 if (output_length > output->capacity())
177 return true; 178 output->Resize(output_length);
178 } 179 output->Append(icu::toUCharPtr(ascii.getBuffer()), ascii.length());
jungshik at Google 2017/03/14 06:24:47 PNaCl now complains here about inline assembly.
179 180 return true;
180 // TODO(jungshik): Look at info.errors to handle them on a case-by-case
181 // basis if necessary.
182 if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
183 return false; // Unknown error, give up.
184
185 // Not enough room in our buffer, expand.
186 output->Resize(output_length);
187 } 181 }
182 // TODO(jungshik): Look at info.getErrors() to handle them on a
183 // case-by-case basis if necessary.
184 return false; // Unknown error, give up.
188 } 185 }
189 186
190 } // namespace url 187 } // namespace url
OLDNEW
« no previous file with comments | « DEPS ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698