url/url_canon_icu.cc - Issue 2747973002: Roll ICU to ICU-59-to-be (97b9daaf8)

Side by Side Diff: url/url_canon_icu.cc

Issue 2747973002: Roll ICU to ICU-59-to-be (97b9daaf8) (Closed)

Patch Set: Use icu::IDNA instead of uidna in url_canon Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // ICU integration functions.	5 // ICU integration functions.

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8 #include <stdlib.h>	8 #include <stdlib.h>

9 #include <string.h>	9 #include <string.h>

10	10

11 #include "base/lazy_instance.h"	11 #include "base/lazy_instance.h"

12 #include "base/logging.h"	12 #include "base/logging.h"

	13 #include "third_party/icu/source/common/unicode/char16ptr.h"

	14 #include "third_party/icu/source/common/unicode/idna.h"

13 #include "third_party/icu/source/common/unicode/ucnv.h"	15 #include "third_party/icu/source/common/unicode/ucnv.h"

14 #include "third_party/icu/source/common/unicode/ucnv_cb.h"	16 #include "third_party/icu/source/common/unicode/ucnv_cb.h"

15 #include "third_party/icu/source/common/unicode/uidna.h"

16 #include "url/url_canon_icu.h"	17 #include "url/url_canon_icu.h"

17 #include "url/url_canon_internal.h" // for _itoa_s	18 #include "url/url_canon_internal.h" // for _itoa_s

18	19

19 namespace url {	20 namespace url {

20	21

21 namespace {	22 namespace {

22	23

23 // Called when converting a character that can not be represented, this will	24 // Called when converting a character that can not be represented, this will

24 // append an escaped version of the numerical character reference for that code	25 // append an escaped version of the numerical character reference for that code

25 // point. It is of the form "&#1234;" and we will escape the non-digits to	26 // point. It is of the form "&#1234;" and we will escape the non-digits to

(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
66 ucnv_setFromUCallBack(converter_, old_callback_, old_context_, 0, 0, &err);	67 ucnv_setFromUCallBack(converter_, old_callback_, old_context_, 0, 0, &err);

67 }	68 }

68	69

69 private:	70 private:

70 UConverter* converter_;	71 UConverter* converter_;

71	72

72 UConverterFromUCallback old_callback_;	73 UConverterFromUCallback old_callback_;

73 const void* old_context_;	74 const void* old_context_;

74 };	75 };

75	76

76 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to	77 // A wrapper to use LazyInstance<>::Leaky with icu::IDNA (UTS46/IDNA 2008

77 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().	78 // handling class).

78 //	79 //

79 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned	80 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned

80 // code points allowed) to IDNA 2008 with	81 // code points allowed) to IDNA 2008 with

81 // the backward compatibility in mind. What it does:	82 // the backward compatibility in mind. What it does:

82 //	83 //

83 // 1. Use the up-to-date Unicode data.	84 // 1. Use the up-to-date Unicode data.

84 // 2. Define a case folding/mapping with the up-to-date Unicode data as	85 // 2. Define a case folding/mapping with the up-to-date Unicode data as

85 // in IDNA 2003.	86 // in IDNA 2003.

86 // 3. Use transitional mechanism for 4 deviation characters (sharp-s,	87 // 3. Use transitional mechanism for 4 deviation characters (sharp-s,

87 // final sigma, ZWJ and ZWNJ) for now.	88 // final sigma, ZWJ and ZWNJ) for now.

88 // 4. Continue to allow symbols and punctuations.	89 // 4. Continue to allow symbols and punctuations.

89 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules.	90 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules.

90 // 6. Do not apply STD3 rules	91 // 6. Do not apply STD3 rules

91 // 7. Do not allow unassigned code points.	92 // 7. Do not allow unassigned code points.

92 //	93 //

93 // It also closely matches what IE 10 does except for the BiDi check (	94 // It also closely matches what IE 10 does except for the BiDi check (

94 // http://goo.gl/3XBhqw ).	95 // http://goo.gl/3XBhqw ).

95 // See http://unicode.org/reports/tr46/ and references therein	96 // See http://unicode.org/reports/tr46/ and references therein

96 // for more details.	97 // for more details.

97 struct UIDNAWrapper {	98 struct IDNAWrapper {

98 UIDNAWrapper() {	99 IDNAWrapper() {

99 UErrorCode err = U_ZERO_ERROR;	100 UErrorCode err = U_ZERO_ERROR;

100 // TODO(jungshik): Change options as different parties (browsers,	101 // TODO(jungshik): Change options as different parties (browsers,

101 // registrars, search engines) converge toward a consensus.	102 // registrars, search engines) converge toward a consensus.

102 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);	103 value = icu::IDNA::createUTS46Instance(UIDNA_CHECK_BIDI, err);

103 if (U_FAILURE(err)) {	104 if (U_FAILURE(err)) {

104 CHECK(false) << "failed to open UTS46 data with error: " << err;	105 CHECK(false) << "failed to open UTS46 data with error: " << err;

105 value = NULL;	106 value = nullptr;

106 }	107 }

107 }	108 }

108	109

109 UIDNA* value;	110 icu::IDNA* value;

110 };	111 };

111	112

112 } // namespace	113 } // namespace

113	114

114 ICUCharsetConverter::ICUCharsetConverter(UConverter* converter)	115 ICUCharsetConverter::ICUCharsetConverter(UConverter* converter)

115 : converter_(converter) {	116 : converter_(converter) {

116 }	117 }

117	118

118 ICUCharsetConverter::~ICUCharsetConverter() {	119 ICUCharsetConverter::~ICUCharsetConverter() {

119 }	120 }

(...skipping 18 matching lines...) Expand all Loading...
138 output->set_length(begin_offset + required_capacity);	139 output->set_length(begin_offset + required_capacity);

139 return;	140 return;

140 }	141 }

141	142

142 // Output didn't fit, expand	143 // Output didn't fit, expand

143 dest_capacity = required_capacity;	144 dest_capacity = required_capacity;

144 output->Resize(begin_offset + dest_capacity);	145 output->Resize(begin_offset + dest_capacity);

145 } while (true);	146 } while (true);

146 }	147 }

147	148

148 static base::LazyInstance<UIDNAWrapper>::Leaky	149 static base::LazyInstance<IDNAWrapper>::Leaky g_idna =

149 g_uidna = LAZY_INSTANCE_INITIALIZER;	150 LAZY_INSTANCE_INITIALIZER;

150	151

151 // Converts the Unicode input representing a hostname to ASCII using IDN rules.	152 // Converts the Unicode input representing a hostname to ASCII using IDN rules.

152 // The output must be ASCII, but is represented as wide characters.	153 // The output must be ASCII, but is represented as wide characters.

153 //	154 //

154 // On success, the output will be filled with the ASCII host name and it will	155 // On success, the output will be filled with the ASCII host name and it will

155 // return true. Unlike most other canonicalization functions, this assumes that	156 // return true. Unlike most other canonicalization functions, this assumes that

156 // the output is empty. The beginning of the host will be at offset 0, and	157 // the output is empty. The beginning of the host will be at offset 0, and

157 // the length of the output will be set to the length of the new host name.	158 // the length of the output will be set to the length of the new host name.

158 //	159 //

159 // On error, this will return false. The output in this case is undefined.	160 // On error, this will return false. The output in this case is undefined.

160 // TODO(jungshik): use UTF-8/ASCII version of nameToASCII.	161 // TODO(jungshik): use UTF-8/ASCII version of nameToASCII.

161 // Change the function signature and callers accordingly to avoid unnecessary	162 // Change the function signature and callers accordingly to avoid unnecessary

162 // conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII	163 // conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII

163 // version with StringByteSink. That way, we can avoid C wrappers and additional	164 // version with StringByteSink. That way, we can avoid C wrappers and additional

164 // string conversion.	165 // string conversion.

165 bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {	166 bool IDNToASCII(const base::char16* src, int src_len, CanonOutputW* output) {

166 DCHECK(output->length() == 0); // Output buffer is assumed empty.	167 DCHECK(output->length() == 0); // Output buffer is assumed empty.

167	168

168 UIDNA* uidna = g_uidna.Get().value;	169 icu::IDNA* idna = g_idna.Get().value;

169 DCHECK(uidna != NULL);	170 DCHECK(idna != NULL);

170 while (true) {	171 icu::UnicodeString ascii;

171 UErrorCode err = U_ZERO_ERROR;	172 icu::IDNAInfo info;

172 UIDNAInfo info = UIDNA_INFO_INITIALIZER;	173 UErrorCode err = U_ZERO_ERROR;

173 int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(),	174 idna->nameToASCII(icu::UnicodeString(FALSE, src, src_len), ascii, info, err);

174 output->capacity(), &info, &err);	175 if (U_SUCCESS(err) && !info.hasErrors()) {

175 if (U_SUCCESS(err) && info.errors == 0) {	176 int output_length = ascii.length();

176 output->set_length(output_length);	177 if (output_length > output->capacity())

177 return true;	178 output->Resize(output_length);

178 }	179 output->Append(icu::toUCharPtr(ascii.getBuffer()), ascii.length());
	jungshik at Google 2017/03/14 06:24:47 PNaCl now complains here about inline assembly.
179	180 return true;

180 // TODO(jungshik): Look at info.errors to handle them on a case-by-case basis

181 // if necessary.

182 if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)

183 return false; // Unknown error, give up.

184

185 // Not enough room in our buffer, expand.

186 output->Resize(output_length);

187 }	181 }

	182 // TODO(jungshik): Look at info.getErrors() to handle them on a case-by-case basis

	183 // if necessary.

	184 return false; // Unknown error, give up.

188 }	185 }

189	186

190 } // namespace url	187 } // namespace url

OLD	NEW

« no previous file with comments | « DEPS ('k') | no next file » | no next file with comments »