OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * Copyright (C) 2010, International Business Machines | |
4 * Corporation and others. All Rights Reserved. | |
5 ******************************************************************************* | |
6 * file name: idna.h | |
7 * encoding: US-ASCII | |
8 * tab size: 8 (not used) | |
9 * indentation:4 | |
10 * | |
11 * created on: 2010mar05 | |
12 * created by: Markus W. Scherer | |
13 */ | |
14 | |
15 #ifndef __IDNA_H__ | |
16 #define __IDNA_H__ | |
17 | |
18 /** | |
19 * \file | |
20 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) | |
21 */ | |
22 | |
23 #include "unicode/utypes.h" | |
24 | |
25 #if !UCONFIG_NO_IDNA | |
26 | |
27 #include "unicode/bytestream.h" | |
28 #include "unicode/stringpiece.h" | |
29 #include "unicode/uidna.h" | |
30 #include "unicode/unistr.h" | |
31 | |
32 U_NAMESPACE_BEGIN | |
33 | |
34 class U_COMMON_API IDNAInfo; | |
35 | |
36 /** | |
37 * Abstract base class for IDNA processing. | |
38 * See http://www.unicode.org/reports/tr46/ | |
39 * and http://www.ietf.org/rfc/rfc3490.txt | |
40 * | |
41 * The IDNA class is not intended for public subclassing. | |
42 * | |
43 * This C++ API currently only implements UTS #46. | |
44 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) | |
45 * and IDNA2003 (functions that do not use a service object). | |
46 * @draft ICU 4.6 | |
47 */ | |
48 class U_COMMON_API IDNA : public UObject { | |
49 public: | |
50 /** | |
51 * Returns an IDNA instance which implements UTS #46. | |
52 * Returns an unmodifiable instance, owned by the caller. | |
53 * Cache it for multiple operations, and delete it when done. | |
54 * The instance is thread-safe, that is, it can be used concurrently. | |
55 * | |
56 * UTS #46 defines Unicode IDNA Compatibility Processing, | |
57 * updated to the latest version of Unicode and compatible with both | |
58 * IDNA2003 and IDNA2008. | |
59 * | |
60 * The worker functions use transitional processing, including deviation mappings, | |
61 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE | |
62 * is used in which case the deviation characters are passed through without change. | |
63 * | |
64 * Disallowed characters are mapped to U+FFFD. | |
65 * | |
66 * For available options see the uidna.h header. | |
67 * Operations with the UTS #46 instance do not support the | |
68 * UIDNA_ALLOW_UNASSIGNED option. | |
69 * | |
70 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). | |
71 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than | |
72 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. | |
73 * | |
74 * @param options Bit set to modify the processing and error checking. | |
75 * See option bit set values in uidna.h. | |
76 * @param errorCode Standard ICU error code. Its input value must | |
77 * pass the U_SUCCESS() test, or else the function returns | |
78 * immediately. Check for U_FAILURE() on output or use with | |
79 * function chaining. (See User Guide for details.) | |
80 * @return the UTS #46 IDNA instance, if successful | |
81 * @draft ICU 4.6 | |
82 */ | |
83 static IDNA * | |
84 createUTS46Instance(uint32_t options, UErrorCode &errorCode); | |
85 | |
86 /** | |
87 * Converts a single domain name label into its ASCII form for DNS lookup. | |
88 * If any processing step fails, then info.hasErrors() will be TRUE and | |
89 * the result might not be an ASCII string. | |
90 * The label might be modified according to the types of errors. | |
91 * Labels with severe errors will be left in (or turned into) their Unicode form. | |
92 * | |
93 * The UErrorCode indicates an error only in exceptional cases, | |
94 * such as a U_MEMORY_ALLOCATION_ERROR. | |
95 * | |
96 * @param label Input domain name label | |
97 * @param dest Destination string object | |
98 * @param info Output container of IDNA processing details. | |
99 * @param errorCode Standard ICU error code. Its input value must | |
100 * pass the U_SUCCESS() test, or else the function returns | |
101 * immediately. Check for U_FAILURE() on output or use with | |
102 * function chaining. (See User Guide for details.) | |
103 * @return dest | |
104 * @draft ICU 4.6 | |
105 */ | |
106 virtual UnicodeString & | |
107 labelToASCII(const UnicodeString &label, UnicodeString &dest, | |
108 IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
109 | |
110 /** | |
111 * Converts a single domain name label into its Unicode form for human-readable display. | |
112 * If any processing step fails, then info.hasErrors() will be TRUE. | |
113 * The label might be modified according to the types of errors. | |
114 * | |
115 * The UErrorCode indicates an error only in exceptional cases, | |
116 * such as a U_MEMORY_ALLOCATION_ERROR. | |
117 * | |
118 * @param label Input domain name label | |
119 * @param dest Destination string object | |
120 * @param info Output container of IDNA processing details. | |
121 * @param errorCode Standard ICU error code. Its input value must | |
122 * pass the U_SUCCESS() test, or else the function returns | |
123 * immediately. Check for U_FAILURE() on output or use with | |
124 * function chaining. (See User Guide for details.) | |
125 * @return dest | |
126 * @draft ICU 4.6 | |
127 */ | |
128 virtual UnicodeString & | |
129 labelToUnicode(const UnicodeString &label, UnicodeString &dest, | |
130 IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
131 | |
132 /** | |
133 * Converts a whole domain name into its ASCII form for DNS lookup. | |
134 * If any processing step fails, then info.hasErrors() will be TRUE and | |
135 * the result might not be an ASCII string. | |
136 * The domain name might be modified according to the types of errors. | |
137 * Labels with severe errors will be left in (or turned into) their Unicode form. | |
138 * | |
139 * The UErrorCode indicates an error only in exceptional cases, | |
140 * such as a U_MEMORY_ALLOCATION_ERROR. | |
141 * | |
142 * @param name Input domain name | |
143 * @param dest Destination string object | |
144 * @param info Output container of IDNA processing details. | |
145 * @param errorCode Standard ICU error code. Its input value must | |
146 * pass the U_SUCCESS() test, or else the function returns | |
147 * immediately. Check for U_FAILURE() on output or use with | |
148 * function chaining. (See User Guide for details.) | |
149 * @return dest | |
150 * @draft ICU 4.6 | |
151 */ | |
152 virtual UnicodeString & | |
153 nameToASCII(const UnicodeString &name, UnicodeString &dest, | |
154 IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
155 | |
156 /** | |
157 * Converts a whole domain name into its Unicode form for human-readable display. | |
158 * If any processing step fails, then info.hasErrors() will be TRUE. | |
159 * The domain name might be modified according to the types of errors. | |
160 * | |
161 * The UErrorCode indicates an error only in exceptional cases, | |
162 * such as a U_MEMORY_ALLOCATION_ERROR. | |
163 * | |
164 * @param name Input domain name | |
165 * @param dest Destination string object | |
166 * @param info Output container of IDNA processing details. | |
167 * @param errorCode Standard ICU error code. Its input value must | |
168 * pass the U_SUCCESS() test, or else the function returns | |
169 * immediately. Check for U_FAILURE() on output or use with | |
170 * function chaining. (See User Guide for details.) | |
171 * @return dest | |
172 * @draft ICU 4.6 | |
173 */ | |
174 virtual UnicodeString & | |
175 nameToUnicode(const UnicodeString &name, UnicodeString &dest, | |
176 IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
177 | |
178 // UTF-8 versions of the processing methods ---------------------------- *** | |
179 | |
180 /** | |
181 * Converts a single domain name label into its ASCII form for DNS lookup. | |
182 * UTF-8 version of labelToASCII(), same behavior. | |
183 * | |
184 * @param label Input domain name label | |
185 * @param dest Destination byte sink; Flush()ed if successful | |
186 * @param info Output container of IDNA processing details. | |
187 * @param errorCode Standard ICU error code. Its input value must | |
188 * pass the U_SUCCESS() test, or else the function returns | |
189 * immediately. Check for U_FAILURE() on output or use with | |
190 * function chaining. (See User Guide for details.) | |
191 * (No return value: this method is declared void; the result goes to the dest byte sink.) | |
192 * @draft ICU 4.6 | |
193 */ | |
194 virtual void | |
195 labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, | |
196 IDNAInfo &info, UErrorCode &errorCode) const; | |
197 | |
198 /** | |
199 * Converts a single domain name label into its Unicode form for human-readable display. | |
200 * UTF-8 version of labelToUnicode(), same behavior. | |
201 * | |
202 * @param label Input domain name label | |
203 * @param dest Destination byte sink; Flush()ed if successful | |
204 * @param info Output container of IDNA processing details. | |
205 * @param errorCode Standard ICU error code. Its input value must | |
206 * pass the U_SUCCESS() test, or else the function returns | |
207 * immediately. Check for U_FAILURE() on output or use with | |
208 * function chaining. (See User Guide for details.) | |
209 * (No return value: this method is declared void; the result goes to the dest byte sink.) | |
210 * @draft ICU 4.6 | |
211 */ | |
212 virtual void | |
213 labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, | |
214 IDNAInfo &info, UErrorCode &errorCode) const; | |
215 | |
216 /** | |
217 * Converts a whole domain name into its ASCII form for DNS lookup. | |
218 * UTF-8 version of nameToASCII(), same behavior. | |
219 * | |
220 * @param name Input domain name | |
221 * @param dest Destination byte sink; Flush()ed if successful | |
222 * @param info Output container of IDNA processing details. | |
223 * @param errorCode Standard ICU error code. Its input value must | |
224 * pass the U_SUCCESS() test, or else the function returns | |
225 * immediately. Check for U_FAILURE() on output or use with | |
226 * function chaining. (See User Guide for details.) | |
227 * (No return value: this method is declared void; the result goes to the dest byte sink.) | |
228 * @draft ICU 4.6 | |
229 */ | |
230 virtual void | |
231 nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, | |
232 IDNAInfo &info, UErrorCode &errorCode) const; | |
233 | |
234 /** | |
235 * Converts a whole domain name into its Unicode form for human-readable display. | |
236 * UTF-8 version of nameToUnicode(), same behavior. | |
237 * | |
238 * @param name Input domain name | |
239 * @param dest Destination byte sink; Flush()ed if successful | |
240 * @param info Output container of IDNA processing details. | |
241 * @param errorCode Standard ICU error code. Its input value must | |
242 * pass the U_SUCCESS() test, or else the function returns | |
243 * immediately. Check for U_FAILURE() on output or use with | |
244 * function chaining. (See User Guide for details.) | |
245 * (No return value: this method is declared void; the result goes to the dest byte sink.) | |
246 * @draft ICU 4.6 | |
247 */ | |
248 virtual void | |
249 nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, | |
250 IDNAInfo &info, UErrorCode &errorCode) const; | |
251 | |
252 private: | |
253 // No ICU "poor man's RTTI" for this class nor its subclasses. | |
254 virtual UClassID getDynamicClassID() const; | |
255 }; | |
256 | |
257 class UTS46; | |
258 | |
259 /** | |
260 * Output container for IDNA processing errors. | |
261 * The IDNAInfo class is not suitable for subclassing. | |
262 * @draft ICU 4.6 | |
263 */ | |
264 class U_COMMON_API IDNAInfo : public UMemory { | |
265 public: | |
266 /** | |
267 * Constructor for stack allocation; starts with no errors recorded (error bit sets are 0). | |
268 * @draft ICU 4.6 | |
269 */ | |
270 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), i
sOkBiDi(TRUE) {} | |
271 /** | |
272 * Were there IDNA processing errors? | |
273 * @return TRUE if there were processing errors (the error bit set is nonzero) | |
274 * @draft ICU 4.6 | |
275 */ | |
276 UBool hasErrors() const { return errors!=0; } | |
277 /** | |
278 * Returns a bit set indicating IDNA processing errors. | |
279 * See UIDNA_ERROR_... constants in uidna.h. | |
280 * @return bit set of processing errors | |
281 * @draft ICU 4.6 | |
282 */ | |
283 uint32_t getErrors() const { return errors; } | |
284 /** | |
285 * Returns TRUE if transitional and nontransitional processing produce different results. | |
286 * This is the case when the input label or domain name contains | |
287 * one or more deviation characters outside a Punycode label (see UTS #46). | |
288 * <ul> | |
289 * <li>With nontransitional processing, such characters are | |
290 * copied to the destination string. | |
291 * <li>With transitional processing, such characters are | |
292 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). | |
293 * </ul> | |
294 * @return TRUE if transitional and nontransitional processing produce different results | |
295 * @draft ICU 4.6 | |
296 */ | |
297 UBool isTransitionalDifferent() const { return isTransDiff; } | |
298 | |
299 private: | |
300 friend class UTS46; | |
301 | |
302 IDNAInfo(const IDNAInfo &other); // no copying: private, declaration only | |
303 IDNAInfo &operator=(const IDNAInfo &other); // no copying: private, declaration only | |
304 | |
305 void reset() { | |
306 errors=labelErrors=0; | |
307 isTransDiff=FALSE; | |
308 isBiDi=FALSE; | |
309 isOkBiDi=TRUE; | |
310 } | |
311 | |
312 uint32_t errors, labelErrors; | |
313 UBool isTransDiff; | |
314 UBool isBiDi; | |
315 UBool isOkBiDi; | |
316 }; | |
317 | |
318 U_NAMESPACE_END | |
319 | |
320 #endif // UCONFIG_NO_IDNA | |
321 #endif // __IDNA_H__ | |
OLD | NEW |