OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 2003-2011, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: idnaref.cpp | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created on: 2003feb1 | |
14 * created by: Ram Viswanadha | |
15 */ | |
16 | |
17 #include "unicode/utypes.h" | |
18 | |
19 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION | |
20 #include "idnaref.h" | |
21 #include "punyref.h" | |
22 #include "ustr_imp.h" | |
23 #include "cmemory.h" | |
24 #include "sprpimpl.h" | |
25 #include "nptrans.h" | |
26 #include "testidna.h" | |
27 #include "punycode.h" | |
28 #include "unicode/ustring.h" | |
29 | |
30 /* The official IDNA ACE prefix is "xn--" */ | |
31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; | |
32 #define ACE_PREFIX_LENGTH 4 | |
33 | |
34 #define MAX_LABEL_LENGTH 63 | |
35 #define HYPHEN 0x002D | |
36 /* A label may not be longer than MAX_LABEL_LENGTH (63) code units; the working buffers below are sized larger than that */ | |
37 #define MAX_LABEL_BUFFER_SIZE 100 | |
38 #define MAX_IDN_BUFFER_SIZE 300 | |
39 | |
40 #define CAPITAL_A 0x0041 | |
41 #define CAPITAL_Z 0x005A | |
42 #define LOWER_CASE_DELTA 0x0020 | |
43 #define FULL_STOP 0x002E | |
44 | |
45 | |
46 inline static UBool | |
47 startsWithPrefix(const UChar* src , int32_t srcLength){ | |
48 UBool startsWithPrefix = TRUE; | |
49 | |
50 if(srcLength < ACE_PREFIX_LENGTH){ | |
51 return FALSE; | |
52 } | |
53 | |
54 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ | |
55 if(u_tolower(src[i]) != ACE_PREFIX[i]){ | |
56 startsWithPrefix = FALSE; | |
57 } | |
58 } | |
59 return startsWithPrefix; | |
60 } | |
61 | |
62 inline static UChar | |
63 toASCIILower(UChar ch){ | |
64 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ | |
65 return ch + LOWER_CASE_DELTA; | |
66 } | |
67 return ch; | |
68 } | |
69 | |
70 inline static int32_t | |
71 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, | |
72 const UChar* s2, int32_t s2Len){ | |
73 if(s1Len != s2Len){ | |
74 return (s1Len > s2Len) ? s1Len : s2Len; | |
75 } | |
76 UChar c1,c2; | |
77 int32_t rc; | |
78 | |
79 for(int32_t i =0;/* no condition */;i++) { | |
80 /* If we reach the ends of both strings then they match */ | |
81 if(i == s1Len) { | |
82 return 0; | |
83 } | |
84 | |
85 c1 = s1[i]; | |
86 c2 = s2[i]; | |
87 | |
88 /* Case-insensitive comparison */ | |
89 if(c1!=c2) { | |
90 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); | |
91 if(rc!=0) { | |
92 return rc; | |
93 } | |
94 } | |
95 } | |
96 | |
97 } | |
98 | |
99 static UErrorCode getError(enum punycode_status status){ | |
100 switch(status){ | |
101 case punycode_success: | |
102 return U_ZERO_ERROR; | |
103 case punycode_bad_input: /* Input is invalid. */ | |
104 return U_INVALID_CHAR_FOUND; | |
105 case punycode_big_output: /* Output would exceed the space provided. */ | |
106 return U_BUFFER_OVERFLOW_ERROR; | |
107 case punycode_overflow : /* Input requires wider integers to process. */ | |
108 return U_INDEX_OUTOFBOUNDS_ERROR; | |
109 default: | |
110 return U_INTERNAL_PROGRAM_ERROR; | |
111 } | |
112 } | |
113 | |
114 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t
length){ | |
115 int i; | |
116 for(i=0;i<length;i++){ | |
117 dest[i] = src[i]; | |
118 } | |
119 return i; | |
120 } | |
121 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t
length){ | |
122 int i; | |
123 for(i=0;i<length;i++){ | |
124 dest[i] = (char)src[i]; | |
125 } | |
126 return i; | |
127 } | |
128 // wrapper around the reference Punycode implementation | |
129 static int32_t convertToPuny(const UChar* src, int32_t srcLength, | |
130 UChar* dest, int32_t destCapacity, | |
131 UErrorCode& status){ | |
132 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
133 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
134 uint32_t* b1 = b1Stack; | |
135 char b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
136 char* b2 = b2Stack; | |
137 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; | |
138 punycode_status error; | |
139 unsigned char* caseFlags = NULL; | |
140 | |
141 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); | |
142 if(status == U_BUFFER_OVERFLOW_ERROR){ | |
143 // redo processing of string | |
144 /* we do not have enough room so grow the buffer*/ | |
145 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); | |
146 if(b1==NULL){ | |
147 status = U_MEMORY_ALLOCATION_ERROR; | |
148 goto CLEANUP; | |
149 } | |
150 | |
151 status = U_ZERO_ERROR; // reset error | |
152 | |
153 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); | |
154 } | |
155 if(U_FAILURE(status)){ | |
156 goto CLEANUP; | |
157 } | |
158 | |
159 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); | |
160 | |
161 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); | |
162 status = getError(error); | |
163 | |
164 if(status == U_BUFFER_OVERFLOW_ERROR){ | |
165 /* we do not have enough room so grow the buffer*/ | |
166 b2 = (char*) uprv_malloc( b2Len * sizeof(char)); | |
167 if(b2==NULL){ | |
168 status = U_MEMORY_ALLOCATION_ERROR; | |
169 goto CLEANUP; | |
170 } | |
171 | |
172 status = U_ZERO_ERROR; // reset error | |
173 | |
174 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&
b2Len, b2); | |
175 status = getError(error); | |
176 } | |
177 if(U_FAILURE(status)){ | |
178 goto CLEANUP; | |
179 } | |
180 | |
181 if(b2Len < destCapacity){ | |
182 convertASCIIToUChars(b2,dest,b2Len); | |
183 }else{ | |
184 status =U_BUFFER_OVERFLOW_ERROR; | |
185 } | |
186 | |
187 CLEANUP: | |
188 if(b1Stack != b1){ | |
189 uprv_free(b1); | |
190 } | |
191 if(b2Stack != b2){ | |
192 uprv_free(b2); | |
193 } | |
194 uprv_free(caseFlags); | |
195 | |
196 return b2Len; | |
197 } | |
198 | |
199 static int32_t convertFromPuny( const UChar* src, int32_t srcLength, | |
200 UChar* dest, int32_t destCapacity, | |
201 UErrorCode& status){ | |
202 char b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
203 char* b1 = b1Stack; | |
204 int32_t destLen =0; | |
205 | |
206 convertUCharsToASCII(src, b1,srcLength); | |
207 | |
208 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
209 uint32_t* b2 = b2Stack; | |
210 int32_t b2Len =MAX_LABEL_BUFFER_SIZE; | |
211 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength *
sizeof(unsigned char*)); | |
212 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,ca
seFlags); | |
213 status = getError(error); | |
214 if(status == U_BUFFER_OVERFLOW_ERROR){ | |
215 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); | |
216 if(b2 == NULL){ | |
217 status = U_MEMORY_ALLOCATION_ERROR; | |
218 goto CLEANUP; | |
219 } | |
220 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); | |
221 status = getError(error); | |
222 } | |
223 | |
224 if(U_FAILURE(status)){ | |
225 goto CLEANUP; | |
226 } | |
227 | |
228 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); | |
229 | |
230 CLEANUP: | |
231 if(b1Stack != b1){ | |
232 uprv_free(b1); | |
233 } | |
234 if(b2Stack != b2){ | |
235 uprv_free(b2); | |
236 } | |
237 uprv_free(caseFlags); | |
238 | |
239 return destLen; | |
240 } | |
241 | |
242 | |
243 U_CFUNC int32_t U_EXPORT2 | |
244 idnaref_toASCII(const UChar* src, int32_t srcLength, | |
245 UChar* dest, int32_t destCapacity, | |
246 int32_t options, | |
247 UParseError* parseError, | |
248 UErrorCode* status){ | |
249 | |
250 if(status == NULL || U_FAILURE(*status)){ | |
251 return 0; | |
252 } | |
253 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ | |
254 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
255 return 0; | |
256 } | |
257 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
258 //initialize pointers to stack buffers | |
259 UChar *b1 = b1Stack, *b2 = b2Stack; | |
260 int32_t b1Len=0, b2Len=0, | |
261 b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
262 b2Capacity = MAX_LABEL_BUFFER_SIZE , | |
263 reqLength=0; | |
264 | |
265 //get the options | |
266 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0)
; | |
267 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
268 | |
269 UBool* caseFlags = NULL; | |
270 | |
271 // assume the source contains all ascii codepoints | |
272 UBool srcIsASCII = TRUE; | |
273 // assume the source contains all LDH codepoints | |
274 UBool srcIsLDH = TRUE; | |
275 int32_t j=0; | |
276 | |
277 if(srcLength == -1){ | |
278 srcLength = u_strlen(src); | |
279 } | |
280 | |
281 // step 1 | |
282 for( j=0;j<srcLength;j++){ | |
283 if(src[j] > 0x7F){ | |
284 srcIsASCII = FALSE; | |
285 } | |
286 b1[b1Len++] = src[j]; | |
287 } | |
288 | |
289 NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
290 if(U_FAILURE(*status)){ | |
291 goto CLEANUP; | |
292 } | |
293 | |
294 // step 2 is performed only if the source contains non ASCII | |
295 if (!srcIsASCII) { | |
296 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parse
Error,*status); | |
297 | |
298 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
299 // redo processing of string | |
300 /* we do not have enough room so grow the buffer*/ | |
301 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
302 if(b1==NULL){ | |
303 *status = U_MEMORY_ALLOCATION_ERROR; | |
304 goto CLEANUP; | |
305 } | |
306 | |
307 *status = U_ZERO_ERROR; // reset error | |
308 | |
309 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parse
Error, *status); | |
310 } | |
311 // error bail out | |
312 if(U_FAILURE(*status)){ | |
313 goto CLEANUP; | |
314 } | |
315 } | |
316 | |
317 if(b1Len == 0){ | |
318 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
319 goto CLEANUP; | |
320 } | |
321 | |
322 srcIsASCII = TRUE; | |
323 // step 3 & 4 | |
324 for( j=0;j<b1Len;j++){ | |
325 if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII | |
326 srcIsASCII = FALSE; | |
327 }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII ran
ge verify that it is an LDH character{ | |
328 srcIsLDH = FALSE; | |
329 } | |
330 } | |
331 | |
332 if(useSTD3ASCIIRules == TRUE){ | |
333 // verify 3a and 3b | |
334 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
335 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ | |
336 *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
337 goto CLEANUP; | |
338 } | |
339 } | |
340 if(srcIsASCII){ | |
341 if(b1Len <= destCapacity){ | |
342 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); | |
343 reqLength = b1Len; | |
344 }else{ | |
345 reqLength = b1Len; | |
346 goto CLEANUP; | |
347 } | |
348 }else{ | |
349 // step 5 : verify the sequence does not begin with ACE prefix | |
350 if(!startsWithPrefix(b1,b1Len)){ | |
351 | |
352 //step 6: encode the sequence with punycode | |
353 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); | |
354 | |
355 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status); | |
356 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status)
; | |
357 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
358 // redo processing of string | |
359 /* we do not have enough room so grow the buffer*/ | |
360 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
361 if(b2 == NULL){ | |
362 *status = U_MEMORY_ALLOCATION_ERROR; | |
363 goto CLEANUP; | |
364 } | |
365 | |
366 *status = U_ZERO_ERROR; // reset error | |
367 | |
368 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status); | |
369 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status); | |
370 | |
371 } | |
372 //error bail out | |
373 if(U_FAILURE(*status)){ | |
374 goto CLEANUP; | |
375 } | |
376 reqLength = b2Len+ACE_PREFIX_LENGTH; | |
377 | |
378 if(reqLength > destCapacity){ | |
379 *status = U_BUFFER_OVERFLOW_ERROR; | |
380 goto CLEANUP; | |
381 } | |
382 //Step 7: prepend the ACE prefix | |
383 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); | |
384 //Step 6: copy the contents in b2 into dest | |
385 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); | |
386 | |
387 }else{ | |
388 *status = U_IDNA_ACE_PREFIX_ERROR; | |
389 goto CLEANUP; | |
390 } | |
391 } | |
392 | |
393 if(reqLength > MAX_LABEL_LENGTH){ | |
394 *status = U_IDNA_LABEL_TOO_LONG_ERROR; | |
395 } | |
396 | |
397 CLEANUP: | |
398 if(b1 != b1Stack){ | |
399 uprv_free(b1); | |
400 } | |
401 if(b2 != b2Stack){ | |
402 uprv_free(b2); | |
403 } | |
404 uprv_free(caseFlags); | |
405 | |
406 // delete prep; | |
407 | |
408 return u_terminateUChars(dest, destCapacity, reqLength, status); | |
409 } | |
410 | |
411 | |
412 U_CFUNC int32_t U_EXPORT2 | |
413 idnaref_toUnicode(const UChar* src, int32_t srcLength, | |
414 UChar* dest, int32_t destCapacity, | |
415 int32_t options, | |
416 UParseError* parseError, | |
417 UErrorCode* status){ | |
418 | |
419 if(status == NULL || U_FAILURE(*status)){ | |
420 return 0; | |
421 } | |
422 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ | |
423 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
424 return 0; | |
425 } | |
426 | |
427 | |
428 | |
429 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stac
k[MAX_LABEL_BUFFER_SIZE]; | |
430 | |
431 //initialize pointers to stack buffers | |
432 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; | |
433 int32_t b1Len, b2Len, b1PrimeLen, b3Len, | |
434 b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
435 b2Capacity = MAX_LABEL_BUFFER_SIZE, | |
436 b3Capacity = MAX_LABEL_BUFFER_SIZE, | |
437 reqLength=0; | |
438 // UParseError parseError; | |
439 | |
440 NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
441 b1Len = 0; | |
442 UBool* caseFlags = NULL; | |
443 | |
444 //get the options | |
445 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0)
; | |
446 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
447 | |
448 UBool srcIsASCII = TRUE; | |
449 UBool srcIsLDH = TRUE; | |
450 int32_t failPos =0; | |
451 | |
452 if(U_FAILURE(*status)){ | |
453 goto CLEANUP; | |
454 } | |
455 // step 1: find out if all the codepoints in src are ASCII | |
456 if(srcLength==-1){ | |
457 srcLength = 0; | |
458 for(;src[srcLength]!=0;){ | |
459 if(src[srcLength]> 0x7f){ | |
460 srcIsASCII = FALSE; | |
461 }if(prep->isLDHChar(src[srcLength])==FALSE){ | |
462 // here we do not assemble surrogates | |
463 // since we know that LDH code points | |
464 // are in the ASCII range only | |
465 srcIsLDH = FALSE; | |
466 failPos = srcLength; | |
467 } | |
468 srcLength++; | |
469 } | |
470 }else{ | |
471 for(int32_t j=0; j<srcLength; j++){ | |
472 if(src[j]> 0x7f){ | |
473 srcIsASCII = FALSE; | |
474 }else if(prep->isLDHChar(src[j])==FALSE){ | |
475 // here we do not assemble surrogates | |
476 // since we know that LDH code points | |
477 // are in the ASCII range only | |
478 srcIsLDH = FALSE; | |
479 failPos = j; | |
480 } | |
481 } | |
482 } | |
483 | |
484 if(srcIsASCII == FALSE){ | |
485 // step 2: process the string | |
486 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parse
Error, *status); | |
487 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
488 // redo processing of string | |
489 /* we do not have enough room so grow the buffer*/ | |
490 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
491 if(b1==NULL){ | |
492 *status = U_MEMORY_ALLOCATION_ERROR; | |
493 goto CLEANUP; | |
494 } | |
495 | |
496 *status = U_ZERO_ERROR; // reset error | |
497 | |
498 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parse
Error, *status); | |
499 } | |
500 //bail out on error | |
501 if(U_FAILURE(*status)){ | |
502 goto CLEANUP; | |
503 } | |
504 }else{ | |
505 | |
506 // copy everything to b1 | |
507 if(srcLength < b1Capacity){ | |
508 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); | |
509 }else{ | |
510 /* we do not have enough room so grow the buffer*/ | |
511 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); | |
512 if(b1==NULL){ | |
513 *status = U_MEMORY_ALLOCATION_ERROR; | |
514 goto CLEANUP; | |
515 } | |
516 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); | |
517 } | |
518 b1Len = srcLength; | |
519 } | |
520 //step 3: verify ACE Prefix | |
521 if(startsWithPrefix(src,srcLength)){ | |
522 | |
523 //step 4: Remove the ACE Prefix | |
524 b1Prime = b1 + ACE_PREFIX_LENGTH; | |
525 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; | |
526 | |
527 //step 5: Decode using punycode | |
528 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status); | |
529 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags
, status); | |
530 | |
531 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
532 // redo processing of string | |
533 /* we do not have enough room so grow the buffer*/ | |
534 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
535 if(b2==NULL){ | |
536 *status = U_MEMORY_ALLOCATION_ERROR; | |
537 goto CLEANUP; | |
538 } | |
539 | |
540 *status = U_ZERO_ERROR; // reset error | |
541 | |
542 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status); | |
543 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags,
status); | |
544 } | |
545 | |
546 | |
547 //step 6:Apply toASCII | |
548 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, statu
s); | |
549 | |
550 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
551 // redo processing of string | |
552 /* we do not have enough room so grow the buffer*/ | |
553 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); | |
554 if(b3==NULL){ | |
555 *status = U_MEMORY_ALLOCATION_ERROR; | |
556 goto CLEANUP; | |
557 } | |
558 | |
559 *status = U_ZERO_ERROR; // reset error | |
560 | |
561 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, sta
tus); | |
562 | |
563 } | |
564 //bail out on error | |
565 if(U_FAILURE(*status)){ | |
566 goto CLEANUP; | |
567 } | |
568 | |
569 //step 7: verify | |
570 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ | |
571 *status = U_IDNA_VERIFICATION_ERROR; | |
572 goto CLEANUP; | |
573 } | |
574 | |
575 //step 8: return output of step 5 | |
576 reqLength = b2Len; | |
577 if(b2Len <= destCapacity) { | |
578 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); | |
579 } | |
580 }else{ | |
581 // verify that STD3 ASCII rules are satisfied | |
582 if(useSTD3ASCIIRules == TRUE){ | |
583 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
584 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ | |
585 *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
586 | |
587 /* populate the parseError struct */ | |
588 if(srcIsLDH==FALSE){ | |
589 // failPos is always set the index of failure | |
590 uprv_syntaxError(src,failPos, srcLength,parseError); | |
591 }else if(src[0] == HYPHEN){ | |
592 // fail position is 0 | |
593 uprv_syntaxError(src,0,srcLength,parseError); | |
594 }else{ | |
595 // the last index in the source is always length-1 | |
596 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLengt
h, srcLength,parseError); | |
597 } | |
598 | |
599 goto CLEANUP; | |
600 } | |
601 } | |
602 //copy the source to destination | |
603 if(srcLength <= destCapacity){ | |
604 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
605 } | |
606 reqLength = srcLength; | |
607 } | |
608 | |
609 CLEANUP: | |
610 | |
611 if(b1 != b1Stack){ | |
612 uprv_free(b1); | |
613 } | |
614 if(b2 != b2Stack){ | |
615 uprv_free(b2); | |
616 } | |
617 uprv_free(caseFlags); | |
618 | |
619 // The RFC states that | |
620 // <quote> | |
621 // ToUnicode never fails. If any step fails, then the original input | |
622 // is returned immediately in that step. | |
623 // </quote> | |
624 // So if any step fails lets copy source to destination | |
625 if(U_FAILURE(*status)){ | |
626 //copy the source to destination | |
627 if(dest && srcLength <= destCapacity){ | |
628 if(srcLength == -1) { | |
629 uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR); | |
630 } else { | |
631 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
632 } | |
633 } | |
634 reqLength = srcLength; | |
635 *status = U_ZERO_ERROR; | |
636 } | |
637 return u_terminateUChars(dest, destCapacity, reqLength, status); | |
638 } | |
639 | |
640 | |
641 static int32_t | |
642 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep, | |
643 UChar **limit, | |
644 UBool *done, | |
645 UErrorCode *status){ | |
646 if(srcLength == -1){ | |
647 int32_t i; | |
648 for(i=0 ; ;i++){ | |
649 if(src[i] == 0){ | |
650 *limit = src + i; // point to null | |
651 *done = TRUE; | |
652 return i; | |
653 } | |
654 if(prep->isLabelSeparator(src[i],*status)){ | |
655 *limit = src + (i+1); // go past the delimiter | |
656 return i; | |
657 | |
658 } | |
659 } | |
660 }else{ | |
661 int32_t i; | |
662 for(i=0;i<srcLength;i++){ | |
663 if(prep->isLabelSeparator(src[i],*status)){ | |
664 *limit = src + (i+1); // go past the delimiter | |
665 return i; | |
666 } | |
667 } | |
668 // we have not found the delimiter | |
669 if(i==srcLength){ | |
670 *limit = src+srcLength; | |
671 *done = TRUE; | |
672 } | |
673 return i; | |
674 } | |
675 } | |
676 | |
677 U_CFUNC int32_t U_EXPORT2 | |
678 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, | |
679 UChar* dest, int32_t destCapacity, | |
680 int32_t options, | |
681 UParseError* parseError, | |
682 UErrorCode* status){ | |
683 | |
684 if(status == NULL || U_FAILURE(*status)){ | |
685 return 0; | |
686 } | |
687 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ | |
688 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
689 return 0; | |
690 } | |
691 | |
692 int32_t reqLength = 0; | |
693 // UParseError parseError; | |
694 | |
695 NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
696 | |
697 //initialize pointers to stack buffers | |
698 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
699 UChar *b1 = b1Stack; | |
700 int32_t b1Len, labelLen; | |
701 UChar* delimiter = (UChar*)src; | |
702 UChar* labelStart = (UChar*)src; | |
703 int32_t remainingLen = srcLength; | |
704 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
705 | |
706 //get the options | |
707 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) !=
0); | |
708 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0)
; | |
709 UBool done = FALSE; | |
710 | |
711 if(U_FAILURE(*status)){ | |
712 goto CLEANUP; | |
713 } | |
714 | |
715 | |
716 if(srcLength == -1){ | |
717 for(;;){ | |
718 | |
719 if(*delimiter == 0){ | |
720 break; | |
721 } | |
722 | |
723 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done,
status); | |
724 b1Len = 0; | |
725 if(!(labelLen==0 && done)){// make sure this is not a root label sep
arator. | |
726 | |
727 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, | |
728 options, parseError, status); | |
729 | |
730 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
731 // redo processing of string | |
732 /* we do not have enough room so grow the buffer*/ | |
733 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
734 if(b1==NULL){ | |
735 *status = U_MEMORY_ALLOCATION_ERROR; | |
736 goto CLEANUP; | |
737 } | |
738 | |
739 *status = U_ZERO_ERROR; // reset error | |
740 | |
741 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, | |
742 options, parseError, status); | |
743 | |
744 } | |
745 } | |
746 | |
747 if(U_FAILURE(*status)){ | |
748 goto CLEANUP; | |
749 } | |
750 int32_t tempLen = (reqLength + b1Len ); | |
751 // copy to dest | |
752 if( tempLen< destCapacity){ | |
753 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
754 } | |
755 | |
756 reqLength = tempLen; | |
757 | |
758 // add the label separator | |
759 if(done == FALSE){ | |
760 if(reqLength < destCapacity){ | |
761 dest[reqLength] = FULL_STOP; | |
762 } | |
763 reqLength++; | |
764 } | |
765 | |
766 labelStart = delimiter; | |
767 } | |
768 }else{ | |
769 for(;;){ | |
770 | |
771 if(delimiter == src+srcLength){ | |
772 break; | |
773 } | |
774 | |
775 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimit
er, &done, status); | |
776 | |
777 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, | |
778 options,parseError, status); | |
779 | |
780 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
781 // redo processing of string | |
782 /* we do not have enough room so grow the buffer*/ | |
783 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
784 if(b1==NULL){ | |
785 *status = U_MEMORY_ALLOCATION_ERROR; | |
786 goto CLEANUP; | |
787 } | |
788 | |
789 *status = U_ZERO_ERROR; // reset error | |
790 | |
791 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, | |
792 options, parseError, status); | |
793 | |
794 } | |
795 | |
796 if(U_FAILURE(*status)){ | |
797 goto CLEANUP; | |
798 } | |
799 int32_t tempLen = (reqLength + b1Len ); | |
800 // copy to dest | |
801 if( tempLen< destCapacity){ | |
802 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
803 } | |
804 | |
805 reqLength = tempLen; | |
806 | |
807 // add the label separator | |
808 if(done == FALSE){ | |
809 if(reqLength < destCapacity){ | |
810 dest[reqLength] = FULL_STOP; | |
811 } | |
812 reqLength++; | |
813 } | |
814 | |
815 labelStart = delimiter; | |
816 remainingLen = srcLength - (delimiter - src); | |
817 } | |
818 } | |
819 | |
820 | |
821 CLEANUP: | |
822 | |
823 if(b1 != b1Stack){ | |
824 uprv_free(b1); | |
825 } | |
826 | |
827 // delete prep; | |
828 | |
829 return u_terminateUChars(dest, destCapacity, reqLength, status); | |
830 } | |
831 | |
832 U_CFUNC int32_t U_EXPORT2 | |
833 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, | |
834 UChar* dest, int32_t destCapacity, | |
835 int32_t options, | |
836 UParseError* parseError, | |
837 UErrorCode* status){ | |
838 | |
839 if(status == NULL || U_FAILURE(*status)){ | |
840 return 0; | |
841 } | |
842 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ | |
843 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
844 return 0; | |
845 } | |
846 | |
847 int32_t reqLength = 0; | |
848 | |
849 UBool done = FALSE; | |
850 | |
851 NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
852 | |
853 //initialize pointers to stack buffers | |
854 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
855 UChar *b1 = b1Stack; | |
856 int32_t b1Len, labelLen; | |
857 UChar* delimiter = (UChar*)src; | |
858 UChar* labelStart = (UChar*)src; | |
859 int32_t remainingLen = srcLength; | |
860 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
861 | |
862 //get the options | |
863 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) !=
0); | |
864 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0)
; | |
865 | |
866 if(U_FAILURE(*status)){ | |
867 goto CLEANUP; | |
868 } | |
869 | |
870 if(srcLength == -1){ | |
871 for(;;){ | |
872 | |
873 if(*delimiter == 0){ | |
874 break; | |
875 } | |
876 | |
877 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done,
status); | |
878 | |
879 if(labelLen==0 && done==FALSE){ | |
880 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
881 } | |
882 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity, | |
883 options, parseError, status); | |
884 | |
885 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
886 // redo processing of string | |
887 /* we do not have enough room so grow the buffer*/ | |
888 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
889 if(b1==NULL){ | |
890 *status = U_MEMORY_ALLOCATION_ERROR; | |
891 goto CLEANUP; | |
892 } | |
893 | |
894 *status = U_ZERO_ERROR; // reset error | |
895 | |
896 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, | |
897 options, parseError, status); | |
898 | |
899 } | |
900 | |
901 if(U_FAILURE(*status)){ | |
902 goto CLEANUP; | |
903 } | |
904 int32_t tempLen = (reqLength + b1Len ); | |
905 // copy to dest | |
906 if( tempLen< destCapacity){ | |
907 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
908 } | |
909 | |
910 reqLength = tempLen; | |
911 // add the label separator | |
912 if(done == FALSE){ | |
913 if(reqLength < destCapacity){ | |
914 dest[reqLength] = FULL_STOP; | |
915 } | |
916 reqLength++; | |
917 } | |
918 | |
919 labelStart = delimiter; | |
920 } | |
921 }else{ | |
922 for(;;){ | |
923 | |
924 if(delimiter == src+srcLength){ | |
925 break; | |
926 } | |
927 | |
928 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimit
er, &done, status); | |
929 | |
930 if(labelLen==0 && done==FALSE){ | |
931 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
932 } | |
933 | |
934 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity, | |
935 options, parseError, status); | |
936 | |
937 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
938 // redo processing of string | |
939 /* we do not have enough room so grow the buffer*/ | |
940 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
941 if(b1==NULL){ | |
942 *status = U_MEMORY_ALLOCATION_ERROR; | |
943 goto CLEANUP; | |
944 } | |
945 | |
946 *status = U_ZERO_ERROR; // reset error | |
947 | |
948 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, | |
949 options, parseError, status); | |
950 | |
951 } | |
952 | |
953 if(U_FAILURE(*status)){ | |
954 goto CLEANUP; | |
955 } | |
956 int32_t tempLen = (reqLength + b1Len ); | |
957 // copy to dest | |
958 if( tempLen< destCapacity){ | |
959 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
960 } | |
961 | |
962 reqLength = tempLen; | |
963 | |
964 // add the label separator | |
965 if(done == FALSE){ | |
966 if(reqLength < destCapacity){ | |
967 dest[reqLength] = FULL_STOP; | |
968 } | |
969 reqLength++; | |
970 } | |
971 | |
972 labelStart = delimiter; | |
973 remainingLen = srcLength - (delimiter - src); | |
974 } | |
975 } | |
976 | |
977 CLEANUP: | |
978 | |
979 if(b1 != b1Stack){ | |
980 uprv_free(b1); | |
981 } | |
982 | |
983 // delete prep; | |
984 | |
985 return u_terminateUChars(dest, destCapacity, reqLength, status); | |
986 } | |
987 | |
988 U_CFUNC int32_t U_EXPORT2 | |
989 idnaref_compare( const UChar *s1, int32_t length1, | |
990 const UChar *s2, int32_t length2, | |
991 int32_t options, | |
992 UErrorCode* status){ | |
993 | |
994 if(status == NULL || U_FAILURE(*status)){ | |
995 return -1; | |
996 } | |
997 | |
998 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; | |
999 UChar *b1 = b1Stack, *b2 = b2Stack; | |
1000 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN
_BUFFER_SIZE; | |
1001 int32_t result = -1; | |
1002 | |
1003 UParseError parseError; | |
1004 | |
1005 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError
, status); | |
1006 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
1007 // redo processing of string | |
1008 /* we do not have enough room so grow the buffer*/ | |
1009 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
1010 if(b1==NULL){ | |
1011 *status = U_MEMORY_ALLOCATION_ERROR; | |
1012 goto CLEANUP; | |
1013 } | |
1014 | |
1015 *status = U_ZERO_ERROR; // reset error | |
1016 | |
1017 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, st
atus); | |
1018 | |
1019 } | |
1020 | |
1021 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, st
atus); | |
1022 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
1023 // redo processing of string | |
1024 /* we do not have enough room so grow the buffer*/ | |
1025 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
1026 if(b2==NULL){ | |
1027 *status = U_MEMORY_ALLOCATION_ERROR; | |
1028 goto CLEANUP; | |
1029 } | |
1030 | |
1031 *status = U_ZERO_ERROR; // reset error | |
1032 | |
1033 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, sta
tus); | |
1034 | |
1035 } | |
1036 // when toASCII is applied all label separators are replaced with FULL_STOP | |
1037 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); | |
1038 | |
1039 CLEANUP: | |
1040 if(b1 != b1Stack){ | |
1041 uprv_free(b1); | |
1042 } | |
1043 | |
1044 if(b2 != b2Stack){ | |
1045 uprv_free(b2); | |
1046 } | |
1047 | |
1048 return result; | |
1049 } | |
1050 #endif /* #if !UCONFIG_NO_IDNA */ | |
OLD | NEW |