| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ******************************************************************************* | |
| 3 * | |
| 4 * Copyright (C) 2003-2011, International Business Machines | |
| 5 * Corporation and others. All Rights Reserved. | |
| 6 * | |
| 7 ******************************************************************************* | |
| 8 * file name: idnaref.cpp | |
| 9 * encoding: US-ASCII | |
| 10 * tab size: 8 (not used) | |
| 11 * indentation:4 | |
| 12 * | |
| 13 * created on: 2003feb1 | |
| 14 * created by: Ram Viswanadha | |
| 15 */ | |
| 16 | |
| 17 #include "unicode/utypes.h" | |
| 18 | |
| 19 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION | |
| 20 #include "idnaref.h" | |
| 21 #include "punyref.h" | |
| 22 #include "ustr_imp.h" | |
| 23 #include "cmemory.h" | |
| 24 #include "sprpimpl.h" | |
| 25 #include "nptrans.h" | |
| 26 #include "testidna.h" | |
| 27 #include "punycode.h" | |
| 28 #include "unicode/ustring.h" | |
| 29 | |
| 30 /* The official IDNA ACE prefix is "xn--" */ | |
| 31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; | |
| 32 #define ACE_PREFIX_LENGTH 4 | |
| 33 | |
| 34 #define MAX_LABEL_LENGTH 63 | |
| 35 #define HYPHEN 0x002D | |
| 36 /* The Max length of the labels should not be more than 64 */ | |
| 37 #define MAX_LABEL_BUFFER_SIZE 100 | |
| 38 #define MAX_IDN_BUFFER_SIZE 300 | |
| 39 | |
| 40 #define CAPITAL_A 0x0041 | |
| 41 #define CAPITAL_Z 0x005A | |
| 42 #define LOWER_CASE_DELTA 0x0020 | |
| 43 #define FULL_STOP 0x002E | |
| 44 | |
| 45 | |
| 46 inline static UBool | |
| 47 startsWithPrefix(const UChar* src , int32_t srcLength){ | |
| 48 UBool startsWithPrefix = TRUE; | |
| 49 | |
| 50 if(srcLength < ACE_PREFIX_LENGTH){ | |
| 51 return FALSE; | |
| 52 } | |
| 53 | |
| 54 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ | |
| 55 if(u_tolower(src[i]) != ACE_PREFIX[i]){ | |
| 56 startsWithPrefix = FALSE; | |
| 57 } | |
| 58 } | |
| 59 return startsWithPrefix; | |
| 60 } | |
| 61 | |
| 62 inline static UChar | |
| 63 toASCIILower(UChar ch){ | |
| 64 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ | |
| 65 return ch + LOWER_CASE_DELTA; | |
| 66 } | |
| 67 return ch; | |
| 68 } | |
| 69 | |
| 70 inline static int32_t | |
| 71 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, | |
| 72 const UChar* s2, int32_t s2Len){ | |
| 73 if(s1Len != s2Len){ | |
| 74 return (s1Len > s2Len) ? s1Len : s2Len; | |
| 75 } | |
| 76 UChar c1,c2; | |
| 77 int32_t rc; | |
| 78 | |
| 79 for(int32_t i =0;/* no condition */;i++) { | |
| 80 /* If we reach the ends of both strings then they match */ | |
| 81 if(i == s1Len) { | |
| 82 return 0; | |
| 83 } | |
| 84 | |
| 85 c1 = s1[i]; | |
| 86 c2 = s2[i]; | |
| 87 | |
| 88 /* Case-insensitive comparison */ | |
| 89 if(c1!=c2) { | |
| 90 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); | |
| 91 if(rc!=0) { | |
| 92 return rc; | |
| 93 } | |
| 94 } | |
| 95 } | |
| 96 | |
| 97 } | |
| 98 | |
| 99 static UErrorCode getError(enum punycode_status status){ | |
| 100 switch(status){ | |
| 101 case punycode_success: | |
| 102 return U_ZERO_ERROR; | |
| 103 case punycode_bad_input: /* Input is invalid. */ | |
| 104 return U_INVALID_CHAR_FOUND; | |
| 105 case punycode_big_output: /* Output would exceed the space provided. */ | |
| 106 return U_BUFFER_OVERFLOW_ERROR; | |
| 107 case punycode_overflow : /* Input requires wider integers to process. */ | |
| 108 return U_INDEX_OUTOFBOUNDS_ERROR; | |
| 109 default: | |
| 110 return U_INTERNAL_PROGRAM_ERROR; | |
| 111 } | |
| 112 } | |
| 113 | |
| 114 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t
length){ | |
| 115 int i; | |
| 116 for(i=0;i<length;i++){ | |
| 117 dest[i] = src[i]; | |
| 118 } | |
| 119 return i; | |
| 120 } | |
| 121 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t
length){ | |
| 122 int i; | |
| 123 for(i=0;i<length;i++){ | |
| 124 dest[i] = (char)src[i]; | |
| 125 } | |
| 126 return i; | |
| 127 } | |
| 128 // wrapper around the reference Punycode implementation | |
| 129 static int32_t convertToPuny(const UChar* src, int32_t srcLength, | |
| 130 UChar* dest, int32_t destCapacity, | |
| 131 UErrorCode& status){ | |
| 132 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
| 133 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
| 134 uint32_t* b1 = b1Stack; | |
| 135 char b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
| 136 char* b2 = b2Stack; | |
| 137 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; | |
| 138 punycode_status error; | |
| 139 unsigned char* caseFlags = NULL; | |
| 140 | |
| 141 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); | |
| 142 if(status == U_BUFFER_OVERFLOW_ERROR){ | |
| 143 // redo processing of string | |
| 144 /* we do not have enough room so grow the buffer*/ | |
| 145 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); | |
| 146 if(b1==NULL){ | |
| 147 status = U_MEMORY_ALLOCATION_ERROR; | |
| 148 goto CLEANUP; | |
| 149 } | |
| 150 | |
| 151 status = U_ZERO_ERROR; // reset error | |
| 152 | |
| 153 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); | |
| 154 } | |
| 155 if(U_FAILURE(status)){ | |
| 156 goto CLEANUP; | |
| 157 } | |
| 158 | |
| 159 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); | |
| 160 | |
| 161 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); | |
| 162 status = getError(error); | |
| 163 | |
| 164 if(status == U_BUFFER_OVERFLOW_ERROR){ | |
| 165 /* we do not have enough room so grow the buffer*/ | |
| 166 b2 = (char*) uprv_malloc( b2Len * sizeof(char)); | |
| 167 if(b2==NULL){ | |
| 168 status = U_MEMORY_ALLOCATION_ERROR; | |
| 169 goto CLEANUP; | |
| 170 } | |
| 171 | |
| 172 status = U_ZERO_ERROR; // reset error | |
| 173 | |
| 174 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&
b2Len, b2); | |
| 175 status = getError(error); | |
| 176 } | |
| 177 if(U_FAILURE(status)){ | |
| 178 goto CLEANUP; | |
| 179 } | |
| 180 | |
| 181 if(b2Len < destCapacity){ | |
| 182 convertASCIIToUChars(b2,dest,b2Len); | |
| 183 }else{ | |
| 184 status =U_BUFFER_OVERFLOW_ERROR; | |
| 185 } | |
| 186 | |
| 187 CLEANUP: | |
| 188 if(b1Stack != b1){ | |
| 189 uprv_free(b1); | |
| 190 } | |
| 191 if(b2Stack != b2){ | |
| 192 uprv_free(b2); | |
| 193 } | |
| 194 uprv_free(caseFlags); | |
| 195 | |
| 196 return b2Len; | |
| 197 } | |
| 198 | |
| 199 static int32_t convertFromPuny( const UChar* src, int32_t srcLength, | |
| 200 UChar* dest, int32_t destCapacity, | |
| 201 UErrorCode& status){ | |
| 202 char b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
| 203 char* b1 = b1Stack; | |
| 204 int32_t destLen =0; | |
| 205 | |
| 206 convertUCharsToASCII(src, b1,srcLength); | |
| 207 | |
| 208 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
| 209 uint32_t* b2 = b2Stack; | |
| 210 int32_t b2Len =MAX_LABEL_BUFFER_SIZE; | |
| 211 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength *
sizeof(unsigned char*)); | |
| 212 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,ca
seFlags); | |
| 213 status = getError(error); | |
| 214 if(status == U_BUFFER_OVERFLOW_ERROR){ | |
| 215 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); | |
| 216 if(b2 == NULL){ | |
| 217 status = U_MEMORY_ALLOCATION_ERROR; | |
| 218 goto CLEANUP; | |
| 219 } | |
| 220 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); | |
| 221 status = getError(error); | |
| 222 } | |
| 223 | |
| 224 if(U_FAILURE(status)){ | |
| 225 goto CLEANUP; | |
| 226 } | |
| 227 | |
| 228 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); | |
| 229 | |
| 230 CLEANUP: | |
| 231 if(b1Stack != b1){ | |
| 232 uprv_free(b1); | |
| 233 } | |
| 234 if(b2Stack != b2){ | |
| 235 uprv_free(b2); | |
| 236 } | |
| 237 uprv_free(caseFlags); | |
| 238 | |
| 239 return destLen; | |
| 240 } | |
| 241 | |
| 242 | |
| 243 U_CFUNC int32_t U_EXPORT2 | |
| 244 idnaref_toASCII(const UChar* src, int32_t srcLength, | |
| 245 UChar* dest, int32_t destCapacity, | |
| 246 int32_t options, | |
| 247 UParseError* parseError, | |
| 248 UErrorCode* status){ | |
| 249 | |
| 250 if(status == NULL || U_FAILURE(*status)){ | |
| 251 return 0; | |
| 252 } | |
| 253 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ | |
| 254 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 255 return 0; | |
| 256 } | |
| 257 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
| 258 //initialize pointers to stack buffers | |
| 259 UChar *b1 = b1Stack, *b2 = b2Stack; | |
| 260 int32_t b1Len=0, b2Len=0, | |
| 261 b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
| 262 b2Capacity = MAX_LABEL_BUFFER_SIZE , | |
| 263 reqLength=0; | |
| 264 | |
| 265 //get the options | |
| 266 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0)
; | |
| 267 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
| 268 | |
| 269 UBool* caseFlags = NULL; | |
| 270 | |
| 271 // assume the source contains all ascii codepoints | |
| 272 UBool srcIsASCII = TRUE; | |
| 273 // assume the source contains all LDH codepoints | |
| 274 UBool srcIsLDH = TRUE; | |
| 275 int32_t j=0; | |
| 276 | |
| 277 if(srcLength == -1){ | |
| 278 srcLength = u_strlen(src); | |
| 279 } | |
| 280 | |
| 281 // step 1 | |
| 282 for( j=0;j<srcLength;j++){ | |
| 283 if(src[j] > 0x7F){ | |
| 284 srcIsASCII = FALSE; | |
| 285 } | |
| 286 b1[b1Len++] = src[j]; | |
| 287 } | |
| 288 | |
| 289 NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
| 290 if(U_FAILURE(*status)){ | |
| 291 goto CLEANUP; | |
| 292 } | |
| 293 | |
| 294 // step 2 is performed only if the source contains non ASCII | |
| 295 if (!srcIsASCII) { | |
| 296 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parse
Error,*status); | |
| 297 | |
| 298 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 299 // redo processing of string | |
| 300 /* we do not have enough room so grow the buffer*/ | |
| 301 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
| 302 if(b1==NULL){ | |
| 303 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 304 goto CLEANUP; | |
| 305 } | |
| 306 | |
| 307 *status = U_ZERO_ERROR; // reset error | |
| 308 | |
| 309 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parse
Error, *status); | |
| 310 } | |
| 311 // error bail out | |
| 312 if(U_FAILURE(*status)){ | |
| 313 goto CLEANUP; | |
| 314 } | |
| 315 } | |
| 316 | |
| 317 if(b1Len == 0){ | |
| 318 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
| 319 goto CLEANUP; | |
| 320 } | |
| 321 | |
| 322 srcIsASCII = TRUE; | |
| 323 // step 3 & 4 | |
| 324 for( j=0;j<b1Len;j++){ | |
| 325 if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII | |
| 326 srcIsASCII = FALSE; | |
| 327 }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII ran
ge verify that it is an LDH character{ | |
| 328 srcIsLDH = FALSE; | |
| 329 } | |
| 330 } | |
| 331 | |
| 332 if(useSTD3ASCIIRules == TRUE){ | |
| 333 // verify 3a and 3b | |
| 334 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
| 335 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ | |
| 336 *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
| 337 goto CLEANUP; | |
| 338 } | |
| 339 } | |
| 340 if(srcIsASCII){ | |
| 341 if(b1Len <= destCapacity){ | |
| 342 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); | |
| 343 reqLength = b1Len; | |
| 344 }else{ | |
| 345 reqLength = b1Len; | |
| 346 goto CLEANUP; | |
| 347 } | |
| 348 }else{ | |
| 349 // step 5 : verify the sequence does not begin with ACE prefix | |
| 350 if(!startsWithPrefix(b1,b1Len)){ | |
| 351 | |
| 352 //step 6: encode the sequence with punycode | |
| 353 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); | |
| 354 | |
| 355 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status); | |
| 356 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status)
; | |
| 357 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 358 // redo processing of string | |
| 359 /* we do not have enough room so grow the buffer*/ | |
| 360 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
| 361 if(b2 == NULL){ | |
| 362 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 363 goto CLEANUP; | |
| 364 } | |
| 365 | |
| 366 *status = U_ZERO_ERROR; // reset error | |
| 367 | |
| 368 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status); | |
| 369 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status); | |
| 370 | |
| 371 } | |
| 372 //error bail out | |
| 373 if(U_FAILURE(*status)){ | |
| 374 goto CLEANUP; | |
| 375 } | |
| 376 reqLength = b2Len+ACE_PREFIX_LENGTH; | |
| 377 | |
| 378 if(reqLength > destCapacity){ | |
| 379 *status = U_BUFFER_OVERFLOW_ERROR; | |
| 380 goto CLEANUP; | |
| 381 } | |
| 382 //Step 7: prepend the ACE prefix | |
| 383 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); | |
| 384 //Step 6: copy the contents in b2 into dest | |
| 385 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); | |
| 386 | |
| 387 }else{ | |
| 388 *status = U_IDNA_ACE_PREFIX_ERROR; | |
| 389 goto CLEANUP; | |
| 390 } | |
| 391 } | |
| 392 | |
| 393 if(reqLength > MAX_LABEL_LENGTH){ | |
| 394 *status = U_IDNA_LABEL_TOO_LONG_ERROR; | |
| 395 } | |
| 396 | |
| 397 CLEANUP: | |
| 398 if(b1 != b1Stack){ | |
| 399 uprv_free(b1); | |
| 400 } | |
| 401 if(b2 != b2Stack){ | |
| 402 uprv_free(b2); | |
| 403 } | |
| 404 uprv_free(caseFlags); | |
| 405 | |
| 406 // delete prep; | |
| 407 | |
| 408 return u_terminateUChars(dest, destCapacity, reqLength, status); | |
| 409 } | |
| 410 | |
| 411 | |
| 412 U_CFUNC int32_t U_EXPORT2 | |
| 413 idnaref_toUnicode(const UChar* src, int32_t srcLength, | |
| 414 UChar* dest, int32_t destCapacity, | |
| 415 int32_t options, | |
| 416 UParseError* parseError, | |
| 417 UErrorCode* status){ | |
| 418 | |
| 419 if(status == NULL || U_FAILURE(*status)){ | |
| 420 return 0; | |
| 421 } | |
| 422 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ | |
| 423 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 424 return 0; | |
| 425 } | |
| 426 | |
| 427 | |
| 428 | |
| 429 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stac
k[MAX_LABEL_BUFFER_SIZE]; | |
| 430 | |
| 431 //initialize pointers to stack buffers | |
| 432 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; | |
| 433 int32_t b1Len, b2Len, b1PrimeLen, b3Len, | |
| 434 b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
| 435 b2Capacity = MAX_LABEL_BUFFER_SIZE, | |
| 436 b3Capacity = MAX_LABEL_BUFFER_SIZE, | |
| 437 reqLength=0; | |
| 438 // UParseError parseError; | |
| 439 | |
| 440 NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
| 441 b1Len = 0; | |
| 442 UBool* caseFlags = NULL; | |
| 443 | |
| 444 //get the options | |
| 445 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0)
; | |
| 446 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
| 447 | |
| 448 UBool srcIsASCII = TRUE; | |
| 449 UBool srcIsLDH = TRUE; | |
| 450 int32_t failPos =0; | |
| 451 | |
| 452 if(U_FAILURE(*status)){ | |
| 453 goto CLEANUP; | |
| 454 } | |
| 455 // step 1: find out if all the codepoints in src are ASCII | |
| 456 if(srcLength==-1){ | |
| 457 srcLength = 0; | |
| 458 for(;src[srcLength]!=0;){ | |
| 459 if(src[srcLength]> 0x7f){ | |
| 460 srcIsASCII = FALSE; | |
| 461 }if(prep->isLDHChar(src[srcLength])==FALSE){ | |
| 462 // here we do not assemble surrogates | |
| 463 // since we know that LDH code points | |
| 464 // are in the ASCII range only | |
| 465 srcIsLDH = FALSE; | |
| 466 failPos = srcLength; | |
| 467 } | |
| 468 srcLength++; | |
| 469 } | |
| 470 }else{ | |
| 471 for(int32_t j=0; j<srcLength; j++){ | |
| 472 if(src[j]> 0x7f){ | |
| 473 srcIsASCII = FALSE; | |
| 474 }else if(prep->isLDHChar(src[j])==FALSE){ | |
| 475 // here we do not assemble surrogates | |
| 476 // since we know that LDH code points | |
| 477 // are in the ASCII range only | |
| 478 srcIsLDH = FALSE; | |
| 479 failPos = j; | |
| 480 } | |
| 481 } | |
| 482 } | |
| 483 | |
| 484 if(srcIsASCII == FALSE){ | |
| 485 // step 2: process the string | |
| 486 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parse
Error, *status); | |
| 487 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 488 // redo processing of string | |
| 489 /* we do not have enough room so grow the buffer*/ | |
| 490 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
| 491 if(b1==NULL){ | |
| 492 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 493 goto CLEANUP; | |
| 494 } | |
| 495 | |
| 496 *status = U_ZERO_ERROR; // reset error | |
| 497 | |
| 498 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parse
Error, *status); | |
| 499 } | |
| 500 //bail out on error | |
| 501 if(U_FAILURE(*status)){ | |
| 502 goto CLEANUP; | |
| 503 } | |
| 504 }else{ | |
| 505 | |
| 506 // copy everything to b1 | |
| 507 if(srcLength < b1Capacity){ | |
| 508 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); | |
| 509 }else{ | |
| 510 /* we do not have enough room so grow the buffer*/ | |
| 511 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); | |
| 512 if(b1==NULL){ | |
| 513 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 514 goto CLEANUP; | |
| 515 } | |
| 516 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); | |
| 517 } | |
| 518 b1Len = srcLength; | |
| 519 } | |
| 520 //step 3: verify ACE Prefix | |
| 521 if(startsWithPrefix(src,srcLength)){ | |
| 522 | |
| 523 //step 4: Remove the ACE Prefix | |
| 524 b1Prime = b1 + ACE_PREFIX_LENGTH; | |
| 525 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; | |
| 526 | |
| 527 //step 5: Decode using punycode | |
| 528 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status); | |
| 529 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags
, status); | |
| 530 | |
| 531 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 532 // redo processing of string | |
| 533 /* we do not have enough room so grow the buffer*/ | |
| 534 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
| 535 if(b2==NULL){ | |
| 536 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 537 goto CLEANUP; | |
| 538 } | |
| 539 | |
| 540 *status = U_ZERO_ERROR; // reset error | |
| 541 | |
| 542 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status); | |
| 543 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags,
status); | |
| 544 } | |
| 545 | |
| 546 | |
| 547 //step 6:Apply toASCII | |
| 548 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, statu
s); | |
| 549 | |
| 550 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 551 // redo processing of string | |
| 552 /* we do not have enough room so grow the buffer*/ | |
| 553 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); | |
| 554 if(b3==NULL){ | |
| 555 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 556 goto CLEANUP; | |
| 557 } | |
| 558 | |
| 559 *status = U_ZERO_ERROR; // reset error | |
| 560 | |
| 561 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, sta
tus); | |
| 562 | |
| 563 } | |
| 564 //bail out on error | |
| 565 if(U_FAILURE(*status)){ | |
| 566 goto CLEANUP; | |
| 567 } | |
| 568 | |
| 569 //step 7: verify | |
| 570 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ | |
| 571 *status = U_IDNA_VERIFICATION_ERROR; | |
| 572 goto CLEANUP; | |
| 573 } | |
| 574 | |
| 575 //step 8: return output of step 5 | |
| 576 reqLength = b2Len; | |
| 577 if(b2Len <= destCapacity) { | |
| 578 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); | |
| 579 } | |
| 580 }else{ | |
| 581 // verify that STD3 ASCII rules are satisfied | |
| 582 if(useSTD3ASCIIRules == TRUE){ | |
| 583 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
| 584 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ | |
| 585 *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
| 586 | |
| 587 /* populate the parseError struct */ | |
| 588 if(srcIsLDH==FALSE){ | |
| 589 // failPos is always set the index of failure | |
| 590 uprv_syntaxError(src,failPos, srcLength,parseError); | |
| 591 }else if(src[0] == HYPHEN){ | |
| 592 // fail position is 0 | |
| 593 uprv_syntaxError(src,0,srcLength,parseError); | |
| 594 }else{ | |
| 595 // the last index in the source is always length-1 | |
| 596 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLengt
h, srcLength,parseError); | |
| 597 } | |
| 598 | |
| 599 goto CLEANUP; | |
| 600 } | |
| 601 } | |
| 602 //copy the source to destination | |
| 603 if(srcLength <= destCapacity){ | |
| 604 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
| 605 } | |
| 606 reqLength = srcLength; | |
| 607 } | |
| 608 | |
| 609 CLEANUP: | |
| 610 | |
| 611 if(b1 != b1Stack){ | |
| 612 uprv_free(b1); | |
| 613 } | |
| 614 if(b2 != b2Stack){ | |
| 615 uprv_free(b2); | |
| 616 } | |
| 617 uprv_free(caseFlags); | |
| 618 | |
| 619 // The RFC states that | |
| 620 // <quote> | |
| 621 // ToUnicode never fails. If any step fails, then the original input | |
| 622 // is returned immediately in that step. | |
| 623 // </quote> | |
| 624 // So if any step fails lets copy source to destination | |
| 625 if(U_FAILURE(*status)){ | |
| 626 //copy the source to destination | |
| 627 if(dest && srcLength <= destCapacity){ | |
| 628 if(srcLength == -1) { | |
| 629 uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR); | |
| 630 } else { | |
| 631 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
| 632 } | |
| 633 } | |
| 634 reqLength = srcLength; | |
| 635 *status = U_ZERO_ERROR; | |
| 636 } | |
| 637 return u_terminateUChars(dest, destCapacity, reqLength, status); | |
| 638 } | |
| 639 | |
| 640 | |
| 641 static int32_t | |
| 642 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep, | |
| 643 UChar **limit, | |
| 644 UBool *done, | |
| 645 UErrorCode *status){ | |
| 646 if(srcLength == -1){ | |
| 647 int32_t i; | |
| 648 for(i=0 ; ;i++){ | |
| 649 if(src[i] == 0){ | |
| 650 *limit = src + i; // point to null | |
| 651 *done = TRUE; | |
| 652 return i; | |
| 653 } | |
| 654 if(prep->isLabelSeparator(src[i],*status)){ | |
| 655 *limit = src + (i+1); // go past the delimiter | |
| 656 return i; | |
| 657 | |
| 658 } | |
| 659 } | |
| 660 }else{ | |
| 661 int32_t i; | |
| 662 for(i=0;i<srcLength;i++){ | |
| 663 if(prep->isLabelSeparator(src[i],*status)){ | |
| 664 *limit = src + (i+1); // go past the delimiter | |
| 665 return i; | |
| 666 } | |
| 667 } | |
| 668 // we have not found the delimiter | |
| 669 if(i==srcLength){ | |
| 670 *limit = src+srcLength; | |
| 671 *done = TRUE; | |
| 672 } | |
| 673 return i; | |
| 674 } | |
| 675 } | |
| 676 | |
| 677 U_CFUNC int32_t U_EXPORT2 | |
| 678 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, | |
| 679 UChar* dest, int32_t destCapacity, | |
| 680 int32_t options, | |
| 681 UParseError* parseError, | |
| 682 UErrorCode* status){ | |
| 683 | |
| 684 if(status == NULL || U_FAILURE(*status)){ | |
| 685 return 0; | |
| 686 } | |
| 687 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ | |
| 688 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 689 return 0; | |
| 690 } | |
| 691 | |
| 692 int32_t reqLength = 0; | |
| 693 // UParseError parseError; | |
| 694 | |
| 695 NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
| 696 | |
| 697 //initialize pointers to stack buffers | |
| 698 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
| 699 UChar *b1 = b1Stack; | |
| 700 int32_t b1Len, labelLen; | |
| 701 UChar* delimiter = (UChar*)src; | |
| 702 UChar* labelStart = (UChar*)src; | |
| 703 int32_t remainingLen = srcLength; | |
| 704 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
| 705 | |
| 706 //get the options | |
| 707 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) !=
0); | |
| 708 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0)
; | |
| 709 UBool done = FALSE; | |
| 710 | |
| 711 if(U_FAILURE(*status)){ | |
| 712 goto CLEANUP; | |
| 713 } | |
| 714 | |
| 715 | |
| 716 if(srcLength == -1){ | |
| 717 for(;;){ | |
| 718 | |
| 719 if(*delimiter == 0){ | |
| 720 break; | |
| 721 } | |
| 722 | |
| 723 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done,
status); | |
| 724 b1Len = 0; | |
| 725 if(!(labelLen==0 && done)){// make sure this is not a root label sep
arator. | |
| 726 | |
| 727 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, | |
| 728 options, parseError, status); | |
| 729 | |
| 730 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 731 // redo processing of string | |
| 732 /* we do not have enough room so grow the buffer*/ | |
| 733 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
| 734 if(b1==NULL){ | |
| 735 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 736 goto CLEANUP; | |
| 737 } | |
| 738 | |
| 739 *status = U_ZERO_ERROR; // reset error | |
| 740 | |
| 741 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, | |
| 742 options, parseError, status); | |
| 743 | |
| 744 } | |
| 745 } | |
| 746 | |
| 747 if(U_FAILURE(*status)){ | |
| 748 goto CLEANUP; | |
| 749 } | |
| 750 int32_t tempLen = (reqLength + b1Len ); | |
| 751 // copy to dest | |
| 752 if( tempLen< destCapacity){ | |
| 753 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
| 754 } | |
| 755 | |
| 756 reqLength = tempLen; | |
| 757 | |
| 758 // add the label separator | |
| 759 if(done == FALSE){ | |
| 760 if(reqLength < destCapacity){ | |
| 761 dest[reqLength] = FULL_STOP; | |
| 762 } | |
| 763 reqLength++; | |
| 764 } | |
| 765 | |
| 766 labelStart = delimiter; | |
| 767 } | |
| 768 }else{ | |
| 769 for(;;){ | |
| 770 | |
| 771 if(delimiter == src+srcLength){ | |
| 772 break; | |
| 773 } | |
| 774 | |
| 775 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimit
er, &done, status); | |
| 776 | |
| 777 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, | |
| 778 options,parseError, status); | |
| 779 | |
| 780 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 781 // redo processing of string | |
| 782 /* we do not have enough room so grow the buffer*/ | |
| 783 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
| 784 if(b1==NULL){ | |
| 785 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 786 goto CLEANUP; | |
| 787 } | |
| 788 | |
| 789 *status = U_ZERO_ERROR; // reset error | |
| 790 | |
| 791 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, | |
| 792 options, parseError, status); | |
| 793 | |
| 794 } | |
| 795 | |
| 796 if(U_FAILURE(*status)){ | |
| 797 goto CLEANUP; | |
| 798 } | |
| 799 int32_t tempLen = (reqLength + b1Len ); | |
| 800 // copy to dest | |
| 801 if( tempLen< destCapacity){ | |
| 802 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
| 803 } | |
| 804 | |
| 805 reqLength = tempLen; | |
| 806 | |
| 807 // add the label separator | |
| 808 if(done == FALSE){ | |
| 809 if(reqLength < destCapacity){ | |
| 810 dest[reqLength] = FULL_STOP; | |
| 811 } | |
| 812 reqLength++; | |
| 813 } | |
| 814 | |
| 815 labelStart = delimiter; | |
| 816 remainingLen = srcLength - (delimiter - src); | |
| 817 } | |
| 818 } | |
| 819 | |
| 820 | |
| 821 CLEANUP: | |
| 822 | |
| 823 if(b1 != b1Stack){ | |
| 824 uprv_free(b1); | |
| 825 } | |
| 826 | |
| 827 // delete prep; | |
| 828 | |
| 829 return u_terminateUChars(dest, destCapacity, reqLength, status); | |
| 830 } | |
| 831 | |
| 832 U_CFUNC int32_t U_EXPORT2 | |
| 833 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, | |
| 834 UChar* dest, int32_t destCapacity, | |
| 835 int32_t options, | |
| 836 UParseError* parseError, | |
| 837 UErrorCode* status){ | |
| 838 | |
| 839 if(status == NULL || U_FAILURE(*status)){ | |
| 840 return 0; | |
| 841 } | |
| 842 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ | |
| 843 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 844 return 0; | |
| 845 } | |
| 846 | |
| 847 int32_t reqLength = 0; | |
| 848 | |
| 849 UBool done = FALSE; | |
| 850 | |
| 851 NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
| 852 | |
| 853 //initialize pointers to stack buffers | |
| 854 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
| 855 UChar *b1 = b1Stack; | |
| 856 int32_t b1Len, labelLen; | |
| 857 UChar* delimiter = (UChar*)src; | |
| 858 UChar* labelStart = (UChar*)src; | |
| 859 int32_t remainingLen = srcLength; | |
| 860 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
| 861 | |
| 862 //get the options | |
| 863 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) !=
0); | |
| 864 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0)
; | |
| 865 | |
| 866 if(U_FAILURE(*status)){ | |
| 867 goto CLEANUP; | |
| 868 } | |
| 869 | |
| 870 if(srcLength == -1){ | |
| 871 for(;;){ | |
| 872 | |
| 873 if(*delimiter == 0){ | |
| 874 break; | |
| 875 } | |
| 876 | |
| 877 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done,
status); | |
| 878 | |
| 879 if(labelLen==0 && done==FALSE){ | |
| 880 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
| 881 } | |
| 882 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity, | |
| 883 options, parseError, status); | |
| 884 | |
| 885 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 886 // redo processing of string | |
| 887 /* we do not have enough room so grow the buffer*/ | |
| 888 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
| 889 if(b1==NULL){ | |
| 890 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 891 goto CLEANUP; | |
| 892 } | |
| 893 | |
| 894 *status = U_ZERO_ERROR; // reset error | |
| 895 | |
| 896 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, | |
| 897 options, parseError, status); | |
| 898 | |
| 899 } | |
| 900 | |
| 901 if(U_FAILURE(*status)){ | |
| 902 goto CLEANUP; | |
| 903 } | |
| 904 int32_t tempLen = (reqLength + b1Len ); | |
| 905 // copy to dest | |
| 906 if( tempLen< destCapacity){ | |
| 907 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
| 908 } | |
| 909 | |
| 910 reqLength = tempLen; | |
| 911 // add the label separator | |
| 912 if(done == FALSE){ | |
| 913 if(reqLength < destCapacity){ | |
| 914 dest[reqLength] = FULL_STOP; | |
| 915 } | |
| 916 reqLength++; | |
| 917 } | |
| 918 | |
| 919 labelStart = delimiter; | |
| 920 } | |
| 921 }else{ | |
| 922 for(;;){ | |
| 923 | |
| 924 if(delimiter == src+srcLength){ | |
| 925 break; | |
| 926 } | |
| 927 | |
| 928 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimit
er, &done, status); | |
| 929 | |
| 930 if(labelLen==0 && done==FALSE){ | |
| 931 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
| 932 } | |
| 933 | |
| 934 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity, | |
| 935 options, parseError, status); | |
| 936 | |
| 937 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 938 // redo processing of string | |
| 939 /* we do not have enough room so grow the buffer*/ | |
| 940 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
| 941 if(b1==NULL){ | |
| 942 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 943 goto CLEANUP; | |
| 944 } | |
| 945 | |
| 946 *status = U_ZERO_ERROR; // reset error | |
| 947 | |
| 948 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, | |
| 949 options, parseError, status); | |
| 950 | |
| 951 } | |
| 952 | |
| 953 if(U_FAILURE(*status)){ | |
| 954 goto CLEANUP; | |
| 955 } | |
| 956 int32_t tempLen = (reqLength + b1Len ); | |
| 957 // copy to dest | |
| 958 if( tempLen< destCapacity){ | |
| 959 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
| 960 } | |
| 961 | |
| 962 reqLength = tempLen; | |
| 963 | |
| 964 // add the label separator | |
| 965 if(done == FALSE){ | |
| 966 if(reqLength < destCapacity){ | |
| 967 dest[reqLength] = FULL_STOP; | |
| 968 } | |
| 969 reqLength++; | |
| 970 } | |
| 971 | |
| 972 labelStart = delimiter; | |
| 973 remainingLen = srcLength - (delimiter - src); | |
| 974 } | |
| 975 } | |
| 976 | |
| 977 CLEANUP: | |
| 978 | |
| 979 if(b1 != b1Stack){ | |
| 980 uprv_free(b1); | |
| 981 } | |
| 982 | |
| 983 // delete prep; | |
| 984 | |
| 985 return u_terminateUChars(dest, destCapacity, reqLength, status); | |
| 986 } | |
| 987 | |
| 988 U_CFUNC int32_t U_EXPORT2 | |
| 989 idnaref_compare( const UChar *s1, int32_t length1, | |
| 990 const UChar *s2, int32_t length2, | |
| 991 int32_t options, | |
| 992 UErrorCode* status){ | |
| 993 | |
| 994 if(status == NULL || U_FAILURE(*status)){ | |
| 995 return -1; | |
| 996 } | |
| 997 | |
| 998 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; | |
| 999 UChar *b1 = b1Stack, *b2 = b2Stack; | |
| 1000 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN
_BUFFER_SIZE; | |
| 1001 int32_t result = -1; | |
| 1002 | |
| 1003 UParseError parseError; | |
| 1004 | |
| 1005 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError
, status); | |
| 1006 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 1007 // redo processing of string | |
| 1008 /* we do not have enough room so grow the buffer*/ | |
| 1009 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
| 1010 if(b1==NULL){ | |
| 1011 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 1012 goto CLEANUP; | |
| 1013 } | |
| 1014 | |
| 1015 *status = U_ZERO_ERROR; // reset error | |
| 1016 | |
| 1017 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, st
atus); | |
| 1018 | |
| 1019 } | |
| 1020 | |
| 1021 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, st
atus); | |
| 1022 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 1023 // redo processing of string | |
| 1024 /* we do not have enough room so grow the buffer*/ | |
| 1025 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
| 1026 if(b2==NULL){ | |
| 1027 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 1028 goto CLEANUP; | |
| 1029 } | |
| 1030 | |
| 1031 *status = U_ZERO_ERROR; // reset error | |
| 1032 | |
| 1033 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, sta
tus); | |
| 1034 | |
| 1035 } | |
| 1036 // when toASCII is applied all label separators are replaced with FULL_STOP | |
| 1037 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); | |
| 1038 | |
| 1039 CLEANUP: | |
| 1040 if(b1 != b1Stack){ | |
| 1041 uprv_free(b1); | |
| 1042 } | |
| 1043 | |
| 1044 if(b2 != b2Stack){ | |
| 1045 uprv_free(b2); | |
| 1046 } | |
| 1047 | |
| 1048 return result; | |
| 1049 } | |
| 1050 #endif /* #if !UCONFIG_NO_IDNA */ | |
| OLD | NEW |