OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2003-2007, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: idnaref.cpp |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2003feb1 |
| 14 * created by: Ram Viswanadha |
| 15 */ |
| 16 |
| 17 #include "unicode/utypes.h" |
| 18 |
| 19 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION |
| 20 #include "idnaref.h" |
| 21 #include "punyref.h" |
| 22 #include "ustr_imp.h" |
| 23 #include "cmemory.h" |
| 24 #include "sprpimpl.h" |
| 25 #include "nptrans.h" |
| 26 #include "testidna.h" |
| 27 #include "punycode.h" |
| 28 #include "unicode/ustring.h" |
| 29 |
| 30 /* The official IDNA ACE prefix is "xn--" */ |
| 31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; |
| 32 #define ACE_PREFIX_LENGTH 4 |
| 33 |
| 34 #define MAX_LABEL_LENGTH 63 |
| 35 #define HYPHEN 0x002D |
| 36 /* A label must not be longer than MAX_LABEL_LENGTH (63) code units; the buffers below are sized larger than that */ |
| 37 #define MAX_LABEL_BUFFER_SIZE 100 |
| 38 #define MAX_IDN_BUFFER_SIZE 300 |
| 39 |
| 40 #define CAPITAL_A 0x0041 |
| 41 #define CAPITAL_Z 0x005A |
| 42 #define LOWER_CASE_DELTA 0x0020 |
| 43 #define FULL_STOP 0x002E |
| 44 |
| 45 |
| 46 inline static UBool |
| 47 startsWithPrefix(const UChar* src , int32_t srcLength){ |
| 48 UBool startsWithPrefix = TRUE; |
| 49 |
| 50 if(srcLength < ACE_PREFIX_LENGTH){ |
| 51 return FALSE; |
| 52 } |
| 53 |
| 54 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ |
| 55 if(u_tolower(src[i]) != ACE_PREFIX[i]){ |
| 56 startsWithPrefix = FALSE; |
| 57 } |
| 58 } |
| 59 return startsWithPrefix; |
| 60 } |
| 61 |
| 62 inline static UChar |
| 63 toASCIILower(UChar ch){ |
| 64 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ |
| 65 return ch + LOWER_CASE_DELTA; |
| 66 } |
| 67 return ch; |
| 68 } |
| 69 |
| 70 inline static int32_t |
| 71 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, |
| 72 const UChar* s2, int32_t s2Len){ |
| 73 if(s1Len != s2Len){ |
| 74 return (s1Len > s2Len) ? s1Len : s2Len; |
| 75 } |
| 76 UChar c1,c2; |
| 77 int32_t rc; |
| 78 |
| 79 for(int32_t i =0;/* no condition */;i++) { |
| 80 /* If we reach the ends of both strings then they match */ |
| 81 if(i == s1Len) { |
| 82 return 0; |
| 83 } |
| 84 |
| 85 c1 = s1[i]; |
| 86 c2 = s2[i]; |
| 87 |
| 88 /* Case-insensitive comparison */ |
| 89 if(c1!=c2) { |
| 90 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); |
| 91 if(rc!=0) { |
| 92 return rc; |
| 93 } |
| 94 } |
| 95 } |
| 96 |
| 97 } |
| 98 |
| 99 static UErrorCode getError(enum punycode_status status){ |
| 100 switch(status){ |
| 101 case punycode_success: |
| 102 return U_ZERO_ERROR; |
| 103 case punycode_bad_input: /* Input is invalid. */ |
| 104 return U_INVALID_CHAR_FOUND; |
| 105 case punycode_big_output: /* Output would exceed the space provided. */ |
| 106 return U_BUFFER_OVERFLOW_ERROR; |
| 107 case punycode_overflow : /* Input requires wider integers to process. */ |
| 108 return U_INDEX_OUTOFBOUNDS_ERROR; |
| 109 default: |
| 110 return U_INTERNAL_PROGRAM_ERROR; |
| 111 } |
| 112 } |
| 113 |
| 114 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t
length){ |
| 115 int i; |
| 116 for(i=0;i<length;i++){ |
| 117 dest[i] = src[i]; |
| 118 } |
| 119 return i; |
| 120 } |
| 121 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t
length){ |
| 122 int i; |
| 123 for(i=0;i<length;i++){ |
| 124 dest[i] = (char)src[i]; |
| 125 } |
| 126 return i; |
| 127 } |
| 128 // wrapper around the reference Punycode implementation |
| 129 static int32_t convertToPuny(const UChar* src, int32_t srcLength, |
| 130 UChar* dest, int32_t destCapacity, |
| 131 UErrorCode& status){ |
| 132 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; |
| 133 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; |
| 134 uint32_t* b1 = b1Stack; |
| 135 char b2Stack[MAX_LABEL_BUFFER_SIZE]; |
| 136 char* b2 = b2Stack; |
| 137 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; |
| 138 punycode_status error; |
| 139 unsigned char* caseFlags = NULL; |
| 140 |
| 141 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); |
| 142 if(status == U_BUFFER_OVERFLOW_ERROR){ |
| 143 // redo processing of string |
| 144 /* we do not have enough room so grow the buffer*/ |
| 145 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); |
| 146 if(b1==NULL){ |
| 147 status = U_MEMORY_ALLOCATION_ERROR; |
| 148 goto CLEANUP; |
| 149 } |
| 150 |
| 151 status = U_ZERO_ERROR; // reset error |
| 152 |
| 153 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); |
| 154 } |
| 155 if(U_FAILURE(status)){ |
| 156 goto CLEANUP; |
| 157 } |
| 158 |
| 159 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); |
| 160 |
| 161 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); |
| 162 status = getError(error); |
| 163 |
| 164 if(status == U_BUFFER_OVERFLOW_ERROR){ |
| 165 /* we do not have enough room so grow the buffer*/ |
| 166 b2 = (char*) uprv_malloc( b2Len * sizeof(char)); |
| 167 if(b2==NULL){ |
| 168 status = U_MEMORY_ALLOCATION_ERROR; |
| 169 goto CLEANUP; |
| 170 } |
| 171 |
| 172 status = U_ZERO_ERROR; // reset error |
| 173 |
| 174 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&
b2Len, b2); |
| 175 status = getError(error); |
| 176 } |
| 177 if(U_FAILURE(status)){ |
| 178 goto CLEANUP; |
| 179 } |
| 180 |
| 181 if(b2Len < destCapacity){ |
| 182 convertASCIIToUChars(b2,dest,b2Len); |
| 183 }else{ |
| 184 status =U_BUFFER_OVERFLOW_ERROR; |
| 185 } |
| 186 |
| 187 CLEANUP: |
| 188 if(b1Stack != b1){ |
| 189 uprv_free(b1); |
| 190 } |
| 191 if(b2Stack != b2){ |
| 192 uprv_free(b2); |
| 193 } |
| 194 uprv_free(caseFlags); |
| 195 |
| 196 return b2Len; |
| 197 } |
| 198 |
| 199 static int32_t convertFromPuny( const UChar* src, int32_t srcLength, |
| 200 UChar* dest, int32_t destCapacity, |
| 201 UErrorCode& status){ |
| 202 char b1Stack[MAX_LABEL_BUFFER_SIZE]; |
| 203 char* b1 = b1Stack; |
| 204 int32_t destLen =0; |
| 205 |
| 206 convertUCharsToASCII(src, b1,srcLength); |
| 207 |
| 208 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; |
| 209 uint32_t* b2 = b2Stack; |
| 210 int32_t b2Len =MAX_LABEL_BUFFER_SIZE; |
| 211 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength *
sizeof(unsigned char*)); |
| 212 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,ca
seFlags); |
| 213 status = getError(error); |
| 214 if(status == U_BUFFER_OVERFLOW_ERROR){ |
| 215 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); |
| 216 if(b2 == NULL){ |
| 217 status = U_MEMORY_ALLOCATION_ERROR; |
| 218 goto CLEANUP; |
| 219 } |
| 220 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); |
| 221 status = getError(error); |
| 222 } |
| 223 |
| 224 if(U_FAILURE(status)){ |
| 225 goto CLEANUP; |
| 226 } |
| 227 |
| 228 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); |
| 229 |
| 230 CLEANUP: |
| 231 if(b1Stack != b1){ |
| 232 uprv_free(b1); |
| 233 } |
| 234 if(b2Stack != b2){ |
| 235 uprv_free(b2); |
| 236 } |
| 237 uprv_free(caseFlags); |
| 238 |
| 239 return destLen; |
| 240 } |
| 241 |
| 242 |
| 243 U_CFUNC int32_t U_EXPORT2 |
| 244 idnaref_toASCII(const UChar* src, int32_t srcLength, |
| 245 UChar* dest, int32_t destCapacity, |
| 246 int32_t options, |
| 247 UParseError* parseError, |
| 248 UErrorCode* status){ |
| 249 |
| 250 if(status == NULL || U_FAILURE(*status)){ |
| 251 return 0; |
| 252 } |
| 253 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ |
| 254 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 255 return 0; |
| 256 } |
| 257 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; |
| 258 //initialize pointers to stack buffers |
| 259 UChar *b1 = b1Stack, *b2 = b2Stack; |
| 260 int32_t b1Len=0, b2Len=0, |
| 261 b1Capacity = MAX_LABEL_BUFFER_SIZE, |
| 262 b2Capacity = MAX_LABEL_BUFFER_SIZE , |
| 263 reqLength=0; |
| 264 |
| 265 //get the options |
| 266 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0)
; |
| 267 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); |
| 268 |
| 269 UBool* caseFlags = NULL; |
| 270 |
| 271 // assume the source contains all ascii codepoints |
| 272 UBool srcIsASCII = TRUE; |
| 273 // assume the source contains all LDH codepoints |
| 274 UBool srcIsLDH = TRUE; |
| 275 int32_t j=0; |
| 276 |
| 277 if(srcLength == -1){ |
| 278 srcLength = u_strlen(src); |
| 279 } |
| 280 |
| 281 // step 1 |
| 282 for( j=0;j<srcLength;j++){ |
| 283 if(src[j] > 0x7F){ |
| 284 srcIsASCII = FALSE; |
| 285 } |
| 286 b1[b1Len++] = src[j]; |
| 287 } |
| 288 // step 2 |
| 289 NamePrepTransform* prep = TestIDNA::getInstance(*status); |
| 290 |
| 291 if(U_FAILURE(*status)){ |
| 292 goto CLEANUP; |
| 293 } |
| 294 |
| 295 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseErro
r,*status); |
| 296 |
| 297 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 298 // redo processing of string |
| 299 /* we do not have enough room so grow the buffer*/ |
| 300 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
| 301 if(b1==NULL){ |
| 302 *status = U_MEMORY_ALLOCATION_ERROR; |
| 303 goto CLEANUP; |
| 304 } |
| 305 |
| 306 *status = U_ZERO_ERROR; // reset error |
| 307 |
| 308 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseErro
r, *status); |
| 309 } |
| 310 // error bail out |
| 311 if(U_FAILURE(*status)){ |
| 312 goto CLEANUP; |
| 313 } |
| 314 |
| 315 if(b1Len == 0){ |
| 316 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
| 317 goto CLEANUP; |
| 318 } |
| 319 |
| 320 srcIsASCII = TRUE; |
| 321 // step 3 & 4 |
| 322 for( j=0;j<b1Len;j++){ |
| 323 if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII |
| 324 srcIsASCII = FALSE; |
| 325 }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII ran
ge verify that it is an LDH character{ |
| 326 srcIsLDH = FALSE; |
| 327 } |
| 328 } |
| 329 |
| 330 if(useSTD3ASCIIRules == TRUE){ |
| 331 // verify 3a and 3b |
| 332 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ |
| 333 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ |
| 334 *status = U_IDNA_STD3_ASCII_RULES_ERROR; |
| 335 goto CLEANUP; |
| 336 } |
| 337 } |
| 338 if(srcIsASCII){ |
| 339 if(b1Len <= destCapacity){ |
| 340 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); |
| 341 reqLength = b1Len; |
| 342 }else{ |
| 343 reqLength = b1Len; |
| 344 goto CLEANUP; |
| 345 } |
| 346 }else{ |
| 347 // step 5 : verify the sequence does not begin with ACE prefix |
| 348 if(!startsWithPrefix(b1,b1Len)){ |
| 349 |
| 350 //step 6: encode the sequence with punycode |
| 351 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); |
| 352 |
| 353 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status); |
| 354 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status)
; |
| 355 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 356 // redo processing of string |
| 357 /* we do not have enough room so grow the buffer*/ |
| 358 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); |
| 359 if(b2 == NULL){ |
| 360 *status = U_MEMORY_ALLOCATION_ERROR; |
| 361 goto CLEANUP; |
| 362 } |
| 363 |
| 364 *status = U_ZERO_ERROR; // reset error |
| 365 |
| 366 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status); |
| 367 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status); |
| 368 |
| 369 } |
| 370 //error bail out |
| 371 if(U_FAILURE(*status)){ |
| 372 goto CLEANUP; |
| 373 } |
| 374 reqLength = b2Len+ACE_PREFIX_LENGTH; |
| 375 |
| 376 if(reqLength > destCapacity){ |
| 377 *status = U_BUFFER_OVERFLOW_ERROR; |
| 378 goto CLEANUP; |
| 379 } |
| 380 //Step 7: prepend the ACE prefix |
| 381 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); |
| 382 //Step 6: copy the contents in b2 into dest |
| 383 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); |
| 384 |
| 385 }else{ |
| 386 *status = U_IDNA_ACE_PREFIX_ERROR; |
| 387 goto CLEANUP; |
| 388 } |
| 389 } |
| 390 |
| 391 if(reqLength > MAX_LABEL_LENGTH){ |
| 392 *status = U_IDNA_LABEL_TOO_LONG_ERROR; |
| 393 } |
| 394 |
| 395 CLEANUP: |
| 396 if(b1 != b1Stack){ |
| 397 uprv_free(b1); |
| 398 } |
| 399 if(b2 != b2Stack){ |
| 400 uprv_free(b2); |
| 401 } |
| 402 uprv_free(caseFlags); |
| 403 |
| 404 // delete prep; |
| 405 |
| 406 return u_terminateUChars(dest, destCapacity, reqLength, status); |
| 407 } |
| 408 |
| 409 |
| 410 U_CFUNC int32_t U_EXPORT2 |
| 411 idnaref_toUnicode(const UChar* src, int32_t srcLength, |
| 412 UChar* dest, int32_t destCapacity, |
| 413 int32_t options, |
| 414 UParseError* parseError, |
| 415 UErrorCode* status){ |
| 416 |
| 417 if(status == NULL || U_FAILURE(*status)){ |
| 418 return 0; |
| 419 } |
| 420 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ |
| 421 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 422 return 0; |
| 423 } |
| 424 |
| 425 |
| 426 |
| 427 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stac
k[MAX_LABEL_BUFFER_SIZE]; |
| 428 |
| 429 //initialize pointers to stack buffers |
| 430 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; |
| 431 int32_t b1Len, b2Len, b1PrimeLen, b3Len, |
| 432 b1Capacity = MAX_LABEL_BUFFER_SIZE, |
| 433 b2Capacity = MAX_LABEL_BUFFER_SIZE, |
| 434 b3Capacity = MAX_LABEL_BUFFER_SIZE, |
| 435 reqLength=0; |
| 436 // UParseError parseError; |
| 437 |
| 438 NamePrepTransform* prep = TestIDNA::getInstance(*status); |
| 439 b1Len = 0; |
| 440 UBool* caseFlags = NULL; |
| 441 |
| 442 //get the options |
| 443 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0)
; |
| 444 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); |
| 445 |
| 446 UBool srcIsASCII = TRUE; |
| 447 UBool srcIsLDH = TRUE; |
| 448 int32_t failPos =0; |
| 449 |
| 450 if(U_FAILURE(*status)){ |
| 451 goto CLEANUP; |
| 452 } |
| 453 // step 1: find out if all the codepoints in src are ASCII |
| 454 if(srcLength==-1){ |
| 455 srcLength = 0; |
| 456 for(;src[srcLength]!=0;){ |
| 457 if(src[srcLength]> 0x7f){ |
| 458 srcIsASCII = FALSE; |
| 459 }if(prep->isLDHChar(src[srcLength])==FALSE){ |
| 460 // here we do not assemble surrogates |
| 461 // since we know that LDH code points |
| 462 // are in the ASCII range only |
| 463 srcIsLDH = FALSE; |
| 464 failPos = srcLength; |
| 465 } |
| 466 srcLength++; |
| 467 } |
| 468 }else{ |
| 469 for(int32_t j=0; j<srcLength; j++){ |
| 470 if(src[j]> 0x7f){ |
| 471 srcIsASCII = FALSE; |
| 472 }else if(prep->isLDHChar(src[j])==FALSE){ |
| 473 // here we do not assemble surrogates |
| 474 // since we know that LDH code points |
| 475 // are in the ASCII range only |
| 476 srcIsLDH = FALSE; |
| 477 failPos = j; |
| 478 } |
| 479 } |
| 480 } |
| 481 |
| 482 if(srcIsASCII == FALSE){ |
| 483 // step 2: process the string |
| 484 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parse
Error, *status); |
| 485 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 486 // redo processing of string |
| 487 /* we do not have enough room so grow the buffer*/ |
| 488 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
| 489 if(b1==NULL){ |
| 490 *status = U_MEMORY_ALLOCATION_ERROR; |
| 491 goto CLEANUP; |
| 492 } |
| 493 |
| 494 *status = U_ZERO_ERROR; // reset error |
| 495 |
| 496 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parse
Error, *status); |
| 497 } |
| 498 //bail out on error |
| 499 if(U_FAILURE(*status)){ |
| 500 goto CLEANUP; |
| 501 } |
| 502 }else{ |
| 503 |
| 504 // copy everything to b1 |
| 505 if(srcLength < b1Capacity){ |
| 506 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); |
| 507 }else{ |
| 508 /* we do not have enough room so grow the buffer*/ |
| 509 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); |
| 510 if(b1==NULL){ |
| 511 *status = U_MEMORY_ALLOCATION_ERROR; |
| 512 goto CLEANUP; |
| 513 } |
| 514 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); |
| 515 } |
| 516 b1Len = srcLength; |
| 517 } |
| 518 //step 3: verify ACE Prefix |
| 519 if(startsWithPrefix(src,srcLength)){ |
| 520 |
| 521 //step 4: Remove the ACE Prefix |
| 522 b1Prime = b1 + ACE_PREFIX_LENGTH; |
| 523 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; |
| 524 |
| 525 //step 5: Decode using punycode |
| 526 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status); |
| 527 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags
, status); |
| 528 |
| 529 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 530 // redo processing of string |
| 531 /* we do not have enough room so grow the buffer*/ |
| 532 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); |
| 533 if(b2==NULL){ |
| 534 *status = U_MEMORY_ALLOCATION_ERROR; |
| 535 goto CLEANUP; |
| 536 } |
| 537 |
| 538 *status = U_ZERO_ERROR; // reset error |
| 539 |
| 540 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status); |
| 541 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags,
status); |
| 542 } |
| 543 |
| 544 |
| 545 //step 6:Apply toASCII |
| 546 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, statu
s); |
| 547 |
| 548 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 549 // redo processing of string |
| 550 /* we do not have enough room so grow the buffer*/ |
| 551 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); |
| 552 if(b3==NULL){ |
| 553 *status = U_MEMORY_ALLOCATION_ERROR; |
| 554 goto CLEANUP; |
| 555 } |
| 556 |
| 557 *status = U_ZERO_ERROR; // reset error |
| 558 |
| 559 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, sta
tus); |
| 560 |
| 561 } |
| 562 //bail out on error |
| 563 if(U_FAILURE(*status)){ |
| 564 goto CLEANUP; |
| 565 } |
| 566 |
| 567 //step 7: verify |
| 568 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ |
| 569 *status = U_IDNA_VERIFICATION_ERROR; |
| 570 goto CLEANUP; |
| 571 } |
| 572 |
| 573 //step 8: return output of step 5 |
| 574 reqLength = b2Len; |
| 575 if(b2Len <= destCapacity) { |
| 576 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); |
| 577 } |
| 578 }else{ |
| 579 // verify that STD3 ASCII rules are satisfied |
| 580 if(useSTD3ASCIIRules == TRUE){ |
| 581 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ |
| 582 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ |
| 583 *status = U_IDNA_STD3_ASCII_RULES_ERROR; |
| 584 |
| 585 /* populate the parseError struct */ |
| 586 if(srcIsLDH==FALSE){ |
| 587 // failPos is always set the index of failure |
| 588 uprv_syntaxError(src,failPos, srcLength,parseError); |
| 589 }else if(src[0] == HYPHEN){ |
| 590 // fail position is 0 |
| 591 uprv_syntaxError(src,0,srcLength,parseError); |
| 592 }else{ |
| 593 // the last index in the source is always length-1 |
| 594 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLengt
h, srcLength,parseError); |
| 595 } |
| 596 |
| 597 goto CLEANUP; |
| 598 } |
| 599 } |
| 600 //copy the source to destination |
| 601 if(srcLength <= destCapacity){ |
| 602 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); |
| 603 } |
| 604 reqLength = srcLength; |
| 605 } |
| 606 |
| 607 CLEANUP: |
| 608 |
| 609 if(b1 != b1Stack){ |
| 610 uprv_free(b1); |
| 611 } |
| 612 if(b2 != b2Stack){ |
| 613 uprv_free(b2); |
| 614 } |
| 615 uprv_free(caseFlags); |
| 616 |
| 617 // The RFC states that |
| 618 // <quote> |
| 619 // ToUnicode never fails. If any step fails, then the original input |
| 620 // is returned immediately in that step. |
| 621 // </quote> |
| 622 // So if any step fails lets copy source to destination |
| 623 if(U_FAILURE(*status)){ |
| 624 //copy the source to destination |
| 625 if(dest && srcLength <= destCapacity){ |
| 626 if(srcLength == -1) { |
| 627 uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR); |
| 628 } else { |
| 629 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); |
| 630 } |
| 631 } |
| 632 reqLength = srcLength; |
| 633 *status = U_ZERO_ERROR; |
| 634 } |
| 635 return u_terminateUChars(dest, destCapacity, reqLength, status); |
| 636 } |
| 637 |
| 638 |
| 639 static int32_t |
| 640 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep, |
| 641 UChar **limit, |
| 642 UBool *done, |
| 643 UErrorCode *status){ |
| 644 if(srcLength == -1){ |
| 645 int32_t i; |
| 646 for(i=0 ; ;i++){ |
| 647 if(src[i] == 0){ |
| 648 *limit = src + i; // point to null |
| 649 *done = TRUE; |
| 650 return i; |
| 651 } |
| 652 if(prep->isLabelSeparator(src[i],*status)){ |
| 653 *limit = src + (i+1); // go past the delimiter |
| 654 return i; |
| 655 |
| 656 } |
| 657 } |
| 658 }else{ |
| 659 int32_t i; |
| 660 for(i=0;i<srcLength;i++){ |
| 661 if(prep->isLabelSeparator(src[i],*status)){ |
| 662 *limit = src + (i+1); // go past the delimiter |
| 663 return i; |
| 664 } |
| 665 } |
| 666 // we have not found the delimiter |
| 667 if(i==srcLength){ |
| 668 *limit = src+srcLength; |
| 669 *done = TRUE; |
| 670 } |
| 671 return i; |
| 672 } |
| 673 } |
| 674 |
| 675 U_CFUNC int32_t U_EXPORT2 |
| 676 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, |
| 677 UChar* dest, int32_t destCapacity, |
| 678 int32_t options, |
| 679 UParseError* parseError, |
| 680 UErrorCode* status){ |
| 681 |
| 682 if(status == NULL || U_FAILURE(*status)){ |
| 683 return 0; |
| 684 } |
| 685 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ |
| 686 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 687 return 0; |
| 688 } |
| 689 |
| 690 int32_t reqLength = 0; |
| 691 // UParseError parseError; |
| 692 |
| 693 NamePrepTransform* prep = TestIDNA::getInstance(*status); |
| 694 |
| 695 //initialize pointers to stack buffers |
| 696 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; |
| 697 UChar *b1 = b1Stack; |
| 698 int32_t b1Len, labelLen; |
| 699 UChar* delimiter = (UChar*)src; |
| 700 UChar* labelStart = (UChar*)src; |
| 701 int32_t remainingLen = srcLength; |
| 702 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; |
| 703 |
| 704 //get the options |
| 705 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) !=
0); |
| 706 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0)
; |
| 707 UBool done = FALSE; |
| 708 |
| 709 if(U_FAILURE(*status)){ |
| 710 goto CLEANUP; |
| 711 } |
| 712 |
| 713 |
| 714 if(srcLength == -1){ |
| 715 for(;;){ |
| 716 |
| 717 if(*delimiter == 0){ |
| 718 break; |
| 719 } |
| 720 |
| 721 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done,
status); |
| 722 b1Len = 0; |
| 723 if(!(labelLen==0 && done)){// make sure this is not a root label sep
arator. |
| 724 |
| 725 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, |
| 726 options, parseError, status); |
| 727 |
| 728 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 729 // redo processing of string |
| 730 /* we do not have enough room so grow the buffer*/ |
| 731 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
| 732 if(b1==NULL){ |
| 733 *status = U_MEMORY_ALLOCATION_ERROR; |
| 734 goto CLEANUP; |
| 735 } |
| 736 |
| 737 *status = U_ZERO_ERROR; // reset error |
| 738 |
| 739 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, |
| 740 options, parseError, status); |
| 741 |
| 742 } |
| 743 } |
| 744 |
| 745 if(U_FAILURE(*status)){ |
| 746 goto CLEANUP; |
| 747 } |
| 748 int32_t tempLen = (reqLength + b1Len ); |
| 749 // copy to dest |
| 750 if( tempLen< destCapacity){ |
| 751 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); |
| 752 } |
| 753 |
| 754 reqLength = tempLen; |
| 755 |
| 756 // add the label separator |
| 757 if(done == FALSE){ |
| 758 if(reqLength < destCapacity){ |
| 759 dest[reqLength] = FULL_STOP; |
| 760 } |
| 761 reqLength++; |
| 762 } |
| 763 |
| 764 labelStart = delimiter; |
| 765 } |
| 766 }else{ |
| 767 for(;;){ |
| 768 |
| 769 if(delimiter == src+srcLength){ |
| 770 break; |
| 771 } |
| 772 |
| 773 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimit
er, &done, status); |
| 774 |
| 775 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, |
| 776 options,parseError, status); |
| 777 |
| 778 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 779 // redo processing of string |
| 780 /* we do not have enough room so grow the buffer*/ |
| 781 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
| 782 if(b1==NULL){ |
| 783 *status = U_MEMORY_ALLOCATION_ERROR; |
| 784 goto CLEANUP; |
| 785 } |
| 786 |
| 787 *status = U_ZERO_ERROR; // reset error |
| 788 |
| 789 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, |
| 790 options, parseError, status); |
| 791 |
| 792 } |
| 793 |
| 794 if(U_FAILURE(*status)){ |
| 795 goto CLEANUP; |
| 796 } |
| 797 int32_t tempLen = (reqLength + b1Len ); |
| 798 // copy to dest |
| 799 if( tempLen< destCapacity){ |
| 800 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); |
| 801 } |
| 802 |
| 803 reqLength = tempLen; |
| 804 |
| 805 // add the label separator |
| 806 if(done == FALSE){ |
| 807 if(reqLength < destCapacity){ |
| 808 dest[reqLength] = FULL_STOP; |
| 809 } |
| 810 reqLength++; |
| 811 } |
| 812 |
| 813 labelStart = delimiter; |
| 814 remainingLen = srcLength - (delimiter - src); |
| 815 } |
| 816 } |
| 817 |
| 818 |
| 819 CLEANUP: |
| 820 |
| 821 if(b1 != b1Stack){ |
| 822 uprv_free(b1); |
| 823 } |
| 824 |
| 825 // delete prep; |
| 826 |
| 827 return u_terminateUChars(dest, destCapacity, reqLength, status); |
| 828 } |
| 829 |
| 830 U_CFUNC int32_t U_EXPORT2 |
| 831 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, |
| 832 UChar* dest, int32_t destCapacity, |
| 833 int32_t options, |
| 834 UParseError* parseError, |
| 835 UErrorCode* status){ |
| 836 |
| 837 if(status == NULL || U_FAILURE(*status)){ |
| 838 return 0; |
| 839 } |
| 840 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCa
pacity > 0)){ |
| 841 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 842 return 0; |
| 843 } |
| 844 |
| 845 int32_t reqLength = 0; |
| 846 |
| 847 UBool done = FALSE; |
| 848 |
| 849 NamePrepTransform* prep = TestIDNA::getInstance(*status); |
| 850 |
| 851 //initialize pointers to stack buffers |
| 852 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; |
| 853 UChar *b1 = b1Stack; |
| 854 int32_t b1Len, labelLen; |
| 855 UChar* delimiter = (UChar*)src; |
| 856 UChar* labelStart = (UChar*)src; |
| 857 int32_t remainingLen = srcLength; |
| 858 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; |
| 859 |
| 860 //get the options |
| 861 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) !=
0); |
| 862 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0)
; |
| 863 |
| 864 if(U_FAILURE(*status)){ |
| 865 goto CLEANUP; |
| 866 } |
| 867 |
| 868 if(srcLength == -1){ |
| 869 for(;;){ |
| 870 |
| 871 if(*delimiter == 0){ |
| 872 break; |
| 873 } |
| 874 |
| 875 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done,
status); |
| 876 |
| 877 if(labelLen==0 && done==FALSE){ |
| 878 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
| 879 } |
| 880 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity, |
| 881 options, parseError, status); |
| 882 |
| 883 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 884 // redo processing of string |
| 885 /* we do not have enough room so grow the buffer*/ |
| 886 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
| 887 if(b1==NULL){ |
| 888 *status = U_MEMORY_ALLOCATION_ERROR; |
| 889 goto CLEANUP; |
| 890 } |
| 891 |
| 892 *status = U_ZERO_ERROR; // reset error |
| 893 |
| 894 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, |
| 895 options, parseError, status); |
| 896 |
| 897 } |
| 898 |
| 899 if(U_FAILURE(*status)){ |
| 900 goto CLEANUP; |
| 901 } |
| 902 int32_t tempLen = (reqLength + b1Len ); |
| 903 // copy to dest |
| 904 if( tempLen< destCapacity){ |
| 905 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); |
| 906 } |
| 907 |
| 908 reqLength = tempLen; |
| 909 // add the label separator |
| 910 if(done == FALSE){ |
| 911 if(reqLength < destCapacity){ |
| 912 dest[reqLength] = FULL_STOP; |
| 913 } |
| 914 reqLength++; |
| 915 } |
| 916 |
| 917 labelStart = delimiter; |
| 918 } |
| 919 }else{ |
| 920 for(;;){ |
| 921 |
| 922 if(delimiter == src+srcLength){ |
| 923 break; |
| 924 } |
| 925 |
| 926 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimit
er, &done, status); |
| 927 |
| 928 if(labelLen==0 && done==FALSE){ |
| 929 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
| 930 } |
| 931 |
| 932 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity, |
| 933 options, parseError, status); |
| 934 |
| 935 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 936 // redo processing of string |
| 937 /* we do not have enough room so grow the buffer*/ |
| 938 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
| 939 if(b1==NULL){ |
| 940 *status = U_MEMORY_ALLOCATION_ERROR; |
| 941 goto CLEANUP; |
| 942 } |
| 943 |
| 944 *status = U_ZERO_ERROR; // reset error |
| 945 |
| 946 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, |
| 947 options, parseError, status); |
| 948 |
| 949 } |
| 950 |
| 951 if(U_FAILURE(*status)){ |
| 952 goto CLEANUP; |
| 953 } |
| 954 int32_t tempLen = (reqLength + b1Len ); |
| 955 // copy to dest |
| 956 if( tempLen< destCapacity){ |
| 957 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); |
| 958 } |
| 959 |
| 960 reqLength = tempLen; |
| 961 |
| 962 // add the label separator |
| 963 if(done == FALSE){ |
| 964 if(reqLength < destCapacity){ |
| 965 dest[reqLength] = FULL_STOP; |
| 966 } |
| 967 reqLength++; |
| 968 } |
| 969 |
| 970 labelStart = delimiter; |
| 971 remainingLen = srcLength - (delimiter - src); |
| 972 } |
| 973 } |
| 974 |
| 975 CLEANUP: |
| 976 |
| 977 if(b1 != b1Stack){ |
| 978 uprv_free(b1); |
| 979 } |
| 980 |
| 981 // delete prep; |
| 982 |
| 983 return u_terminateUChars(dest, destCapacity, reqLength, status); |
| 984 } |
| 985 |
| 986 U_CFUNC int32_t U_EXPORT2 |
| 987 idnaref_compare( const UChar *s1, int32_t length1, |
| 988 const UChar *s2, int32_t length2, |
| 989 int32_t options, |
| 990 UErrorCode* status){ |
| 991 |
| 992 if(status == NULL || U_FAILURE(*status)){ |
| 993 return -1; |
| 994 } |
| 995 |
| 996 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; |
| 997 UChar *b1 = b1Stack, *b2 = b2Stack; |
| 998 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN
_BUFFER_SIZE; |
| 999 int32_t result = -1; |
| 1000 |
| 1001 UParseError parseError; |
| 1002 |
| 1003 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError
, status); |
| 1004 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 1005 // redo processing of string |
| 1006 /* we do not have enough room so grow the buffer*/ |
| 1007 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
| 1008 if(b1==NULL){ |
| 1009 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1010 goto CLEANUP; |
| 1011 } |
| 1012 |
| 1013 *status = U_ZERO_ERROR; // reset error |
| 1014 |
| 1015 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, st
atus); |
| 1016 |
| 1017 } |
| 1018 |
| 1019 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, st
atus); |
| 1020 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 1021 // redo processing of string |
| 1022 /* we do not have enough room so grow the buffer*/ |
| 1023 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); |
| 1024 if(b2==NULL){ |
| 1025 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1026 goto CLEANUP; |
| 1027 } |
| 1028 |
| 1029 *status = U_ZERO_ERROR; // reset error |
| 1030 |
| 1031 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, sta
tus); |
| 1032 |
| 1033 } |
| 1034 // when toASCII is applied all label separators are replaced with FULL_STOP |
| 1035 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); |
| 1036 |
| 1037 CLEANUP: |
| 1038 if(b1 != b1Stack){ |
| 1039 uprv_free(b1); |
| 1040 } |
| 1041 |
| 1042 if(b2 != b2Stack){ |
| 1043 uprv_free(b2); |
| 1044 } |
| 1045 |
| 1046 return result; |
| 1047 } |
| 1048 #endif /* #if !UCONFIG_NO_IDNA */ |
OLD | NEW |