OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2010-2012, International Business Machines | 3 * Copyright (C) 2010-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * file name: uts46.cpp | 6 * file name: uts46.cpp |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
11 * created on: 2010mar09 | 11 * created on: 2010mar09 |
12 * created by: Markus W. Scherer | 12 * created by: Markus W. Scherer |
13 */ | 13 */ |
14 | 14 |
15 #include "unicode/utypes.h" | 15 #include "unicode/utypes.h" |
16 | 16 |
17 #if !UCONFIG_NO_IDNA | 17 #if !UCONFIG_NO_IDNA |
18 | 18 |
19 #include "unicode/idna.h" | 19 #include "unicode/idna.h" |
20 #include "unicode/normalizer2.h" | 20 #include "unicode/normalizer2.h" |
21 #include "unicode/uscript.h" | 21 #include "unicode/uscript.h" |
22 #include "unicode/ustring.h" | 22 #include "unicode/ustring.h" |
23 #include "unicode/utf16.h" | 23 #include "unicode/utf16.h" |
24 #include "cmemory.h" | 24 #include "cmemory.h" |
25 #include "cstring.h" | 25 #include "cstring.h" |
26 #include "punycode.h" | 26 #include "punycode.h" |
27 #include "ubidi_props.h" | 27 #include "ubidi_props.h" |
28 #include "ustr_imp.h" | 28 #include "ustr_imp.h" |
29 | 29 |
30 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
31 | |
32 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: | 30 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: |
33 // | 31 // |
34 // The domain name length limit is 255 octets in an internal DNS representation | 32 // The domain name length limit is 255 octets in an internal DNS representation |
35 // where the last ("root") label is the empty label | 33 // where the last ("root") label is the empty label |
36 // represented by length byte 0 alone. | 34 // represented by length byte 0 alone. |
37 // In a conventional string, this translates to 253 characters, or 254 | 35 // In a conventional string, this translates to 253 characters, or 254 |
38 // if there is a trailing dot for the root label. | 36 // if there is a trailing dot for the root label. |
39 | 37 |
40 U_NAMESPACE_BEGIN | 38 U_NAMESPACE_BEGIN |
41 | 39 |
(...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
314 if(&dest==&src || srcArray==NULL) { | 312 if(&dest==&src || srcArray==NULL) { |
315 errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 313 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
316 dest.setToBogus(); | 314 dest.setToBogus(); |
317 return dest; | 315 return dest; |
318 } | 316 } |
319 // Arguments are fine, reset output values. | 317 // Arguments are fine, reset output values. |
320 dest.remove(); | 318 dest.remove(); |
321 info.reset(); | 319 info.reset(); |
322 int32_t srcLength=src.length(); | 320 int32_t srcLength=src.length(); |
323 if(srcLength==0) { | 321 if(srcLength==0) { |
324 if(toASCII) { | 322 info.errors|=UIDNA_ERROR_EMPTY_LABEL; |
325 info.errors|=UIDNA_ERROR_EMPTY_LABEL; | |
326 } | |
327 return dest; | 323 return dest; |
328 } | 324 } |
329 UChar *destArray=dest.getBuffer(srcLength); | 325 UChar *destArray=dest.getBuffer(srcLength); |
330 if(destArray==NULL) { | 326 if(destArray==NULL) { |
331 errorCode=U_MEMORY_ALLOCATION_ERROR; | 327 errorCode=U_MEMORY_ALLOCATION_ERROR; |
332 return dest; | 328 return dest; |
333 } | 329 } |
334 // ASCII fastpath | 330 // ASCII fastpath |
335 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 331 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
336 int32_t labelStart=0; | 332 int32_t labelStart=0; |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
374 if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 370 if((i+1)==srcLength || srcArray[i+1]==0x2e) { |
375 // label ends with "-" | 371 // label ends with "-" |
376 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 372 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; |
377 } | 373 } |
378 } else if(c==0x2e) { // dot | 374 } else if(c==0x2e) { // dot |
379 if(isLabel) { | 375 if(isLabel) { |
380 // Replacing with U+FFFD can be complicated for toASCII. | 376 // Replacing with U+FFFD can be complicated for toASCII. |
381 ++i; // '.' was copied to dest already | 377 ++i; // '.' was copied to dest already |
382 break; | 378 break; |
383 } | 379 } |
384 if(toASCII) { | 380 if(i==labelStart) { |
385 // Permit an empty label at the end but not elsewhere. | 381 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
386 if(i==labelStart && i<(srcLength-1)) { | 382 } |
387 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 383 if(toASCII && (i-labelStart)>63) { |
388 } else if((i-labelStart)>63) { | 384 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
389 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
390 } | |
391 } | 385 } |
392 info.errors|=info.labelErrors; | 386 info.errors|=info.labelErrors; |
393 info.labelErrors=0; | 387 info.labelErrors=0; |
394 labelStart=i+1; | 388 labelStart=i+1; |
395 } | 389 } |
396 } | 390 } |
397 } | 391 } |
398 info.errors|=info.labelErrors; | 392 info.errors|=info.labelErrors; |
399 dest.releaseBuffer(i); | 393 dest.releaseBuffer(i); |
400 processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); | 394 processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); |
(...skipping 15 matching lines...) Expand all Loading... |
416 } | 410 } |
417 const char *srcArray=src.data(); | 411 const char *srcArray=src.data(); |
418 int32_t srcLength=src.length(); | 412 int32_t srcLength=src.length(); |
419 if(srcArray==NULL && srcLength!=0) { | 413 if(srcArray==NULL && srcLength!=0) { |
420 errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 414 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
421 return; | 415 return; |
422 } | 416 } |
423 // Arguments are fine, reset output values. | 417 // Arguments are fine, reset output values. |
424 info.reset(); | 418 info.reset(); |
425 if(srcLength==0) { | 419 if(srcLength==0) { |
426 if(toASCII) { | 420 info.errors|=UIDNA_ERROR_EMPTY_LABEL; |
427 info.errors|=UIDNA_ERROR_EMPTY_LABEL; | |
428 } | |
429 dest.Flush(); | 421 dest.Flush(); |
430 return; | 422 return; |
431 } | 423 } |
432 UnicodeString destString; | 424 UnicodeString destString; |
433 int32_t labelStart=0; | 425 int32_t labelStart=0; |
434 if(srcLength<=256) { // length of stackArray[] | 426 if(srcLength<=256) { // length of stackArray[] |
435 // ASCII fastpath | 427 // ASCII fastpath |
436 char stackArray[256]; | 428 char stackArray[256]; |
437 int32_t destCapacity; | 429 int32_t destCapacity; |
438 char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, | 430 char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, |
439 stackArray, LENGTHOF(stackArray), &
destCapacity); | 431 stackArray, UPRV_LENGTHOF(stackArra
y), &destCapacity); |
440 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 432 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
441 int32_t i; | 433 int32_t i; |
442 for(i=0;; ++i) { | 434 for(i=0;; ++i) { |
443 if(i==srcLength) { | 435 if(i==srcLength) { |
444 if(toASCII) { | 436 if(toASCII) { |
445 if((i-labelStart)>63) { | 437 if((i-labelStart)>63) { |
446 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 438 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
447 } | 439 } |
448 // There is a trailing dot if labelStart==i. | 440 // There is a trailing dot if labelStart==i. |
449 if(!isLabel && i>=254 && (i>254 || labelStart<i)) { | 441 if(!isLabel && i>=254 && (i>254 || labelStart<i)) { |
(...skipping 26 matching lines...) Expand all Loading... |
476 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 468 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
477 } | 469 } |
478 if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 470 if((i+1)==srcLength || srcArray[i+1]==0x2e) { |
479 // label ends with "-" | 471 // label ends with "-" |
480 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 472 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; |
481 } | 473 } |
482 } else if(c==0x2e) { // dot | 474 } else if(c==0x2e) { // dot |
483 if(isLabel) { | 475 if(isLabel) { |
484 break; // Replacing with U+FFFD can be complicated for
toASCII. | 476 break; // Replacing with U+FFFD can be complicated for
toASCII. |
485 } | 477 } |
486 if(toASCII) { | 478 if(i==labelStart) { |
487 // Permit an empty label at the end but not elsewhere. | 479 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
488 if(i==labelStart && i<(srcLength-1)) { | 480 } |
489 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 481 if(toASCII && (i-labelStart)>63) { |
490 } else if((i-labelStart)>63) { | 482 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
491 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
492 } | |
493 } | 483 } |
494 info.errors|=info.labelErrors; | 484 info.errors|=info.labelErrors; |
495 info.labelErrors=0; | 485 info.labelErrors=0; |
496 labelStart=i+1; | 486 labelStart=i+1; |
497 } | 487 } |
498 } | 488 } |
499 } | 489 } |
500 info.errors|=info.labelErrors; | 490 info.errors|=info.labelErrors; |
501 // Convert the processed ASCII prefix of the current label to UTF-16. | 491 // Convert the processed ASCII prefix of the current label to UTF-16. |
502 int32_t mappingStart=i-labelStart; | 492 int32_t mappingStart=i-labelStart; |
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
741 labelString=&fromPunycode; | 731 labelString=&fromPunycode; |
742 label=fromPunycode.getBuffer(); | 732 label=fromPunycode.getBuffer(); |
743 labelStart=0; | 733 labelStart=0; |
744 labelLength=fromPunycode.length(); | 734 labelLength=fromPunycode.length(); |
745 } else { | 735 } else { |
746 wasPunycode=FALSE; | 736 wasPunycode=FALSE; |
747 labelString=&dest; | 737 labelString=&dest; |
748 } | 738 } |
749 // Validity check | 739 // Validity check |
750 if(labelLength==0) { | 740 if(labelLength==0) { |
751 if(toASCII) { | 741 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
752 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | |
753 } | |
754 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString,
labelLength); | 742 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString,
labelLength); |
755 } | 743 } |
756 // labelLength>0 | 744 // labelLength>0 |
757 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { | 745 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { |
758 // label starts with "??--" | 746 // label starts with "??--" |
759 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; | 747 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; |
760 } | 748 } |
761 if(label[0]==0x2d) { | 749 if(label[0]==0x2d) { |
762 // label starts with "-" | 750 // label starts with "-" |
763 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 751 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
(...skipping 686 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1450 } | 1438 } |
1451 StringPiece src(name, length<0 ? uprv_strlen(name) : length); | 1439 StringPiece src(name, length<0 ? uprv_strlen(name) : length); |
1452 CheckedArrayByteSink sink(dest, capacity); | 1440 CheckedArrayByteSink sink(dest, capacity); |
1453 IDNAInfo info; | 1441 IDNAInfo info; |
1454 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pE
rrorCode); | 1442 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pE
rrorCode); |
1455 idnaInfoToStruct(info, pInfo); | 1443 idnaInfoToStruct(info, pInfo); |
1456 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pError
Code); | 1444 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pError
Code); |
1457 } | 1445 } |
1458 | 1446 |
1459 #endif // UCONFIG_NO_IDNA | 1447 #endif // UCONFIG_NO_IDNA |
OLD | NEW |