Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(43)

Side by Side Diff: source/common/uts46.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/utrie2_builder.cpp ('k') | source/common/utypeinfo.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2010-2012, International Business Machines 3 * Copyright (C) 2010-2014, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * file name: uts46.cpp 6 * file name: uts46.cpp
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
11 * created on: 2010mar09 11 * created on: 2010mar09
12 * created by: Markus W. Scherer 12 * created by: Markus W. Scherer
13 */ 13 */
14 14
15 #include "unicode/utypes.h" 15 #include "unicode/utypes.h"
16 16
17 #if !UCONFIG_NO_IDNA 17 #if !UCONFIG_NO_IDNA
18 18
19 #include "unicode/idna.h" 19 #include "unicode/idna.h"
20 #include "unicode/normalizer2.h" 20 #include "unicode/normalizer2.h"
21 #include "unicode/uscript.h" 21 #include "unicode/uscript.h"
22 #include "unicode/ustring.h" 22 #include "unicode/ustring.h"
23 #include "unicode/utf16.h" 23 #include "unicode/utf16.h"
24 #include "cmemory.h" 24 #include "cmemory.h"
25 #include "cstring.h" 25 #include "cstring.h"
26 #include "punycode.h" 26 #include "punycode.h"
27 #include "ubidi_props.h" 27 #include "ubidi_props.h"
28 #include "ustr_imp.h" 28 #include "ustr_imp.h"
29 29
30 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
31
32 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: 30 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG:
33 // 31 //
34 // The domain name length limit is 255 octets in an internal DNS representation 32 // The domain name length limit is 255 octets in an internal DNS representation
35 // where the last ("root") label is the empty label 33 // where the last ("root") label is the empty label
36 // represented by length byte 0 alone. 34 // represented by length byte 0 alone.
37 // In a conventional string, this translates to 253 characters, or 254 35 // In a conventional string, this translates to 253 characters, or 254
38 // if there is a trailing dot for the root label. 36 // if there is a trailing dot for the root label.
39 37
40 U_NAMESPACE_BEGIN 38 U_NAMESPACE_BEGIN
41 39
(...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after
314 if(&dest==&src || srcArray==NULL) { 312 if(&dest==&src || srcArray==NULL) {
315 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 313 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
316 dest.setToBogus(); 314 dest.setToBogus();
317 return dest; 315 return dest;
318 } 316 }
319 // Arguments are fine, reset output values. 317 // Arguments are fine, reset output values.
320 dest.remove(); 318 dest.remove();
321 info.reset(); 319 info.reset();
322 int32_t srcLength=src.length(); 320 int32_t srcLength=src.length();
323 if(srcLength==0) { 321 if(srcLength==0) {
324 if(toASCII) { 322 info.errors|=UIDNA_ERROR_EMPTY_LABEL;
325 info.errors|=UIDNA_ERROR_EMPTY_LABEL;
326 }
327 return dest; 323 return dest;
328 } 324 }
329 UChar *destArray=dest.getBuffer(srcLength); 325 UChar *destArray=dest.getBuffer(srcLength);
330 if(destArray==NULL) { 326 if(destArray==NULL) {
331 errorCode=U_MEMORY_ALLOCATION_ERROR; 327 errorCode=U_MEMORY_ALLOCATION_ERROR;
332 return dest; 328 return dest;
333 } 329 }
334 // ASCII fastpath 330 // ASCII fastpath
335 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; 331 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
336 int32_t labelStart=0; 332 int32_t labelStart=0;
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
374 if((i+1)==srcLength || srcArray[i+1]==0x2e) { 370 if((i+1)==srcLength || srcArray[i+1]==0x2e) {
375 // label ends with "-" 371 // label ends with "-"
376 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; 372 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
377 } 373 }
378 } else if(c==0x2e) { // dot 374 } else if(c==0x2e) { // dot
379 if(isLabel) { 375 if(isLabel) {
380 // Replacing with U+FFFD can be complicated for toASCII. 376 // Replacing with U+FFFD can be complicated for toASCII.
381 ++i; // '.' was copied to dest already 377 ++i; // '.' was copied to dest already
382 break; 378 break;
383 } 379 }
384 if(toASCII) { 380 if(i==labelStart) {
385 // Permit an empty label at the end but not elsewhere. 381 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
386 if(i==labelStart && i<(srcLength-1)) { 382 }
387 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; 383 if(toASCII && (i-labelStart)>63) {
388 } else if((i-labelStart)>63) { 384 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
389 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
390 }
391 } 385 }
392 info.errors|=info.labelErrors; 386 info.errors|=info.labelErrors;
393 info.labelErrors=0; 387 info.labelErrors=0;
394 labelStart=i+1; 388 labelStart=i+1;
395 } 389 }
396 } 390 }
397 } 391 }
398 info.errors|=info.labelErrors; 392 info.errors|=info.labelErrors;
399 dest.releaseBuffer(i); 393 dest.releaseBuffer(i);
400 processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); 394 processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode);
(...skipping 15 matching lines...) Expand all
416 } 410 }
417 const char *srcArray=src.data(); 411 const char *srcArray=src.data();
418 int32_t srcLength=src.length(); 412 int32_t srcLength=src.length();
419 if(srcArray==NULL && srcLength!=0) { 413 if(srcArray==NULL && srcLength!=0) {
420 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 414 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
421 return; 415 return;
422 } 416 }
423 // Arguments are fine, reset output values. 417 // Arguments are fine, reset output values.
424 info.reset(); 418 info.reset();
425 if(srcLength==0) { 419 if(srcLength==0) {
426 if(toASCII) { 420 info.errors|=UIDNA_ERROR_EMPTY_LABEL;
427 info.errors|=UIDNA_ERROR_EMPTY_LABEL;
428 }
429 dest.Flush(); 421 dest.Flush();
430 return; 422 return;
431 } 423 }
432 UnicodeString destString; 424 UnicodeString destString;
433 int32_t labelStart=0; 425 int32_t labelStart=0;
434 if(srcLength<=256) { // length of stackArray[] 426 if(srcLength<=256) { // length of stackArray[]
435 // ASCII fastpath 427 // ASCII fastpath
436 char stackArray[256]; 428 char stackArray[256];
437 int32_t destCapacity; 429 int32_t destCapacity;
438 char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, 430 char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20,
439 stackArray, LENGTHOF(stackArray), &destCapacity); 431 stackArray, UPRV_LENGTHOF(stackArray), &destCapacity);
440 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; 432 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
441 int32_t i; 433 int32_t i;
442 for(i=0;; ++i) { 434 for(i=0;; ++i) {
443 if(i==srcLength) { 435 if(i==srcLength) {
444 if(toASCII) { 436 if(toASCII) {
445 if((i-labelStart)>63) { 437 if((i-labelStart)>63) {
446 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; 438 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
447 } 439 }
448 // There is a trailing dot if labelStart==i. 440 // There is a trailing dot if labelStart==i.
449 if(!isLabel && i>=254 && (i>254 || labelStart<i)) { 441 if(!isLabel && i>=254 && (i>254 || labelStart<i)) {
(...skipping 26 matching lines...) Expand all
476 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; 468 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
477 } 469 }
478 if((i+1)==srcLength || srcArray[i+1]==0x2e) { 470 if((i+1)==srcLength || srcArray[i+1]==0x2e) {
479 // label ends with "-" 471 // label ends with "-"
480 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; 472 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN;
481 } 473 }
482 } else if(c==0x2e) { // dot 474 } else if(c==0x2e) { // dot
483 if(isLabel) { 475 if(isLabel) {
484 break; // Replacing with U+FFFD can be complicated for toASCII. 476 break; // Replacing with U+FFFD can be complicated for toASCII.
485 } 477 }
486 if(toASCII) { 478 if(i==labelStart) {
487 // Permit an empty label at the end but not elsewhere. 479 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
488 if(i==labelStart && i<(srcLength-1)) { 480 }
489 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; 481 if(toASCII && (i-labelStart)>63) {
490 } else if((i-labelStart)>63) { 482 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
491 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
492 }
493 } 483 }
494 info.errors|=info.labelErrors; 484 info.errors|=info.labelErrors;
495 info.labelErrors=0; 485 info.labelErrors=0;
496 labelStart=i+1; 486 labelStart=i+1;
497 } 487 }
498 } 488 }
499 } 489 }
500 info.errors|=info.labelErrors; 490 info.errors|=info.labelErrors;
501 // Convert the processed ASCII prefix of the current label to UTF-16. 491 // Convert the processed ASCII prefix of the current label to UTF-16.
502 int32_t mappingStart=i-labelStart; 492 int32_t mappingStart=i-labelStart;
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
741 labelString=&fromPunycode; 731 labelString=&fromPunycode;
742 label=fromPunycode.getBuffer(); 732 label=fromPunycode.getBuffer();
743 labelStart=0; 733 labelStart=0;
744 labelLength=fromPunycode.length(); 734 labelLength=fromPunycode.length();
745 } else { 735 } else {
746 wasPunycode=FALSE; 736 wasPunycode=FALSE;
747 labelString=&dest; 737 labelString=&dest;
748 } 738 }
749 // Validity check 739 // Validity check
750 if(labelLength==0) { 740 if(labelLength==0) {
751 if(toASCII) { 741 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
752 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
753 }
754 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); 742 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength);
755 } 743 }
756 // labelLength>0 744 // labelLength>0
757 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { 745 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) {
758 // label starts with "??--" 746 // label starts with "??--"
759 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; 747 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4;
760 } 748 }
761 if(label[0]==0x2d) { 749 if(label[0]==0x2d) {
762 // label starts with "-" 750 // label starts with "-"
763 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; 751 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
(...skipping 686 matching lines...) Expand 10 before | Expand all | Expand 10 after
1450 } 1438 }
1451 StringPiece src(name, length<0 ? uprv_strlen(name) : length); 1439 StringPiece src(name, length<0 ? uprv_strlen(name) : length);
1452 CheckedArrayByteSink sink(dest, capacity); 1440 CheckedArrayByteSink sink(dest, capacity);
1453 IDNAInfo info; 1441 IDNAInfo info;
1454 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); 1442 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode);
1455 idnaInfoToStruct(info, pInfo); 1443 idnaInfoToStruct(info, pInfo);
1456 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); 1444 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
1457 } 1445 }
1458 1446
1459 #endif // UCONFIG_NO_IDNA 1447 #endif // UCONFIG_NO_IDNA
OLDNEW
« no previous file with comments | « source/common/utrie2_builder.cpp ('k') | source/common/utypeinfo.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698