Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: source/common/usprep.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/ushape.cpp ('k') | source/common/ustr_cnv.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * 3 *
4 * Copyright (C) 2003-2013, International Business Machines 4 * Copyright (C) 2003-2014, International Business Machines
5 * Corporation and others. All Rights Reserved. 5 * Corporation and others. All Rights Reserved.
6 * 6 *
7 ******************************************************************************* 7 *******************************************************************************
8 * file name: usprep.cpp 8 * file name: usprep.cpp
9 * encoding: US-ASCII 9 * encoding: US-ASCII
10 * tab size: 8 (not used) 10 * tab size: 8 (not used)
11 * indentation:4 11 * indentation:4
12 * 12 *
13 * created on: 2003jul2 13 * created on: 2003jul2
14 * created by: Ram Viswanadha 14 * created by: Ram Viswanadha
15 */ 15 */
16 16
17 #include "unicode/utypes.h" 17 #include "unicode/utypes.h"
18 18
19 #if !UCONFIG_NO_IDNA 19 #if !UCONFIG_NO_IDNA
20 20
21 #include "unicode/usprep.h" 21 #include "unicode/usprep.h"
22 22
23 #include "unicode/unorm.h" 23 #include "unicode/normalizer2.h"
24 #include "unicode/ustring.h" 24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h" 25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h" 26 #include "unicode/uversion.h"
27 #include "umutex.h" 27 #include "umutex.h"
28 #include "cmemory.h" 28 #include "cmemory.h"
29 #include "sprpimpl.h" 29 #include "sprpimpl.h"
30 #include "ustr_imp.h" 30 #include "ustr_imp.h"
31 #include "uhash.h" 31 #include "uhash.h"
32 #include "cstring.h" 32 #include "cstring.h"
33 #include "udataswp.h" 33 #include "udataswp.h"
34 #include "ucln_cmn.h" 34 #include "ucln_cmn.h"
35 #include "ubidi_props.h" 35 #include "ubidi_props.h"
36 #include "uprops.h"
36 37
37 U_NAMESPACE_USE 38 U_NAMESPACE_USE
38 39
39 U_CDECL_BEGIN 40 U_CDECL_BEGIN
40 41
41 /* 42 /*
42 Static cache for already opened StringPrep profiles 43 Static cache for already opened StringPrep profiles
43 */ 44 */
44 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 45 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
45 static icu::UInitOnce gSharedDataInitOnce; 46 static icu::UInitOnce gSharedDataInitOnce;
(...skipping 449 matching lines...) Expand 10 before | Expand all | Expand 10 after
495 496
496 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 497 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
497 type = USPREP_DELETE; 498 type = USPREP_DELETE;
498 isIndex =FALSE; 499 isIndex =FALSE;
499 value = 0; 500 value = 0;
500 } 501 }
501 } 502 }
502 return type; 503 return type;
503 } 504 }
504 505
505 506 // TODO: change to writing to UnicodeString not UChar *
506
507 static int32_t 507 static int32_t
508 usprep_map( const UStringPrepProfile* profile, 508 usprep_map( const UStringPrepProfile* profile,
509 const UChar* src, int32_t srcLength, 509 const UChar* src, int32_t srcLength,
510 UChar* dest, int32_t destCapacity, 510 UChar* dest, int32_t destCapacity,
511 int32_t options, 511 int32_t options,
512 UParseError* parseError, 512 UParseError* parseError,
513 UErrorCode* status ){ 513 UErrorCode* status ){
514 514
515 uint16_t result; 515 uint16_t result;
516 int32_t destIndex=0; 516 int32_t destIndex=0;
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
591 dest[destIndex+1] = U16_TRAIL(ch); 591 dest[destIndex+1] = U16_TRAIL(ch);
592 } 592 }
593 destIndex +=2; 593 destIndex +=2;
594 } 594 }
595 595
596 } 596 }
597 597
598 return u_terminateUChars(dest, destCapacity, destIndex, status); 598 return u_terminateUChars(dest, destCapacity, destIndex, status);
599 } 599 }
600 600
601 601 /*
602 static int32_t
603 usprep_normalize( const UChar* src, int32_t srcLength,
604 UChar* dest, int32_t destCapacity,
605 UErrorCode* status ){
606 return unorm_normalize(
607 src, srcLength,
608 UNORM_NFKC, UNORM_UNICODE_3_2,
609 dest, destCapacity,
610 status);
611 }
612
613
614 /*
615 1) Map -- For each character in the input, check if it has a mapping 602 1) Map -- For each character in the input, check if it has a mapping
616 and, if so, replace it with its mapping. 603 and, if so, replace it with its mapping.
617 604
618 2) Normalize -- Possibly normalize the result of step 1 using Unicode 605 2) Normalize -- Possibly normalize the result of step 1 using Unicode
619 normalization. 606 normalization.
620 607
621 3) Prohibit -- Check for any characters that are not allowed in the 608 3) Prohibit -- Check for any characters that are not allowed in the
622 output. If any are found, return an error. 609 output. If any are found, return an error.
623 610
624 4) Check bidi -- Possibly check for right-to-left characters, and if 611 4) Check bidi -- Possibly check for right-to-left characters, and if
(...skipping 17 matching lines...) Expand all
642 629
643 1) The characters in section 5.8 MUST be prohibited. 630 1) The characters in section 5.8 MUST be prohibited.
644 631
645 2) If a string contains any RandALCat character, the string MUST NOT 632 2) If a string contains any RandALCat character, the string MUST NOT
646 contain any LCat character. 633 contain any LCat character.
647 634
648 3) If a string contains any RandALCat character, a RandALCat 635 3) If a string contains any RandALCat character, a RandALCat
649 character MUST be the first character of the string, and a 636 character MUST be the first character of the string, and a
650 RandALCat character MUST be the last character of the string. 637 RandALCat character MUST be the last character of the string.
651 */ 638 */
652
653 #define MAX_STACK_BUFFER_SIZE 300
654
655
656 U_CAPI int32_t U_EXPORT2 639 U_CAPI int32_t U_EXPORT2
657 usprep_prepare( const UStringPrepProfile* profile, 640 usprep_prepare( const UStringPrepProfile* profile,
658 const UChar* src, int32_t srcLength, 641 const UChar* src, int32_t srcLength,
659 UChar* dest, int32_t destCapacity, 642 UChar* dest, int32_t destCapacity,
660 int32_t options, 643 int32_t options,
661 UParseError* parseError, 644 UParseError* parseError,
662 UErrorCode* status ){ 645 UErrorCode* status ){
663 646
664 // check error status 647 // check error status
665 if(status == NULL || U_FAILURE(*status)){ 648 if(U_FAILURE(*status)){
666 return 0; 649 return 0;
667 } 650 }
668 651
669 //check arguments 652 //check arguments
670 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity !=0)) { 653 if(profile==NULL ||
654 (src==NULL ? srcLength!=0 : srcLength<-1) ||
655 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
671 *status=U_ILLEGAL_ARGUMENT_ERROR; 656 *status=U_ILLEGAL_ARGUMENT_ERROR;
672 return 0; 657 return 0;
673 } 658 }
674 659
675 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE]; 660 //get the string length
676 UChar *b1 = b1Stack, *b2 = b2Stack; 661 if(srcLength < 0){
677 int32_t b1Len, b2Len=0, 662 srcLength = u_strlen(src);
678 b1Capacity = MAX_STACK_BUFFER_SIZE , 663 }
679 b2Capacity = MAX_STACK_BUFFER_SIZE; 664 // map
680 uint16_t result; 665 UnicodeString s1;
681 int32_t b2Index = 0; 666 UChar *b1 = s1.getBuffer(srcLength);
667 if(b1==NULL){
668 *status = U_MEMORY_ALLOCATION_ERROR;
669 return 0;
670 }
671 int32_t b1Len = usprep_map(profile, src, srcLength,
672 b1, s1.getCapacity(), options, parseError, status );
673 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
674
675 if(*status == U_BUFFER_OVERFLOW_ERROR){
676 // redo processing of string
677 /* we do not have enough room so grow the buffer*/
678 b1 = s1.getBuffer(b1Len);
679 if(b1==NULL){
680 *status = U_MEMORY_ALLOCATION_ERROR;
681 return 0;
682 }
683
684 *status = U_ZERO_ERROR; // reset error
685 b1Len = usprep_map(profile, src, srcLength,
686 b1, s1.getCapacity(), options, parseError, status);
687 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
688 }
689 if(U_FAILURE(*status)){
690 return 0;
691 }
692
693 // normalize
694 UnicodeString s2;
695 if(profile->doNFKC){
696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
698 if(U_FAILURE(*status)){
699 return 0;
700 }
701 fn2.normalize(s1, s2, *status);
702 }else{
703 s2.fastCopyFrom(s1);
704 }
705 if(U_FAILURE(*status)){
706 return 0;
707 }
708
709 // Prohibit and checkBiDi in one pass
710 const UChar *b2 = s2.getBuffer();
711 int32_t b2Len = s2.length();
682 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
683 UBool leftToRight=FALSE, rightToLeft=FALSE; 713 UBool leftToRight=FALSE, rightToLeft=FALSE;
684 int32_t rtlPos =-1, ltrPos =-1; 714 int32_t rtlPos =-1, ltrPos =-1;
685 715
686 //get the string length 716 for(int32_t b2Index=0; b2Index<b2Len;){
687 if(srcLength == -1){ 717 UChar32 ch = 0;
688 srcLength = u_strlen(src);
689 }
690 // map
691 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
692
693 if(*status == U_BUFFER_OVERFLOW_ERROR){
694 // redo processing of string
695 /* we do not have enough room so grow the buffer*/
696 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
697 if(b1==NULL){
698 *status = U_MEMORY_ALLOCATION_ERROR;
699 goto CLEANUP;
700 }
701
702 *status = U_ZERO_ERROR; // reset error
703
704 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
705
706 }
707
708 // normalize
709 if(profile->doNFKC == TRUE){
710 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
711
712 if(*status == U_BUFFER_OVERFLOW_ERROR){
713 // redo processing of string
714 /* we do not have enough room so grow the buffer*/
715 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
716 if(b2==NULL){
717 *status = U_MEMORY_ALLOCATION_ERROR;
718 goto CLEANUP;
719 }
720
721 *status = U_ZERO_ERROR; // reset error
722
723 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
724
725 }
726
727 }else{
728 b2 = b1;
729 b2Len = b1Len;
730 }
731
732
733 if(U_FAILURE(*status)){
734 goto CLEANUP;
735 }
736
737 UChar32 ch;
738 UStringPrepType type;
739 int16_t value;
740 UBool isIndex;
741
742 // Prohibit and checkBiDi in one pass
743 for(b2Index=0; b2Index<b2Len;){
744
745 ch = 0;
746
747 U16_NEXT(b2, b2Index, b2Len, ch); 718 U16_NEXT(b2, b2Index, b2Len, ch);
748 719
720 uint16_t result;
749 UTRIE_GET16(&profile->sprepTrie,ch,result); 721 UTRIE_GET16(&profile->sprepTrie,ch,result);
750 722
751 type = getValues(result, value, isIndex); 723 int16_t value;
724 UBool isIndex;
725 UStringPrepType type = getValues(result, value, isIndex);
752 726
753 if( type == USPREP_PROHIBITED || 727 if( type == USPREP_PROHIBITED ||
754 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
755 ){ 729 ){
756 *status = U_STRINGPREP_PROHIBITED_ERROR; 730 *status = U_STRINGPREP_PROHIBITED_ERROR;
757 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); 731 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
758 goto CLEANUP; 732 return 0;
759 } 733 }
760 734
761 if(profile->checkBiDi) { 735 if(profile->checkBiDi) {
762 direction = ubidi_getClass(profile->bdp, ch); 736 direction = ubidi_getClass(profile->bdp, ch);
763 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
764 firstCharDir = direction; 738 firstCharDir = direction;
765 } 739 }
766 if(direction == U_LEFT_TO_RIGHT){ 740 if(direction == U_LEFT_TO_RIGHT){
767 leftToRight = TRUE; 741 leftToRight = TRUE;
768 ltrPos = b2Index-1; 742 ltrPos = b2Index-1;
769 } 743 }
770 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 744 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
771 rightToLeft = TRUE; 745 rightToLeft = TRUE;
772 rtlPos = b2Index-1; 746 rtlPos = b2Index-1;
773 } 747 }
774 } 748 }
775 } 749 }
776 if(profile->checkBiDi == TRUE){ 750 if(profile->checkBiDi == TRUE){
777 // satisfy 2 751 // satisfy 2
778 if( leftToRight == TRUE && rightToLeft == TRUE){ 752 if( leftToRight == TRUE && rightToLeft == TRUE){
779 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 753 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
780 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
781 goto CLEANUP; 755 return 0;
782 } 756 }
783 757
784 //satisfy 3 758 //satisfy 3
785 if( rightToLeft == TRUE && 759 if( rightToLeft == TRUE &&
786 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 760 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
787 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 761 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
788 ){ 762 ){
789 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 763 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
790 uprv_syntaxError(b2, rtlPos, b2Len, parseError); 764 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
791 return FALSE; 765 return FALSE;
792 } 766 }
793 } 767 }
794 if(b2Len>0 && b2Len <= destCapacity){ 768 return s2.extract(dest, destCapacity, *status);
795 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
796 }
797
798 CLEANUP:
799 if(b1!=b1Stack){
800 uprv_free(b1);
801 b1=NULL;
802 }
803
804 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
805 uprv_free(b2);
806 b2=NULL;
807 }
808 return u_terminateUChars(dest, destCapacity, b2Len, status);
809 } 769 }
810 770
811 771
812 /* data swapping ------------------------------------------------------------ */ 772 /* data swapping ------------------------------------------------------------ */
813 773
814 U_CAPI int32_t U_EXPORT2 774 U_CAPI int32_t U_EXPORT2
815 usprep_swap(const UDataSwapper *ds, 775 usprep_swap(const UDataSwapper *ds,
816 const void *inData, int32_t length, void *outData, 776 const void *inData, int32_t length, void *outData,
817 UErrorCode *pErrorCode) { 777 UErrorCode *pErrorCode) {
818 const UDataInfo *pInfo; 778 const UDataInfo *pInfo;
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
896 offset+=count; 856 offset+=count;
897 857
898 /* swap the UTrie */ 858 /* swap the UTrie */
899 count=indexes[_SPREP_INDEX_TRIE_SIZE]; 859 count=indexes[_SPREP_INDEX_TRIE_SIZE];
900 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
901 offset+=count; 861 offset+=count;
902 862
903 /* swap the uint16_t mappingTable[] */ 863 /* swap the uint16_t mappingTable[] */
904 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
905 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
906 offset+=count; 866 //offset+=count;
907 } 867 }
908 868
909 return headerSize+size; 869 return headerSize+size;
910 } 870 }
911 871
912 #endif /* #if !UCONFIG_NO_IDNA */ 872 #endif /* #if !UCONFIG_NO_IDNA */
OLDNEW
« no previous file with comments | « source/common/ushape.cpp ('k') | source/common/ustr_cnv.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698