| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * | 3 * |
| 4 * Copyright (C) 2003-2013, International Business Machines | 4 * Copyright (C) 2003-2014, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
| 6 * | 6 * |
| 7 ******************************************************************************* | 7 ******************************************************************************* |
| 8 * file name: usprep.cpp | 8 * file name: usprep.cpp |
| 9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
| 11 * indentation:4 | 11 * indentation:4 |
| 12 * | 12 * |
| 13 * created on: 2003jul2 | 13 * created on: 2003jul2 |
| 14 * created by: Ram Viswanadha | 14 * created by: Ram Viswanadha |
| 15 */ | 15 */ |
| 16 | 16 |
| 17 #include "unicode/utypes.h" | 17 #include "unicode/utypes.h" |
| 18 | 18 |
| 19 #if !UCONFIG_NO_IDNA | 19 #if !UCONFIG_NO_IDNA |
| 20 | 20 |
| 21 #include "unicode/usprep.h" | 21 #include "unicode/usprep.h" |
| 22 | 22 |
| 23 #include "unicode/unorm.h" | 23 #include "unicode/normalizer2.h" |
| 24 #include "unicode/ustring.h" | 24 #include "unicode/ustring.h" |
| 25 #include "unicode/uchar.h" | 25 #include "unicode/uchar.h" |
| 26 #include "unicode/uversion.h" | 26 #include "unicode/uversion.h" |
| 27 #include "umutex.h" | 27 #include "umutex.h" |
| 28 #include "cmemory.h" | 28 #include "cmemory.h" |
| 29 #include "sprpimpl.h" | 29 #include "sprpimpl.h" |
| 30 #include "ustr_imp.h" | 30 #include "ustr_imp.h" |
| 31 #include "uhash.h" | 31 #include "uhash.h" |
| 32 #include "cstring.h" | 32 #include "cstring.h" |
| 33 #include "udataswp.h" | 33 #include "udataswp.h" |
| 34 #include "ucln_cmn.h" | 34 #include "ucln_cmn.h" |
| 35 #include "ubidi_props.h" | 35 #include "ubidi_props.h" |
| 36 #include "uprops.h" |
| 36 | 37 |
| 37 U_NAMESPACE_USE | 38 U_NAMESPACE_USE |
| 38 | 39 |
| 39 U_CDECL_BEGIN | 40 U_CDECL_BEGIN |
| 40 | 41 |
| 41 /* | 42 /* |
| 42 Static cache for already opened StringPrep profiles | 43 Static cache for already opened StringPrep profiles |
| 43 */ | 44 */ |
| 44 static UHashtable *SHARED_DATA_HASHTABLE = NULL; | 45 static UHashtable *SHARED_DATA_HASHTABLE = NULL; |
| 45 static icu::UInitOnce gSharedDataInitOnce; | 46 static icu::UInitOnce gSharedDataInitOnce; |
| (...skipping 449 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 495 | 496 |
| 496 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ | 497 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ |
| 497 type = USPREP_DELETE; | 498 type = USPREP_DELETE; |
| 498 isIndex =FALSE; | 499 isIndex =FALSE; |
| 499 value = 0; | 500 value = 0; |
| 500 } | 501 } |
| 501 } | 502 } |
| 502 return type; | 503 return type; |
| 503 } | 504 } |
| 504 | 505 |
| 505 | 506 // TODO: change to writing to UnicodeString not UChar * |
| 506 | |
| 507 static int32_t | 507 static int32_t |
| 508 usprep_map( const UStringPrepProfile* profile, | 508 usprep_map( const UStringPrepProfile* profile, |
| 509 const UChar* src, int32_t srcLength, | 509 const UChar* src, int32_t srcLength, |
| 510 UChar* dest, int32_t destCapacity, | 510 UChar* dest, int32_t destCapacity, |
| 511 int32_t options, | 511 int32_t options, |
| 512 UParseError* parseError, | 512 UParseError* parseError, |
| 513 UErrorCode* status ){ | 513 UErrorCode* status ){ |
| 514 | 514 |
| 515 uint16_t result; | 515 uint16_t result; |
| 516 int32_t destIndex=0; | 516 int32_t destIndex=0; |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 591 dest[destIndex+1] = U16_TRAIL(ch); | 591 dest[destIndex+1] = U16_TRAIL(ch); |
| 592 } | 592 } |
| 593 destIndex +=2; | 593 destIndex +=2; |
| 594 } | 594 } |
| 595 | 595 |
| 596 } | 596 } |
| 597 | 597 |
| 598 return u_terminateUChars(dest, destCapacity, destIndex, status); | 598 return u_terminateUChars(dest, destCapacity, destIndex, status); |
| 599 } | 599 } |
| 600 | 600 |
| 601 | 601 /* |
| 602 static int32_t | |
| 603 usprep_normalize( const UChar* src, int32_t srcLength, | |
| 604 UChar* dest, int32_t destCapacity, | |
| 605 UErrorCode* status ){ | |
| 606 return unorm_normalize( | |
| 607 src, srcLength, | |
| 608 UNORM_NFKC, UNORM_UNICODE_3_2, | |
| 609 dest, destCapacity, | |
| 610 status); | |
| 611 } | |
| 612 | |
| 613 | |
| 614 /* | |
| 615 1) Map -- For each character in the input, check if it has a mapping | 602 1) Map -- For each character in the input, check if it has a mapping |
| 616 and, if so, replace it with its mapping. | 603 and, if so, replace it with its mapping. |
| 617 | 604 |
| 618 2) Normalize -- Possibly normalize the result of step 1 using Unicode | 605 2) Normalize -- Possibly normalize the result of step 1 using Unicode |
| 619 normalization. | 606 normalization. |
| 620 | 607 |
| 621 3) Prohibit -- Check for any characters that are not allowed in the | 608 3) Prohibit -- Check for any characters that are not allowed in the |
| 622 output. If any are found, return an error. | 609 output. If any are found, return an error. |
| 623 | 610 |
| 624 4) Check bidi -- Possibly check for right-to-left characters, and if | 611 4) Check bidi -- Possibly check for right-to-left characters, and if |
| (...skipping 17 matching lines...) Expand all Loading... |
| 642 | 629 |
| 643 1) The characters in section 5.8 MUST be prohibited. | 630 1) The characters in section 5.8 MUST be prohibited. |
| 644 | 631 |
| 645 2) If a string contains any RandALCat character, the string MUST NOT | 632 2) If a string contains any RandALCat character, the string MUST NOT |
| 646 contain any LCat character. | 633 contain any LCat character. |
| 647 | 634 |
| 648 3) If a string contains any RandALCat character, a RandALCat | 635 3) If a string contains any RandALCat character, a RandALCat |
| 649 character MUST be the first character of the string, and a | 636 character MUST be the first character of the string, and a |
| 650 RandALCat character MUST be the last character of the string. | 637 RandALCat character MUST be the last character of the string. |
| 651 */ | 638 */ |
| 652 | |
| 653 #define MAX_STACK_BUFFER_SIZE 300 | |
| 654 | |
| 655 | |
| 656 U_CAPI int32_t U_EXPORT2 | 639 U_CAPI int32_t U_EXPORT2 |
| 657 usprep_prepare( const UStringPrepProfile* profile, | 640 usprep_prepare( const UStringPrepProfile* profile, |
| 658 const UChar* src, int32_t srcLength, | 641 const UChar* src, int32_t srcLength, |
| 659 UChar* dest, int32_t destCapacity, | 642 UChar* dest, int32_t destCapacity, |
| 660 int32_t options, | 643 int32_t options, |
| 661 UParseError* parseError, | 644 UParseError* parseError, |
| 662 UErrorCode* status ){ | 645 UErrorCode* status ){ |
| 663 | 646 |
| 664 // check error status | 647 // check error status |
| 665 if(status == NULL || U_FAILURE(*status)){ | 648 if(U_FAILURE(*status)){ |
| 666 return 0; | 649 return 0; |
| 667 } | 650 } |
| 668 | 651 |
| 669 //check arguments | 652 //check arguments |
| 670 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { | 653 if(profile==NULL || |
| 654 (src==NULL ? srcLength!=0 : srcLength<-1) || |
| 655 (dest==NULL ? destCapacity!=0 : destCapacity<0)) { |
| 671 *status=U_ILLEGAL_ARGUMENT_ERROR; | 656 *status=U_ILLEGAL_ARGUMENT_ERROR; |
| 672 return 0; | 657 return 0; |
| 673 } | 658 } |
| 674 | 659 |
| 675 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE]; | 660 //get the string length |
| 676 UChar *b1 = b1Stack, *b2 = b2Stack; | 661 if(srcLength < 0){ |
| 677 int32_t b1Len, b2Len=0, | 662 srcLength = u_strlen(src); |
| 678 b1Capacity = MAX_STACK_BUFFER_SIZE , | 663 } |
| 679 b2Capacity = MAX_STACK_BUFFER_SIZE; | 664 // map |
| 680 uint16_t result; | 665 UnicodeString s1; |
| 681 int32_t b2Index = 0; | 666 UChar *b1 = s1.getBuffer(srcLength); |
| 667 if(b1==NULL){ |
| 668 *status = U_MEMORY_ALLOCATION_ERROR; |
| 669 return 0; |
| 670 } |
| 671 int32_t b1Len = usprep_map(profile, src, srcLength, |
| 672 b1, s1.getCapacity(), options, parseError, status); |
| 673 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); |
| 674 |
| 675 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 676 // redo processing of string |
| 677 /* we do not have enough room so grow the buffer*/ |
| 678 b1 = s1.getBuffer(b1Len); |
| 679 if(b1==NULL){ |
| 680 *status = U_MEMORY_ALLOCATION_ERROR; |
| 681 return 0; |
| 682 } |
| 683 |
| 684 *status = U_ZERO_ERROR; // reset error |
| 685 b1Len = usprep_map(profile, src, srcLength, |
| 686 b1, s1.getCapacity(), options, parseError, status); |
| 687 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); |
| 688 } |
| 689 if(U_FAILURE(*status)){ |
| 690 return 0; |
| 691 } |
| 692 |
| 693 // normalize |
| 694 UnicodeString s2; |
| 695 if(profile->doNFKC){ |
| 696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status); |
| 697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status)); |
| 698 if(U_FAILURE(*status)){ |
| 699 return 0; |
| 700 } |
| 701 fn2.normalize(s1, s2, *status); |
| 702 }else{ |
| 703 s2.fastCopyFrom(s1); |
| 704 } |
| 705 if(U_FAILURE(*status)){ |
| 706 return 0; |
| 707 } |
| 708 |
| 709 // Prohibit and checkBiDi in one pass |
| 710 const UChar *b2 = s2.getBuffer(); |
| 711 int32_t b2Len = s2.length(); |
| 682 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; | 712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; |
| 683 UBool leftToRight=FALSE, rightToLeft=FALSE; | 713 UBool leftToRight=FALSE, rightToLeft=FALSE; |
| 684 int32_t rtlPos =-1, ltrPos =-1; | 714 int32_t rtlPos =-1, ltrPos =-1; |
| 685 | 715 |
| 686 //get the string length | 716 for(int32_t b2Index=0; b2Index<b2Len;){ |
| 687 if(srcLength == -1){ | 717 UChar32 ch = 0; |
| 688 srcLength = u_strlen(src); | |
| 689 } | |
| 690 // map | |
| 691 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status); | |
| 692 | |
| 693 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 694 // redo processing of string | |
| 695 /* we do not have enough room so grow the buffer*/ | |
| 696 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
| 697 if(b1==NULL){ | |
| 698 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 699 goto CLEANUP; | |
| 700 } | |
| 701 | |
| 702 *status = U_ZERO_ERROR; // reset error | |
| 703 | |
| 704 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status); | |
| 705 | |
| 706 } | |
| 707 | |
| 708 // normalize | |
| 709 if(profile->doNFKC == TRUE){ | |
| 710 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status); | |
| 711 | |
| 712 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
| 713 // redo processing of string | |
| 714 /* we do not have enough room so grow the buffer*/ | |
| 715 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
| 716 if(b2==NULL){ | |
| 717 *status = U_MEMORY_ALLOCATION_ERROR; | |
| 718 goto CLEANUP; | |
| 719 } | |
| 720 | |
| 721 *status = U_ZERO_ERROR; // reset error | |
| 722 | |
| 723 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status); | |
| 724 | |
| 725 } | |
| 726 | |
| 727 }else{ | |
| 728 b2 = b1; | |
| 729 b2Len = b1Len; | |
| 730 } | |
| 731 | |
| 732 | |
| 733 if(U_FAILURE(*status)){ | |
| 734 goto CLEANUP; | |
| 735 } | |
| 736 | |
| 737 UChar32 ch; | |
| 738 UStringPrepType type; | |
| 739 int16_t value; | |
| 740 UBool isIndex; | |
| 741 | |
| 742 // Prohibit and checkBiDi in one pass | |
| 743 for(b2Index=0; b2Index<b2Len;){ | |
| 744 | |
| 745 ch = 0; | |
| 746 | |
| 747 U16_NEXT(b2, b2Index, b2Len, ch); | 718 U16_NEXT(b2, b2Index, b2Len, ch); |
| 748 | 719 |
| 720 uint16_t result; |
| 749 UTRIE_GET16(&profile->sprepTrie,ch,result); | 721 UTRIE_GET16(&profile->sprepTrie,ch,result); |
| 750 | 722 |
| 751 type = getValues(result, value, isIndex); | 723 int16_t value; |
| 724 UBool isIndex; |
| 725 UStringPrepType type = getValues(result, value, isIndex); |
| 752 | 726 |
| 753 if( type == USPREP_PROHIBITED || | 727 if( type == USPREP_PROHIBITED || |
| 754 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) | 728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) |
| 755 ){ | 729 ){ |
| 756 *status = U_STRINGPREP_PROHIBITED_ERROR; | 730 *status = U_STRINGPREP_PROHIBITED_ERROR; |
| 757 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); | 731 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); |
| 758 goto CLEANUP; | 732 return 0; |
| 759 } | 733 } |
| 760 | 734 |
| 761 if(profile->checkBiDi) { | 735 if(profile->checkBiDi) { |
| 762 direction = ubidi_getClass(profile->bdp, ch); | 736 direction = ubidi_getClass(profile->bdp, ch); |
| 763 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ | 737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ |
| 764 firstCharDir = direction; | 738 firstCharDir = direction; |
| 765 } | 739 } |
| 766 if(direction == U_LEFT_TO_RIGHT){ | 740 if(direction == U_LEFT_TO_RIGHT){ |
| 767 leftToRight = TRUE; | 741 leftToRight = TRUE; |
| 768 ltrPos = b2Index-1; | 742 ltrPos = b2Index-1; |
| 769 } | 743 } |
| 770 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ | 744 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ |
| 771 rightToLeft = TRUE; | 745 rightToLeft = TRUE; |
| 772 rtlPos = b2Index-1; | 746 rtlPos = b2Index-1; |
| 773 } | 747 } |
| 774 } | 748 } |
| 775 } | 749 } |
| 776 if(profile->checkBiDi == TRUE){ | 750 if(profile->checkBiDi == TRUE){ |
| 777 // satisfy 2 | 751 // satisfy 2 |
| 778 if( leftToRight == TRUE && rightToLeft == TRUE){ | 752 if( leftToRight == TRUE && rightToLeft == TRUE){ |
| 779 *status = U_STRINGPREP_CHECK_BIDI_ERROR; | 753 *status = U_STRINGPREP_CHECK_BIDI_ERROR; |
| 780 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); | 754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); |
| 781 goto CLEANUP; | 755 return 0; |
| 782 } | 756 } |
| 783 | 757 |
| 784 //satisfy 3 | 758 //satisfy 3 |
| 785 if( rightToLeft == TRUE && | 759 if( rightToLeft == TRUE && |
| 786 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && | 760 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && |
| 787 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) | 761 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) |
| 788 ){ | 762 ){ |
| 789 *status = U_STRINGPREP_CHECK_BIDI_ERROR; | 763 *status = U_STRINGPREP_CHECK_BIDI_ERROR; |
| 790 uprv_syntaxError(b2, rtlPos, b2Len, parseError); | 764 uprv_syntaxError(b2, rtlPos, b2Len, parseError); |
| 791 return FALSE; | 765 return FALSE; |
| 792 } | 766 } |
| 793 } | 767 } |
| 794 if(b2Len>0 && b2Len <= destCapacity){ | 768 return s2.extract(dest, destCapacity, *status); |
| 795 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR); | |
| 796 } | |
| 797 | |
| 798 CLEANUP: | |
| 799 if(b1!=b1Stack){ | |
| 800 uprv_free(b1); | |
| 801 b1=NULL; | |
| 802 } | |
| 803 | |
| 804 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){ | |
| 805 uprv_free(b2); | |
| 806 b2=NULL; | |
| 807 } | |
| 808 return u_terminateUChars(dest, destCapacity, b2Len, status); | |
| 809 } | 769 } |
| 810 | 770 |
| 811 | 771 |
| 812 /* data swapping ------------------------------------------------------------ */ | 772 /* data swapping ------------------------------------------------------------ */ |
| 813 | 773 |
| 814 U_CAPI int32_t U_EXPORT2 | 774 U_CAPI int32_t U_EXPORT2 |
| 815 usprep_swap(const UDataSwapper *ds, | 775 usprep_swap(const UDataSwapper *ds, |
| 816 const void *inData, int32_t length, void *outData, | 776 const void *inData, int32_t length, void *outData, |
| 817 UErrorCode *pErrorCode) { | 777 UErrorCode *pErrorCode) { |
| 818 const UDataInfo *pInfo; | 778 const UDataInfo *pInfo; |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 896 offset+=count; | 856 offset+=count; |
| 897 | 857 |
| 898 /* swap the UTrie */ | 858 /* swap the UTrie */ |
| 899 count=indexes[_SPREP_INDEX_TRIE_SIZE]; | 859 count=indexes[_SPREP_INDEX_TRIE_SIZE]; |
| 900 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); | 860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
| 901 offset+=count; | 861 offset+=count; |
| 902 | 862 |
| 903 /* swap the uint16_t mappingTable[] */ | 863 /* swap the uint16_t mappingTable[] */ |
| 904 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; | 864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; |
| 905 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); | 865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
| 906 offset+=count; | 866 //offset+=count; |
| 907 } | 867 } |
| 908 | 868 |
| 909 return headerSize+size; | 869 return headerSize+size; |
| 910 } | 870 } |
| 911 | 871 |
| 912 #endif /* #if !UCONFIG_NO_IDNA */ | 872 #endif /* #if !UCONFIG_NO_IDNA */ |
| OLD | NEW |