OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * | 3 * |
4 * Copyright (C) 2003-2013, International Business Machines | 4 * Copyright (C) 2003-2014, International Business Machines |
5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
6 * | 6 * |
7 ******************************************************************************* | 7 ******************************************************************************* |
8 * file name: usprep.cpp | 8 * file name: usprep.cpp |
9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
11 * indentation:4 | 11 * indentation:4 |
12 * | 12 * |
13 * created on: 2003jul2 | 13 * created on: 2003jul2 |
14 * created by: Ram Viswanadha | 14 * created by: Ram Viswanadha |
15 */ | 15 */ |
16 | 16 |
17 #include "unicode/utypes.h" | 17 #include "unicode/utypes.h" |
18 | 18 |
19 #if !UCONFIG_NO_IDNA | 19 #if !UCONFIG_NO_IDNA |
20 | 20 |
21 #include "unicode/usprep.h" | 21 #include "unicode/usprep.h" |
22 | 22 |
23 #include "unicode/unorm.h" | 23 #include "unicode/normalizer2.h" |
24 #include "unicode/ustring.h" | 24 #include "unicode/ustring.h" |
25 #include "unicode/uchar.h" | 25 #include "unicode/uchar.h" |
26 #include "unicode/uversion.h" | 26 #include "unicode/uversion.h" |
27 #include "umutex.h" | 27 #include "umutex.h" |
28 #include "cmemory.h" | 28 #include "cmemory.h" |
29 #include "sprpimpl.h" | 29 #include "sprpimpl.h" |
30 #include "ustr_imp.h" | 30 #include "ustr_imp.h" |
31 #include "uhash.h" | 31 #include "uhash.h" |
32 #include "cstring.h" | 32 #include "cstring.h" |
33 #include "udataswp.h" | 33 #include "udataswp.h" |
34 #include "ucln_cmn.h" | 34 #include "ucln_cmn.h" |
35 #include "ubidi_props.h" | 35 #include "ubidi_props.h" |
| 36 #include "uprops.h" |
36 | 37 |
37 U_NAMESPACE_USE | 38 U_NAMESPACE_USE |
38 | 39 |
39 U_CDECL_BEGIN | 40 U_CDECL_BEGIN |
40 | 41 |
41 /* | 42 /* |
42 Static cache for already opened StringPrep profiles | 43 Static cache for already opened StringPrep profiles |
43 */ | 44 */ |
44 static UHashtable *SHARED_DATA_HASHTABLE = NULL; | 45 static UHashtable *SHARED_DATA_HASHTABLE = NULL; |
45 static icu::UInitOnce gSharedDataInitOnce; | 46 static icu::UInitOnce gSharedDataInitOnce; |
(...skipping 449 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
495 | 496 |
496 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ | 497 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ |
497 type = USPREP_DELETE; | 498 type = USPREP_DELETE; |
498 isIndex =FALSE; | 499 isIndex =FALSE; |
499 value = 0; | 500 value = 0; |
500 } | 501 } |
501 } | 502 } |
502 return type; | 503 return type; |
503 } | 504 } |
504 | 505 |
505 | 506 // TODO: change to writing to UnicodeString not UChar * |
506 | |
507 static int32_t | 507 static int32_t |
508 usprep_map( const UStringPrepProfile* profile, | 508 usprep_map( const UStringPrepProfile* profile, |
509 const UChar* src, int32_t srcLength, | 509 const UChar* src, int32_t srcLength, |
510 UChar* dest, int32_t destCapacity, | 510 UChar* dest, int32_t destCapacity, |
511 int32_t options, | 511 int32_t options, |
512 UParseError* parseError, | 512 UParseError* parseError, |
513 UErrorCode* status ){ | 513 UErrorCode* status ){ |
514 | 514 |
515 uint16_t result; | 515 uint16_t result; |
516 int32_t destIndex=0; | 516 int32_t destIndex=0; |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
591 dest[destIndex+1] = U16_TRAIL(ch); | 591 dest[destIndex+1] = U16_TRAIL(ch); |
592 } | 592 } |
593 destIndex +=2; | 593 destIndex +=2; |
594 } | 594 } |
595 | 595 |
596 } | 596 } |
597 | 597 |
598 return u_terminateUChars(dest, destCapacity, destIndex, status); | 598 return u_terminateUChars(dest, destCapacity, destIndex, status); |
599 } | 599 } |
600 | 600 |
601 | 601 /* |
602 static int32_t | |
603 usprep_normalize( const UChar* src, int32_t srcLength, | |
604 UChar* dest, int32_t destCapacity, | |
605 UErrorCode* status ){ | |
606 return unorm_normalize( | |
607 src, srcLength, | |
608 UNORM_NFKC, UNORM_UNICODE_3_2, | |
609 dest, destCapacity, | |
610 status); | |
611 } | |
612 | |
613 | |
614 /* | |
615 1) Map -- For each character in the input, check if it has a mapping | 602 1) Map -- For each character in the input, check if it has a mapping |
616 and, if so, replace it with its mapping. | 603 and, if so, replace it with its mapping. |
617 | 604 |
618 2) Normalize -- Possibly normalize the result of step 1 using Unicode | 605 2) Normalize -- Possibly normalize the result of step 1 using Unicode |
619 normalization. | 606 normalization. |
620 | 607 |
621 3) Prohibit -- Check for any characters that are not allowed in the | 608 3) Prohibit -- Check for any characters that are not allowed in the |
622 output. If any are found, return an error. | 609 output. If any are found, return an error. |
623 | 610 |
624 4) Check bidi -- Possibly check for right-to-left characters, and if | 611 4) Check bidi -- Possibly check for right-to-left characters, and if |
(...skipping 17 matching lines...) Expand all Loading... |
642 | 629 |
643 1) The characters in section 5.8 MUST be prohibited. | 630 1) The characters in section 5.8 MUST be prohibited. |
644 | 631 |
645 2) If a string contains any RandALCat character, the string MUST NOT | 632 2) If a string contains any RandALCat character, the string MUST NOT |
646 contain any LCat character. | 633 contain any LCat character. |
647 | 634 |
648 3) If a string contains any RandALCat character, a RandALCat | 635 3) If a string contains any RandALCat character, a RandALCat |
649 character MUST be the first character of the string, and a | 636 character MUST be the first character of the string, and a |
650 RandALCat character MUST be the last character of the string. | 637 RandALCat character MUST be the last character of the string. |
651 */ | 638 */ |
652 | |
653 #define MAX_STACK_BUFFER_SIZE 300 | |
654 | |
655 | |
656 U_CAPI int32_t U_EXPORT2 | 639 U_CAPI int32_t U_EXPORT2 |
657 usprep_prepare( const UStringPrepProfile* profile, | 640 usprep_prepare( const UStringPrepProfile* profile, |
658 const UChar* src, int32_t srcLength, | 641 const UChar* src, int32_t srcLength, |
659 UChar* dest, int32_t destCapacity, | 642 UChar* dest, int32_t destCapacity, |
660 int32_t options, | 643 int32_t options, |
661 UParseError* parseError, | 644 UParseError* parseError, |
662 UErrorCode* status ){ | 645 UErrorCode* status ){ |
663 | 646 |
664 // check error status | 647 // check error status |
665 if(status == NULL || U_FAILURE(*status)){ | 648 if(U_FAILURE(*status)){ |
666 return 0; | 649 return 0; |
667 } | 650 } |
668 | 651 |
669 //check arguments | 652 //check arguments |
670 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity
!=0)) { | 653 if(profile==NULL || |
| 654 (src==NULL ? srcLength!=0 : srcLength<-1) || |
| 655 (dest==NULL ? destCapacity!=0 : destCapacity<0)) { |
671 *status=U_ILLEGAL_ARGUMENT_ERROR; | 656 *status=U_ILLEGAL_ARGUMENT_ERROR; |
672 return 0; | 657 return 0; |
673 } | 658 } |
674 | 659 |
675 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE]; | 660 //get the string length |
676 UChar *b1 = b1Stack, *b2 = b2Stack; | 661 if(srcLength < 0){ |
677 int32_t b1Len, b2Len=0, | 662 srcLength = u_strlen(src); |
678 b1Capacity = MAX_STACK_BUFFER_SIZE , | 663 } |
679 b2Capacity = MAX_STACK_BUFFER_SIZE; | 664 // map |
680 uint16_t result; | 665 UnicodeString s1; |
681 int32_t b2Index = 0; | 666 UChar *b1 = s1.getBuffer(srcLength); |
| 667 if(b1==NULL){ |
| 668 *status = U_MEMORY_ALLOCATION_ERROR; |
| 669 return 0; |
| 670 } |
| 671 int32_t b1Len = usprep_map(profile, src, srcLength, |
| 672 b1, s1.getCapacity(), options, parseError, status
); |
| 673 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); |
| 674 |
| 675 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
| 676 // redo processing of string |
| 677 /* we do not have enough room so grow the buffer*/ |
| 678 b1 = s1.getBuffer(b1Len); |
| 679 if(b1==NULL){ |
| 680 *status = U_MEMORY_ALLOCATION_ERROR; |
| 681 return 0; |
| 682 } |
| 683 |
| 684 *status = U_ZERO_ERROR; // reset error |
| 685 b1Len = usprep_map(profile, src, srcLength, |
| 686 b1, s1.getCapacity(), options, parseError, status); |
| 687 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); |
| 688 } |
| 689 if(U_FAILURE(*status)){ |
| 690 return 0; |
| 691 } |
| 692 |
| 693 // normalize |
| 694 UnicodeString s2; |
| 695 if(profile->doNFKC){ |
| 696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status); |
| 697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status)); |
| 698 if(U_FAILURE(*status)){ |
| 699 return 0; |
| 700 } |
| 701 fn2.normalize(s1, s2, *status); |
| 702 }else{ |
| 703 s2.fastCopyFrom(s1); |
| 704 } |
| 705 if(U_FAILURE(*status)){ |
| 706 return 0; |
| 707 } |
| 708 |
| 709 // Prohibit and checkBiDi in one pass |
| 710 const UChar *b2 = s2.getBuffer(); |
| 711 int32_t b2Len = s2.length(); |
682 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTI
ON_COUNT; | 712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTI
ON_COUNT; |
683 UBool leftToRight=FALSE, rightToLeft=FALSE; | 713 UBool leftToRight=FALSE, rightToLeft=FALSE; |
684 int32_t rtlPos =-1, ltrPos =-1; | 714 int32_t rtlPos =-1, ltrPos =-1; |
685 | 715 |
686 //get the string length | 716 for(int32_t b2Index=0; b2Index<b2Len;){ |
687 if(srcLength == -1){ | 717 UChar32 ch = 0; |
688 srcLength = u_strlen(src); | |
689 } | |
690 // map | |
691 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseEr
ror, status); | |
692 | |
693 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
694 // redo processing of string | |
695 /* we do not have enough room so grow the buffer*/ | |
696 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
697 if(b1==NULL){ | |
698 *status = U_MEMORY_ALLOCATION_ERROR; | |
699 goto CLEANUP; | |
700 } | |
701 | |
702 *status = U_ZERO_ERROR; // reset error | |
703 | |
704 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseErr
or, status); | |
705 | |
706 } | |
707 | |
708 // normalize | |
709 if(profile->doNFKC == TRUE){ | |
710 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status); | |
711 | |
712 if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
713 // redo processing of string | |
714 /* we do not have enough room so grow the buffer*/ | |
715 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
716 if(b2==NULL){ | |
717 *status = U_MEMORY_ALLOCATION_ERROR; | |
718 goto CLEANUP; | |
719 } | |
720 | |
721 *status = U_ZERO_ERROR; // reset error | |
722 | |
723 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status); | |
724 | |
725 } | |
726 | |
727 }else{ | |
728 b2 = b1; | |
729 b2Len = b1Len; | |
730 } | |
731 | |
732 | |
733 if(U_FAILURE(*status)){ | |
734 goto CLEANUP; | |
735 } | |
736 | |
737 UChar32 ch; | |
738 UStringPrepType type; | |
739 int16_t value; | |
740 UBool isIndex; | |
741 | |
742 // Prohibit and checkBiDi in one pass | |
743 for(b2Index=0; b2Index<b2Len;){ | |
744 | |
745 ch = 0; | |
746 | |
747 U16_NEXT(b2, b2Index, b2Len, ch); | 718 U16_NEXT(b2, b2Index, b2Len, ch); |
748 | 719 |
| 720 uint16_t result; |
749 UTRIE_GET16(&profile->sprepTrie,ch,result); | 721 UTRIE_GET16(&profile->sprepTrie,ch,result); |
750 | 722 |
751 type = getValues(result, value, isIndex); | 723 int16_t value; |
| 724 UBool isIndex; |
| 725 UStringPrepType type = getValues(result, value, isIndex); |
752 | 726 |
753 if( type == USPREP_PROHIBITED || | 727 if( type == USPREP_PROHIBITED || |
754 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit sa
ys it the code point is prohibited*/) | 728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit sa
ys it the code point is prohibited*/) |
755 ){ | 729 ){ |
756 *status = U_STRINGPREP_PROHIBITED_ERROR; | 730 *status = U_STRINGPREP_PROHIBITED_ERROR; |
757 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); | 731 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); |
758 goto CLEANUP; | 732 return 0; |
759 } | 733 } |
760 | 734 |
761 if(profile->checkBiDi) { | 735 if(profile->checkBiDi) { |
762 direction = ubidi_getClass(profile->bdp, ch); | 736 direction = ubidi_getClass(profile->bdp, ch); |
763 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ | 737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ |
764 firstCharDir = direction; | 738 firstCharDir = direction; |
765 } | 739 } |
766 if(direction == U_LEFT_TO_RIGHT){ | 740 if(direction == U_LEFT_TO_RIGHT){ |
767 leftToRight = TRUE; | 741 leftToRight = TRUE; |
768 ltrPos = b2Index-1; | 742 ltrPos = b2Index-1; |
769 } | 743 } |
770 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARAB
IC){ | 744 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARAB
IC){ |
771 rightToLeft = TRUE; | 745 rightToLeft = TRUE; |
772 rtlPos = b2Index-1; | 746 rtlPos = b2Index-1; |
773 } | 747 } |
774 } | 748 } |
775 } | 749 } |
776 if(profile->checkBiDi == TRUE){ | 750 if(profile->checkBiDi == TRUE){ |
777 // satisfy 2 | 751 // satisfy 2 |
778 if( leftToRight == TRUE && rightToLeft == TRUE){ | 752 if( leftToRight == TRUE && rightToLeft == TRUE){ |
779 *status = U_STRINGPREP_CHECK_BIDI_ERROR; | 753 *status = U_STRINGPREP_CHECK_BIDI_ERROR; |
780 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseE
rror); | 754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseE
rror); |
781 goto CLEANUP; | 755 return 0; |
782 } | 756 } |
783 | 757 |
784 //satisfy 3 | 758 //satisfy 3 |
785 if( rightToLeft == TRUE && | 759 if( rightToLeft == TRUE && |
786 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEF
T_ARABIC) && | 760 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEF
T_ARABIC) && |
787 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARAB
IC)) | 761 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARAB
IC)) |
788 ){ | 762 ){ |
789 *status = U_STRINGPREP_CHECK_BIDI_ERROR; | 763 *status = U_STRINGPREP_CHECK_BIDI_ERROR; |
790 uprv_syntaxError(b2, rtlPos, b2Len, parseError); | 764 uprv_syntaxError(b2, rtlPos, b2Len, parseError); |
791 return FALSE; | 765 return FALSE; |
792 } | 766 } |
793 } | 767 } |
794 if(b2Len>0 && b2Len <= destCapacity){ | 768 return s2.extract(dest, destCapacity, *status); |
795 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR); | |
796 } | |
797 | |
798 CLEANUP: | |
799 if(b1!=b1Stack){ | |
800 uprv_free(b1); | |
801 b1=NULL; | |
802 } | |
803 | |
804 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){ | |
805 uprv_free(b2); | |
806 b2=NULL; | |
807 } | |
808 return u_terminateUChars(dest, destCapacity, b2Len, status); | |
809 } | 769 } |
810 | 770 |
811 | 771 |
812 /* data swapping ------------------------------------------------------------ */ | 772 /* data swapping ------------------------------------------------------------ */ |
813 | 773 |
814 U_CAPI int32_t U_EXPORT2 | 774 U_CAPI int32_t U_EXPORT2 |
815 usprep_swap(const UDataSwapper *ds, | 775 usprep_swap(const UDataSwapper *ds, |
816 const void *inData, int32_t length, void *outData, | 776 const void *inData, int32_t length, void *outData, |
817 UErrorCode *pErrorCode) { | 777 UErrorCode *pErrorCode) { |
818 const UDataInfo *pInfo; | 778 const UDataInfo *pInfo; |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
896 offset+=count; | 856 offset+=count; |
897 | 857 |
898 /* swap the UTrie */ | 858 /* swap the UTrie */ |
899 count=indexes[_SPREP_INDEX_TRIE_SIZE]; | 859 count=indexes[_SPREP_INDEX_TRIE_SIZE]; |
900 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); | 860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
901 offset+=count; | 861 offset+=count; |
902 | 862 |
903 /* swap the uint16_t mappingTable[] */ | 863 /* swap the uint16_t mappingTable[] */ |
904 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; | 864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; |
905 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); | 865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); |
906 offset+=count; | 866 //offset+=count; |
907 } | 867 } |
908 | 868 |
909 return headerSize+size; | 869 return headerSize+size; |
910 } | 870 } |
911 | 871 |
912 #endif /* #if !UCONFIG_NO_IDNA */ | 872 #endif /* #if !UCONFIG_NO_IDNA */ |
OLD | NEW |