source/common/usprep.cpp - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/common/usprep.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 *******************************************************************************	2 *******************************************************************************

3 *	3 *

4 * Copyright (C) 2003-2013, International Business Machines	4 * Copyright (C) 2003-2014, International Business Machines

5 * Corporation and others. All Rights Reserved.	5 * Corporation and others. All Rights Reserved.

6 *	6 *

7 *******************************************************************************	7 *******************************************************************************

8 * file name: usprep.cpp	8 * file name: usprep.cpp

9 * encoding: US-ASCII	9 * encoding: US-ASCII

10 * tab size: 8 (not used)	10 * tab size: 8 (not used)

11 * indentation:4	11 * indentation:4

12 *	12 *

13 * created on: 2003jul2	13 * created on: 2003jul2

14 * created by: Ram Viswanadha	14 * created by: Ram Viswanadha

15 */	15 */

16	16

17 #include "unicode/utypes.h"	17 #include "unicode/utypes.h"

18	18

19 #if !UCONFIG_NO_IDNA	19 #if !UCONFIG_NO_IDNA

20	20

21 #include "unicode/usprep.h"	21 #include "unicode/usprep.h"

22	22

23 #include "unicode/unorm.h"	23 #include "unicode/normalizer2.h"

24 #include "unicode/ustring.h"	24 #include "unicode/ustring.h"

25 #include "unicode/uchar.h"	25 #include "unicode/uchar.h"

26 #include "unicode/uversion.h"	26 #include "unicode/uversion.h"

27 #include "umutex.h"	27 #include "umutex.h"

28 #include "cmemory.h"	28 #include "cmemory.h"

29 #include "sprpimpl.h"	29 #include "sprpimpl.h"

30 #include "ustr_imp.h"	30 #include "ustr_imp.h"

31 #include "uhash.h"	31 #include "uhash.h"

32 #include "cstring.h"	32 #include "cstring.h"

33 #include "udataswp.h"	33 #include "udataswp.h"

34 #include "ucln_cmn.h"	34 #include "ucln_cmn.h"

35 #include "ubidi_props.h"	35 #include "ubidi_props.h"

	36 #include "uprops.h"

36	37

37 U_NAMESPACE_USE	38 U_NAMESPACE_USE

38	39

39 U_CDECL_BEGIN	40 U_CDECL_BEGIN

40	41

41 /*	42 /*

42 Static cache for already opened StringPrep profiles	43 Static cache for already opened StringPrep profiles

43 */	44 */

44 static UHashtable *SHARED_DATA_HASHTABLE = NULL;	45 static UHashtable *SHARED_DATA_HASHTABLE = NULL;

45 static icu::UInitOnce gSharedDataInitOnce;	46 static icu::UInitOnce gSharedDataInitOnce;

(...skipping 449 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
495	496

496 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){	497 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){

497 type = USPREP_DELETE;	498 type = USPREP_DELETE;

498 isIndex =FALSE;	499 isIndex =FALSE;

499 value = 0;	500 value = 0;

500 }	501 }

501 }	502 }

502 return type;	503 return type;

503 }	504 }

504	505

505	506 // TODO: change to writing to UnicodeString not UChar *

506

507 static int32_t	507 static int32_t

508 usprep_map( const UStringPrepProfile* profile,	508 usprep_map( const UStringPrepProfile* profile,

509 const UChar* src, int32_t srcLength,	509 const UChar* src, int32_t srcLength,

510 UChar* dest, int32_t destCapacity,	510 UChar* dest, int32_t destCapacity,

511 int32_t options,	511 int32_t options,

512 UParseError* parseError,	512 UParseError* parseError,

513 UErrorCode* status ){	513 UErrorCode* status ){

514	514

515 uint16_t result;	515 uint16_t result;

516 int32_t destIndex=0;	516 int32_t destIndex=0;

(...skipping 74 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
591 dest[destIndex+1] = U16_TRAIL(ch);	591 dest[destIndex+1] = U16_TRAIL(ch);

592 }	592 }

593 destIndex +=2;	593 destIndex +=2;

594 }	594 }

595	595

596 }	596 }

597	597

598 return u_terminateUChars(dest, destCapacity, destIndex, status);	598 return u_terminateUChars(dest, destCapacity, destIndex, status);

599 }	599 }

600	600

601	601 /*

602 static int32_t

603 usprep_normalize( const UChar* src, int32_t srcLength,

604 UChar* dest, int32_t destCapacity,

605 UErrorCode* status ){

606 return unorm_normalize(

607 src, srcLength,

608 UNORM_NFKC, UNORM_UNICODE_3_2,

609 dest, destCapacity,

610 status);

611 }

612

613

614 /*

615 1) Map -- For each character in the input, check if it has a mapping	602 1) Map -- For each character in the input, check if it has a mapping

616 and, if so, replace it with its mapping.	603 and, if so, replace it with its mapping.

617	604

618 2) Normalize -- Possibly normalize the result of step 1 using Unicode	605 2) Normalize -- Possibly normalize the result of step 1 using Unicode

619 normalization.	606 normalization.

620	607

621 3) Prohibit -- Check for any characters that are not allowed in the	608 3) Prohibit -- Check for any characters that are not allowed in the

622 output. If any are found, return an error.	609 output. If any are found, return an error.

623	610

624 4) Check bidi -- Possibly check for right-to-left characters, and if	611 4) Check bidi -- Possibly check for right-to-left characters, and if

(...skipping 17 matching lines...) Expand all Loading...
642	629

643 1) The characters in section 5.8 MUST be prohibited.	630 1) The characters in section 5.8 MUST be prohibited.

644	631

645 2) If a string contains any RandALCat character, the string MUST NOT	632 2) If a string contains any RandALCat character, the string MUST NOT

646 contain any LCat character.	633 contain any LCat character.

647	634

648 3) If a string contains any RandALCat character, a RandALCat	635 3) If a string contains any RandALCat character, a RandALCat

649 character MUST be the first character of the string, and a	636 character MUST be the first character of the string, and a

650 RandALCat character MUST be the last character of the string.	637 RandALCat character MUST be the last character of the string.

651 */	638 */

652

653 #define MAX_STACK_BUFFER_SIZE 300

654

655

656 U_CAPI int32_t U_EXPORT2	639 U_CAPI int32_t U_EXPORT2

657 usprep_prepare( const UStringPrepProfile* profile,	640 usprep_prepare( const UStringPrepProfile* profile,

658 const UChar* src, int32_t srcLength,	641 const UChar* src, int32_t srcLength,

659 UChar* dest, int32_t destCapacity,	642 UChar* dest, int32_t destCapacity,

660 int32_t options,	643 int32_t options,

661 UParseError* parseError,	644 UParseError* parseError,

662 UErrorCode* status ){	645 UErrorCode* status ){

663	646

664 // check error status	647 // check error status

665 if(status == NULL \|\| U_FAILURE(*status)){	648 if(U_FAILURE(*status)){

666 return 0;	649 return 0;

667 }	650 }

668	651

669 //check arguments	652 //check arguments

670 if(profile==NULL \|\| src==NULL \|\| srcLength<-1 \|\| (dest==NULL && destCapacity !=0)) {	653 if(profile==NULL \|\|

	654 (src==NULL ? srcLength!=0 : srcLength<-1) \|\|

	655 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {

671 *status=U_ILLEGAL_ARGUMENT_ERROR;	656 *status=U_ILLEGAL_ARGUMENT_ERROR;

672 return 0;	657 return 0;

673 }	658 }

674	659

675 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];	660 //get the string length

676 UChar *b1 = b1Stack, *b2 = b2Stack;	661 if(srcLength < 0){

677 int32_t b1Len, b2Len=0,	662 srcLength = u_strlen(src);

678 b1Capacity = MAX_STACK_BUFFER_SIZE ,	663 }

679 b2Capacity = MAX_STACK_BUFFER_SIZE;	664 // map

680 uint16_t result;	665 UnicodeString s1;

681 int32_t b2Index = 0;	666 UChar *b1 = s1.getBuffer(srcLength);

	667 if(b1==NULL){

	668 *status = U_MEMORY_ALLOCATION_ERROR;

	669 return 0;

	670 }

	671 int32_t b1Len = usprep_map(profile, src, srcLength,

	672 b1, s1.getCapacity(), options, parseError, status );

	673 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);

	674

	675 if(*status == U_BUFFER_OVERFLOW_ERROR){

	676 // redo processing of string

	677 /* we do not have enough room so grow the buffer*/

	678 b1 = s1.getBuffer(b1Len);

	679 if(b1==NULL){

	680 *status = U_MEMORY_ALLOCATION_ERROR;

	681 return 0;

	682 }

	683

	684 *status = U_ZERO_ERROR; // reset error

	685 b1Len = usprep_map(profile, src, srcLength,

	686 b1, s1.getCapacity(), options, parseError, status);

	687 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);

	688 }

	689 if(U_FAILURE(*status)){

	690 return 0;

	691 }

	692

	693 // normalize

	694 UnicodeString s2;

	695 if(profile->doNFKC){

	696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);

	697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));

	698 if(U_FAILURE(*status)){

	699 return 0;

	700 }

	701 fn2.normalize(s1, s2, *status);

	702 }else{

	703 s2.fastCopyFrom(s1);

	704 }

	705 if(U_FAILURE(*status)){

	706 return 0;

	707 }

	708

	709 // Prohibit and checkBiDi in one pass

	710 const UChar *b2 = s2.getBuffer();

	711 int32_t b2Len = s2.length();

682 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;	712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;

683 UBool leftToRight=FALSE, rightToLeft=FALSE;	713 UBool leftToRight=FALSE, rightToLeft=FALSE;

684 int32_t rtlPos =-1, ltrPos =-1;	714 int32_t rtlPos =-1, ltrPos =-1;

685	715

686 //get the string length	716 for(int32_t b2Index=0; b2Index<b2Len;){

687 if(srcLength == -1){	717 UChar32 ch = 0;

688 srcLength = u_strlen(src);

689 }

690 // map

691 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);

692

693 if(*status == U_BUFFER_OVERFLOW_ERROR){

694 // redo processing of string

695 /* we do not have enough room so grow the buffer*/

696 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);

697 if(b1==NULL){

698 *status = U_MEMORY_ALLOCATION_ERROR;

699 goto CLEANUP;

700 }

701

702 *status = U_ZERO_ERROR; // reset error

703

704 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);

705

706 }

707

708 // normalize

709 if(profile->doNFKC == TRUE){

710 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);

711

712 if(*status == U_BUFFER_OVERFLOW_ERROR){

713 // redo processing of string

714 /* we do not have enough room so grow the buffer*/

715 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);

716 if(b2==NULL){

717 *status = U_MEMORY_ALLOCATION_ERROR;

718 goto CLEANUP;

719 }

720

721 *status = U_ZERO_ERROR; // reset error

722

723 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);

724

725 }

726

727 }else{

728 b2 = b1;

729 b2Len = b1Len;

730 }

731

732

733 if(U_FAILURE(*status)){

734 goto CLEANUP;

735 }

736

737 UChar32 ch;

738 UStringPrepType type;

739 int16_t value;

740 UBool isIndex;

741

742 // Prohibit and checkBiDi in one pass

743 for(b2Index=0; b2Index<b2Len;){

744

745 ch = 0;

746

747 U16_NEXT(b2, b2Index, b2Len, ch);	718 U16_NEXT(b2, b2Index, b2Len, ch);

748	719

	720 uint16_t result;

749 UTRIE_GET16(&profile->sprepTrie,ch,result);	721 UTRIE_GET16(&profile->sprepTrie,ch,result);

750	722

751 type = getValues(result, value, isIndex);	723 int16_t value;

	724 UBool isIndex;

	725 UStringPrepType type = getValues(result, value, isIndex);

752	726

753 if( type == USPREP_PROHIBITED \|\|	727 if( type == USPREP_PROHIBITED \|\|

754 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)	728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)

755 ){	729 ){

756 *status = U_STRINGPREP_PROHIBITED_ERROR;	730 *status = U_STRINGPREP_PROHIBITED_ERROR;

757 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);	731 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);

758 goto CLEANUP;	732 return 0;

759 }	733 }

760	734

761 if(profile->checkBiDi) {	735 if(profile->checkBiDi) {

762 direction = ubidi_getClass(profile->bdp, ch);	736 direction = ubidi_getClass(profile->bdp, ch);

763 if(firstCharDir == U_CHAR_DIRECTION_COUNT){	737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){

764 firstCharDir = direction;	738 firstCharDir = direction;

765 }	739 }

766 if(direction == U_LEFT_TO_RIGHT){	740 if(direction == U_LEFT_TO_RIGHT){

767 leftToRight = TRUE;	741 leftToRight = TRUE;

768 ltrPos = b2Index-1;	742 ltrPos = b2Index-1;

769 }	743 }

770 if(direction == U_RIGHT_TO_LEFT \|\| direction == U_RIGHT_TO_LEFT_ARABIC){	744 if(direction == U_RIGHT_TO_LEFT \|\| direction == U_RIGHT_TO_LEFT_ARABIC){

771 rightToLeft = TRUE;	745 rightToLeft = TRUE;

772 rtlPos = b2Index-1;	746 rtlPos = b2Index-1;

773 }	747 }

774 }	748 }

775 }	749 }

776 if(profile->checkBiDi == TRUE){	750 if(profile->checkBiDi == TRUE){

777 // satisfy 2	751 // satisfy 2

778 if( leftToRight == TRUE && rightToLeft == TRUE){	752 if( leftToRight == TRUE && rightToLeft == TRUE){

779 *status = U_STRINGPREP_CHECK_BIDI_ERROR;	753 *status = U_STRINGPREP_CHECK_BIDI_ERROR;

780 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);	754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);

781 goto CLEANUP;	755 return 0;

782 }	756 }

783	757

784 //satisfy 3	758 //satisfy 3

785 if( rightToLeft == TRUE &&	759 if( rightToLeft == TRUE &&

786 !((firstCharDir == U_RIGHT_TO_LEFT \|\| firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&	760 !((firstCharDir == U_RIGHT_TO_LEFT \|\| firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&

787 (direction == U_RIGHT_TO_LEFT \|\| direction == U_RIGHT_TO_LEFT_ARABIC))	761 (direction == U_RIGHT_TO_LEFT \|\| direction == U_RIGHT_TO_LEFT_ARABIC))

788 ){	762 ){

789 *status = U_STRINGPREP_CHECK_BIDI_ERROR;	763 *status = U_STRINGPREP_CHECK_BIDI_ERROR;

790 uprv_syntaxError(b2, rtlPos, b2Len, parseError);	764 uprv_syntaxError(b2, rtlPos, b2Len, parseError);

791 return FALSE;	765 return FALSE;

792 }	766 }

793 }	767 }

794 if(b2Len>0 && b2Len <= destCapacity){	768 return s2.extract(dest, destCapacity, *status);

795 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);

796 }

797

798 CLEANUP:

799 if(b1!=b1Stack){

800 uprv_free(b1);

801 b1=NULL;

802 }

803

804 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){

805 uprv_free(b2);

806 b2=NULL;

807 }

808 return u_terminateUChars(dest, destCapacity, b2Len, status);

809 }	769 }

810	770

811	771

812 /* data swapping ------------------------------------------------------------ */	772 /* data swapping ------------------------------------------------------------ */

813	773

814 U_CAPI int32_t U_EXPORT2	774 U_CAPI int32_t U_EXPORT2

815 usprep_swap(const UDataSwapper *ds,	775 usprep_swap(const UDataSwapper *ds,

816 const void *inData, int32_t length, void *outData,	776 const void *inData, int32_t length, void *outData,

817 UErrorCode *pErrorCode) {	777 UErrorCode *pErrorCode) {

818 const UDataInfo *pInfo;	778 const UDataInfo *pInfo;

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
896 offset+=count;	856 offset+=count;

897	857

898 /* swap the UTrie */	858 /* swap the UTrie */

899 count=indexes[_SPREP_INDEX_TRIE_SIZE];	859 count=indexes[_SPREP_INDEX_TRIE_SIZE];

900 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);	860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);

901 offset+=count;	861 offset+=count;

902	862

903 /* swap the uint16_t mappingTable[] */	863 /* swap the uint16_t mappingTable[] */

904 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];	864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];

905 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);	865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);

906 offset+=count;	866 //offset+=count;

907 }	867 }

908	868

909 return headerSize+size;	869 return headerSize+size;

910 }	870 }

911	871

912 #endif /* #if !UCONFIG_NO_IDNA */	872 #endif /* #if !UCONFIG_NO_IDNA */

OLD	NEW

« no previous file with comments | « source/common/ushape.cpp ('k') | source/common/ustr_cnv.c » ('j') | no next file with comments »