source/common/ustrcase.cpp - Issue 1621843002: ICU 56 update step 1

Side by Side Diff: source/common/ustrcase.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 *******************************************************************************	2 *******************************************************************************

3 *	3 *

4 * Copyright (C) 2001-2014, International Business Machines	4 * Copyright (C) 2001-2015, International Business Machines

5 * Corporation and others. All Rights Reserved.	5 * Corporation and others. All Rights Reserved.

6 *	6 *

7 *******************************************************************************	7 *******************************************************************************

8 * file name: ustrcase.cpp	8 * file name: ustrcase.cpp

9 * encoding: US-ASCII	9 * encoding: US-ASCII

10 * tab size: 8 (not used)	10 * tab size: 8 (not used)

11 * indentation:4	11 * indentation:4

12 *	12 *

13 * created on: 2002feb20	13 * created on: 2002feb20

14 * created by: Markus W. Scherer	14 * created by: Markus W. Scherer

15 *	15 *

16 * Implementation file for string casing C API functions.	16 * Implementation file for string casing C API functions.

17 * Uses functions from uchar.c for basic functionality that requires access	17 * Uses functions from uchar.c for basic functionality that requires access

18 * to the Unicode Character Database (uprops.dat).	18 * to the Unicode Character Database (uprops.dat).

19 */	19 */

20	20

21 #include "unicode/utypes.h"	21 #include "unicode/utypes.h"

22 #include "unicode/brkiter.h"	22 #include "unicode/brkiter.h"

23 #include "unicode/ustring.h"	23 #include "unicode/ustring.h"

24 #include "unicode/ucasemap.h"	24 #include "unicode/ucasemap.h"

25 #include "unicode/ubrk.h"	25 #include "unicode/ubrk.h"

26 #include "unicode/utf.h"	26 #include "unicode/utf.h"

27 #include "unicode/utf16.h"	27 #include "unicode/utf16.h"

28 #include "cmemory.h"	28 #include "cmemory.h"

29 #include "ucase.h"	29 #include "ucase.h"

30 #include "ustr_imp.h"	30 #include "ustr_imp.h"

	31 #include "uassert.h"

31	32

32 U_NAMESPACE_USE	33 U_NAMESPACE_USE

33	34

34 /* string casing ------------------------------------------------------------ */	35 /* string casing ------------------------------------------------------------ */

35	36

36 /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */	37 /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */

37 static inline int32_t	38 static inline int32_t

38 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,	39 appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,

39 int32_t result, const UChar *s) {	40 int32_t result, const UChar *s) {

40 UChar32 c;	41 UChar32 c;

(...skipping 415 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
456 * It makes caseless (but not canonical caseless) matches independent of	457 * It makes caseless (but not canonical caseless) matches independent of

457 * the normalization code.	458 * the normalization code.

458 */	459 */

459	460

460 /* stack element for previous-level source/decomposition pointers */	461 /* stack element for previous-level source/decomposition pointers */

461 struct CmpEquivLevel {	462 struct CmpEquivLevel {

462 const UChar start, s, *limit;	463 const UChar start, s, *limit;

463 };	464 };

464 typedef struct CmpEquivLevel CmpEquivLevel;	465 typedef struct CmpEquivLevel CmpEquivLevel;

465	466

466 /* internal function */	467 /**

467 U_CFUNC int32_t	468 * Internal implementation code comparing string with case fold.

468 u_strcmpFold(const UChar *s1, int32_t length1,	469 * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch( ).

469 const UChar *s2, int32_t length2,	470 *

470 uint32_t options,	471 * @param s1 input string 1

471 UErrorCode *pErrorCode) {	472 * @param length1 length of string 1, or -1 (NULL terminated)

	473 * @param s2 input string 2

	474 * @param length2 length of string 2, or -1 (NULL terminated)

	475 * @param options compare options

	476 * @param matchLen1 (output) length of partial prefix match in s1

	477 * @param matchLen2 (output) length of partial prefix match in s2

	478 * @param pErrorCode receives error status

	479 * @return The result of comparison

	480 */

	481 static int32_t _cmpFold(

	482 const UChar *s1, int32_t length1,

	483 const UChar *s2, int32_t length2,

	484 uint32_t options,

	485 int32_t matchLen1, int32_t matchLen2,

	486 UErrorCode *pErrorCode) {

	487 int32_t cmpRes = 0;

	488

472 const UCaseProps *csp;	489 const UCaseProps *csp;

473	490

474 /* current-level start/limit - s1/s2 as current */	491 /* current-level start/limit - s1/s2 as current */

475 const UChar start1, start2, limit1, limit2;	492 const UChar start1, start2, limit1, limit2;

476	493

	494 /* points to the original start address */

	495 const UChar org1, org2;

	496

	497 /* points to the end of match + 1 */

	498 const UChar m1, m2;

	499

477 /* case folding variables */	500 /* case folding variables */

478 const UChar *p;	501 const UChar *p;

479 int32_t length;	502 int32_t length;

480	503

481 /* stacks of previous-level start/current/limit */	504 /* stacks of previous-level start/current/limit */

482 CmpEquivLevel stack1[2], stack2[2];	505 CmpEquivLevel stack1[2], stack2[2];

483	506

484 /* case folding buffers, only use current-level start/limit */	507 /* case folding buffers, only use current-level start/limit */

485 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];	508 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];

486	509

487 /* track which is the current level per string */	510 /* track which is the current level per string */

488 int32_t level1, level2;	511 int32_t level1, level2;

489	512

490 /* current code units, and code points for lookups */	513 /* current code units, and code points for lookups */

491 UChar32 c1, c2, cp1, cp2;	514 UChar32 c1, c2, cp1, cp2;

492	515

493 /* no argument error checking because this itself is not an API */	516 /* no argument error checking because this itself is not an API */

494	517

495 /*	518 /*

496 * assume that at least the option U_COMPARE_IGNORE_CASE is set	519 * assume that at least the option U_COMPARE_IGNORE_CASE is set

497 * otherwise this function would have to behave exactly as uprv_strCompare()	520 * otherwise this function would have to behave exactly as uprv_strCompare()

498 */	521 */

499 csp=ucase_getSingleton();	522 csp=ucase_getSingleton();

500 if(U_FAILURE(*pErrorCode)) {	523 if(U_FAILURE(*pErrorCode)) {

501 return 0;	524 return 0;

502 }	525 }

503	526

504 /* initialize */	527 /* initialize */

505 start1=s1;	528 if(matchLen1) {

	529 U_ASSERT(matchLen2 !=NULL);

	530 *matchLen1=0;

	531 *matchLen2=0;

	532 }

	533

	534 start1=m1=org1=s1;

506 if(length1==-1) {	535 if(length1==-1) {

507 limit1=NULL;	536 limit1=NULL;

508 } else {	537 } else {

509 limit1=s1+length1;	538 limit1=s1+length1;

510 }	539 }

511	540

512 start2=s2;	541 start2=m2=org2=s2;

513 if(length2==-1) {	542 if(length2==-1) {

514 limit2=NULL;	543 limit2=NULL;

515 } else {	544 } else {

516 limit2=s2+length2;	545 limit2=s2+length2;

517 }	546 }

518	547

519 level1=level2=0;	548 level1=level2=0;

520 c1=c2=-1;	549 c1=c2=-1;

521	550

522 /* comparison loop */	551 /* comparison loop */

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
570 s2=stack2[level2].s; /Not uninitialized/	599 s2=stack2[level2].s; /Not uninitialized/

571 limit2=stack2[level2].limit; /Not uninitialized/	600 limit2=stack2[level2].limit; /Not uninitialized/

572 }	601 }

573 }	602 }

574	603

575 /*	604 /*

576 * compare c1 and c2	605 * compare c1 and c2

577 * either variable c1, c2 is -1 only if the corresponding string is fini shed	606 * either variable c1, c2 is -1 only if the corresponding string is fini shed

578 */	607 */

579 if(c1==c2) {	608 if(c1==c2) {

	609 const UChar next1, next2;

	610

580 if(c1<0) {	611 if(c1<0) {

581 return 0; /* c1==c2==-1 indicating end of strings */	612 cmpRes=0; /* c1==c2==-1 indicating end of strings */

	613 break;

	614 }

	615

	616 /*

	617 * Note: Move the match positions in both strings at the same time

	618 * only when corresponding code point(s) in the original string s

	619 * are fully consumed. For example, when comparing s1="Fust" an d

	620 * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matc hes

	621 * the first code point in the case-folded data. But the second "s"

	622 * has no matching code point in s1, so this implementation ret urns

	623 * 2 as the prefix match length ("Fu").

	624 */

	625 next1=next2=NULL;

	626 if(level1==0) {

	627 next1=s1;

	628 } else if(s1==limit1) {

	629 /* Note: This implementation only use a single level of stack.

	630 * If this code needs to be changed to use multiple levels

	631 * of stacks, the code above should check if the current

	632 * code is at the end of all stacks.

	633 */

	634 U_ASSERT(level1==1);

	635

	636 /* is s1 at the end of the current stack? */

	637 next1=stack1[0].s;

	638 }

	639

	640 if (next1!=NULL) {

	641 if(level2==0) {

	642 next2=s2;

	643 } else if(s2==limit2) {

	644 U_ASSERT(level2==1);

	645

	646 /* is s2 at the end of the current stack? */

	647 next2=stack2[0].s;

	648 }

	649 if(next2!=NULL) {

	650 m1=next1;

	651 m2=next2;

	652 }

582 }	653 }

583 c1=c2=-1; /* make us fetch new code units */	654 c1=c2=-1; /* make us fetch new code units */

584 continue;	655 continue;

585 } else if(c1<0) {	656 } else if(c1<0) {

586 return -1; /* string 1 ends before string 2 */	657 cmpRes=-1; /* string 1 ends before string 2 */

	658 break;

587 } else if(c2<0) {	659 } else if(c2<0) {

588 return 1; /* string 2 ends before string 1 */	660 cmpRes=1; /* string 2 ends before string 1 */

	661 break;

589 }	662 }

590 /* c1!=c2 && c1>=0 && c2>=0 */	663 /* c1!=c2 && c1>=0 && c2>=0 */

591	664

592 /* get complete code points for c1, c2 for lookups if either is a surrog ate */	665 /* get complete code points for c1, c2 for lookups if either is a surrog ate */

593 cp1=c1;	666 cp1=c1;

594 if(U_IS_SURROGATE(c1)) {	667 if(U_IS_SURROGATE(c1)) {

595 UChar c;	668 UChar c;

596	669

597 if(U_IS_SURROGATE_LEAD(c1)) {	670 if(U_IS_SURROGATE_LEAD(c1)) {

598 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {	671 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
637 ++s1;	710 ++s1;

638 } else /* isTrail(c1) */ {	711 } else /* isTrail(c1) */ {

639 /*	712 /*

640 * we got a supplementary code point when hitting its trail surrogate,	713 * we got a supplementary code point when hitting its trail surrogate,

641 * therefore the lead surrogate must have been the same as i n the other string;	714 * therefore the lead surrogate must have been the same as i n the other string;

642 * compare this decomposition with the lead surrogate in the other string	715 * compare this decomposition with the lead surrogate in the other string

643 * remember that this simulates bulk text replacement:	716 * remember that this simulates bulk text replacement:

644 * the decomposition would replace the entire code point	717 * the decomposition would replace the entire code point

645 */	718 */

646 --s2;	719 --s2;

	720 --m2;

647 c2=*(s2-1);	721 c2=*(s2-1);

648 }	722 }

649 }	723 }

650	724

651 /* push current level pointers */	725 /* push current level pointers */

652 stack1[0].start=start1;	726 stack1[0].start=start1;

653 stack1[0].s=s1;	727 stack1[0].s=s1;

654 stack1[0].limit=limit1;	728 stack1[0].limit=limit1;

655 ++level1;	729 ++level1;

656	730

(...skipping 25 matching lines...) Expand all Loading...
682 ++s2;	756 ++s2;

683 } else /* isTrail(c2) */ {	757 } else /* isTrail(c2) */ {

684 /*	758 /*

685 * we got a supplementary code point when hitting its trail surrogate,	759 * we got a supplementary code point when hitting its trail surrogate,

686 * therefore the lead surrogate must have been the same as i n the other string;	760 * therefore the lead surrogate must have been the same as i n the other string;

687 * compare this decomposition with the lead surrogate in the other string	761 * compare this decomposition with the lead surrogate in the other string

688 * remember that this simulates bulk text replacement:	762 * remember that this simulates bulk text replacement:

689 * the decomposition would replace the entire code point	763 * the decomposition would replace the entire code point

690 */	764 */

691 --s1;	765 --s1;

	766 --m2;

692 c1=*(s1-1);	767 c1=*(s1-1);

693 }	768 }

694 }	769 }

695	770

696 /* push current level pointers */	771 /* push current level pointers */

697 stack2[0].start=start2;	772 stack2[0].start=start2;

698 stack2[0].s=s2;	773 stack2[0].s=s2;

699 stack2[0].limit=limit2;	774 stack2[0].limit=limit2;

700 ++level2;	775 ++level2;

701	776

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
750 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) \|\|	825 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) \|\|

751 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))	826 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))

752 ) {	827 ) {

753 /* part of a surrogate pair, leave >=d800 */	828 /* part of a surrogate pair, leave >=d800 */

754 } else {	829 } else {

755 /* BMP code point - may be surrogate code point - make <d800 */	830 /* BMP code point - may be surrogate code point - make <d800 */

756 c2-=0x2800;	831 c2-=0x2800;

757 }	832 }

758 }	833 }

759	834

760 return c1-c2;	835 cmpRes=c1-c2;

	836 break;

761 }	837 }

	838

	839 if(matchLen1) {

	840 *matchLen1=m1-org1;

	841 *matchLen2=m2-org2;

	842 }

	843 return cmpRes;

	844 }

	845

	846 /* internal function */

	847 U_CFUNC int32_t

	848 u_strcmpFold(const UChar *s1, int32_t length1,

	849 const UChar *s2, int32_t length2,

	850 uint32_t options,

	851 UErrorCode *pErrorCode) {

	852 return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode);

762 }	853 }

763	854

764 /* public API functions */	855 /* public API functions */

765	856

766 U_CAPI int32_t U_EXPORT2	857 U_CAPI int32_t U_EXPORT2

767 u_strCaseCompare(const UChar *s1, int32_t length1,	858 u_strCaseCompare(const UChar *s1, int32_t length1,

768 const UChar *s2, int32_t length2,	859 const UChar *s2, int32_t length2,

769 uint32_t options,	860 uint32_t options,

770 UErrorCode *pErrorCode) {	861 UErrorCode *pErrorCode) {

771 /* argument checking */	862 /* argument checking */

(...skipping 25 matching lines...) Expand all Loading...
797 &errorCode);	888 &errorCode);

798 }	889 }

799	890

800 U_CAPI int32_t U_EXPORT2	891 U_CAPI int32_t U_EXPORT2

801 u_strncasecmp(const UChar s1, const UChar s2, int32_t n, uint32_t options) {	892 u_strncasecmp(const UChar s1, const UChar s2, int32_t n, uint32_t options) {

802 UErrorCode errorCode=U_ZERO_ERROR;	893 UErrorCode errorCode=U_ZERO_ERROR;

803 return u_strcmpFold(s1, n, s2, n,	894 return u_strcmpFold(s1, n, s2, n,

804 options\|(U_COMPARE_IGNORE_CASE\|_STRNCMP_STYLE),	895 options\|(U_COMPARE_IGNORE_CASE\|_STRNCMP_STYLE),

805 &errorCode);	896 &errorCode);

806 }	897 }

	898

	899 /* internal API - detect length of shared prefix */

	900 U_CAPI void

	901 u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,

	902 const UChar *s2, int32_t length2,

	903 uint32_t options,

	904 int32_t matchLen1, int32_t matchLen2,

	905 UErrorCode *pErrorCode) {

	906 _cmpFold(s1, length1, s2, length2, options,

	907 matchLen1, matchLen2, pErrorCode);

	908 }

OLD	NEW

« no previous file with comments | « source/common/ustr_imp.h ('k') | source/common/ustrenum.h » ('j') | no next file with comments »