| Index: source/common/ustrcase.cpp
|
| diff --git a/source/common/ustrcase.cpp b/source/common/ustrcase.cpp
|
| index e687267df868c723c8f8670523fa36f733d59aab..4697160e810e08c52a21ca5b7fe2d3cf4a0db73f 100644
|
| --- a/source/common/ustrcase.cpp
|
| +++ b/source/common/ustrcase.cpp
|
| @@ -1,7 +1,7 @@
|
| /*
|
| *******************************************************************************
|
| *
|
| -* Copyright (C) 2001-2014, International Business Machines
|
| +* Copyright (C) 2001-2015, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *
|
| *******************************************************************************
|
| @@ -28,6 +28,7 @@
|
| #include "cmemory.h"
|
| #include "ucase.h"
|
| #include "ustr_imp.h"
|
| +#include "uassert.h"
|
|
|
| U_NAMESPACE_USE
|
|
|
| @@ -463,17 +464,39 @@ struct CmpEquivLevel {
|
| };
|
| typedef struct CmpEquivLevel CmpEquivLevel;
|
|
|
| -/* internal function */
|
| -U_CFUNC int32_t
|
| -u_strcmpFold(const UChar *s1, int32_t length1,
|
| - const UChar *s2, int32_t length2,
|
| - uint32_t options,
|
| - UErrorCode *pErrorCode) {
|
| +/**
|
| + * Internal implementation that compares two strings under case folding.
|
| + * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().
|
| + *
|
| + * @param s1 input string 1
|
| + * @param length1 length of string 1, or -1 (NUL-terminated)
|
| + * @param s2 input string 2
|
| + * @param length2 length of string 2, or -1 (NUL-terminated)
|
| + * @param options compare options
|
| + * @param matchLen1 (output) length of partial prefix match in s1; may be NULL if not needed
|
| + * @param matchLen2 (output) length of partial prefix match in s2; must be non-NULL when matchLen1 is
|
| + * @param pErrorCode receives error status
|
| + * @return The result of comparison: 0 if both strings end together, -1/1 if s1/s2 ends first, else c1-c2
|
| + */
|
| +static int32_t _cmpFold(
|
| + const UChar *s1, int32_t length1,
|
| + const UChar *s2, int32_t length2,
|
| + uint32_t options,
|
| + int32_t *matchLen1, int32_t *matchLen2,
|
| + UErrorCode *pErrorCode) {
|
| + int32_t cmpRes = 0;
|
| +
|
| const UCaseProps *csp;
|
|
|
| /* current-level start/limit - s1/s2 as current */
|
| const UChar *start1, *start2, *limit1, *limit2;
|
|
|
| + /* original start addresses of s1/s2; the match lengths are computed relative to them */
|
| + const UChar *org1, *org2;
|
| +
|
| + /* point to the end of the matched prefix + 1 in s1/s2 */
|
| + const UChar *m1, *m2;
|
| +
|
| /* case folding variables */
|
| const UChar *p;
|
| int32_t length;
|
| @@ -502,14 +525,20 @@ u_strcmpFold(const UChar *s1, int32_t length1,
|
| }
|
|
|
| /* initialize */
|
| - start1=s1;
|
| + if(matchLen1) { /* prefix-match outputs are optional; when requested, both pointers are required */
|
| + U_ASSERT(matchLen2 !=NULL);
|
| + *matchLen1=0;
|
| + *matchLen2=0;
|
| + }
|
| +
|
| + start1=m1=org1=s1; /* nothing matched yet: match end starts at the original start */
|
| if(length1==-1) {
|
| limit1=NULL;
|
| } else {
|
| limit1=s1+length1;
|
| }
|
|
|
| - start2=s2;
|
| + start2=m2=org2=s2; /* nothing matched yet: match end starts at the original start */
|
| if(length2==-1) {
|
| limit2=NULL;
|
| } else {
|
| @@ -577,15 +606,59 @@ u_strcmpFold(const UChar *s1, int32_t length1,
|
| * either variable c1, c2 is -1 only if the corresponding string is finished
|
| */
|
| if(c1==c2) {
|
| + const UChar *next1, *next2;
|
| +
|
| if(c1<0) {
|
| - return 0; /* c1==c2==-1 indicating end of strings */
|
| + cmpRes=0; /* c1==c2==-1 indicating end of strings */
|
| + break;
|
| + }
|
| +
|
| + /*
|
| + * Note: Move the match positions in both strings at the same time
|
| + * only when corresponding code point(s) in the original strings
|
| + * are fully consumed. For example, when comparing s1="Fust" and
|
| + * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches
|
| + * the first code point in the case-folded data. But the second "s"
|
| + * has no matching code point in s1, so this implementation returns
|
| + * 2 as the prefix match length ("Fu").
|
| + */
|
| + next1=next2=NULL;
|
| + if(level1==0) {
|
| + next1=s1;
|
| + } else if(s1==limit1) {
|
| + /* Note: This implementation only uses a single level of stack.
|
| + * If this code needs to be changed to use multiple levels
|
| + * of stacks, the code above should check if the current
|
| + * code is at the end of all stacks.
|
| + */
|
| + U_ASSERT(level1==1);
|
| +
|
| + /* is s1 at the end of the current stack? */
|
| + next1=stack1[0].s;
|
| + }
|
| +
|
| + if (next1!=NULL) {
|
| + if(level2==0) {
|
| + next2=s2;
|
| + } else if(s2==limit2) {
|
| + U_ASSERT(level2==1);
|
| +
|
| + /* is s2 at the end of the current stack? */
|
| + next2=stack2[0].s;
|
| + }
|
| + if(next2!=NULL) { /* both sides qualify: advance both match ends together */
|
| + m1=next1;
|
| + m2=next2;
|
| + }
|
| }
|
| c1=c2=-1; /* make us fetch new code units */
|
| continue;
|
| } else if(c1<0) {
|
| - return -1; /* string 1 ends before string 2 */
|
| + cmpRes=-1; /* string 1 ends before string 2 */
|
| + break;
|
| } else if(c2<0) {
|
| - return 1; /* string 2 ends before string 1 */
|
| + cmpRes=1; /* string 2 ends before string 1 */
|
| + break;
|
| }
|
| /* c1!=c2 && c1>=0 && c2>=0 */
|
|
|
| @@ -644,6 +717,7 @@ u_strcmpFold(const UChar *s1, int32_t length1,
|
| * the decomposition would replace the entire code point
|
| */
|
| --s2;
|
| + --m2; /* s2 was backed up by one code unit; move its match end back with it */
|
| c2=*(s2-1);
|
| }
|
| }
|
| @@ -689,6 +763,7 @@ u_strcmpFold(const UChar *s1, int32_t length1,
|
| * the decomposition would replace the entire code point
|
| */
|
| --s1;
|
| + --m1; /* s1 (not s2) was backed up here, so it is the s1 match end that moves back */
|
| c1=*(s1-1);
|
| }
|
| }
|
| @@ -757,8 +832,24 @@ u_strcmpFold(const UChar *s1, int32_t length1,
|
| }
|
| }
|
|
|
| - return c1-c2;
|
| + cmpRes=c1-c2;
|
| + break;
|
| }
|
| +
|
| + if(matchLen1) { /* report matched prefix lengths as offsets from the original starts */
|
| + *matchLen1=(int32_t)(m1-org1); /* explicit cast: pointer difference is ptrdiff_t */
|
| + *matchLen2=(int32_t)(m2-org2);
|
| + }
|
| + return cmpRes;
|
| +}
|
| +
|
| +/* internal function: case-folding comparison; calls _cmpFold() with NULL prefix-match outputs */
|
| +U_CFUNC int32_t
|
| +u_strcmpFold(const UChar *s1, int32_t length1,
|
| + const UChar *s2, int32_t length2,
|
| + uint32_t options,
|
| + UErrorCode *pErrorCode) {
|
| + return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode);
|
| }
|
|
|
| /* public API functions */
|
| @@ -804,3 +895,14 @@ u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
|
| options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),
|
| &errorCode);
|
| }
|
| +
|
| +/* internal API - report in matchLen1/matchLen2 the length of the shared case-insensitive prefix; the comparison result of _cmpFold() is discarded */
|
| +U_CAPI void
|
| +u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
|
| + const UChar *s2, int32_t length2,
|
| + uint32_t options,
|
| + int32_t *matchLen1, int32_t *matchLen2,
|
| + UErrorCode *pErrorCode) {
|
| + _cmpFold(s1, length1, s2, length2, options,
|
| + matchLen1, matchLen2, pErrorCode);
|
| +}
|
|
|