source/common/ustrcase.cpp - Issue 1621843002: ICU 56 update step 1

Unified Diff: source/common/ustrcase.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/common/ustrcase.cpp

diff --git a/source/common/ustrcase.cpp b/source/common/ustrcase.cpp

index e687267df868c723c8f8670523fa36f733d59aab..4697160e810e08c52a21ca5b7fe2d3cf4a0db73f 100644

--- a/source/common/ustrcase.cpp

+++ b/source/common/ustrcase.cpp

@@ -1,7 +1,7 @@

*******************************************************************************

@@ -28,6 +28,7 @@

#include "cmemory.h"

#include "ucase.h"

#include "ustr_imp.h"

+#include "uassert.h"

U_NAMESPACE_USE

@@ -463,17 +464,39 @@ struct CmpEquivLevel {

};

typedef struct CmpEquivLevel CmpEquivLevel;

-/* internal function */

-U_CFUNC int32_t

-u_strcmpFold(const UChar *s1, int32_t length1,

- const UChar *s2, int32_t length2,

- uint32_t options,

- UErrorCode *pErrorCode) {

+/**

+ * Internal implementation code comparing strings with case folding.

+ * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().

+ *

+ * @param s1 input string 1

+ * @param length1 length of string 1, or -1 (NUL-terminated)

+ * @param s2 input string 2

+ * @param length2 length of string 2, or -1 (NUL-terminated)

+ * @param options compare options

+ * @param matchLen1 (output) length of partial prefix match in s1

+ * @param matchLen2 (output) length of partial prefix match in s2

+ * @param pErrorCode receives error status

+ * @return The result of comparison

+ */

+static int32_t _cmpFold(

+ const UChar *s1, int32_t length1,

+ const UChar *s2, int32_t length2,

+ uint32_t options,

+ int32_t *matchLen1, int32_t *matchLen2,

+ UErrorCode *pErrorCode) {

+ int32_t cmpRes = 0;

const UCaseProps *csp;

/* current-level start/limit - s1/s2 as current */

const UChar *start1, *start2, *limit1, *limit2;

+ /* points to the original start address */

+ const UChar *org1, *org2;

+ /* points to the end of match + 1 */

+ const UChar *m1, *m2;

/* case folding variables */

const UChar *p;

int32_t length;

@@ -502,14 +525,20 @@ u_strcmpFold(const UChar *s1, int32_t length1,

}

/* initialize */

- start1=s1;

+ if(matchLen1) {

+ U_ASSERT(matchLen2 !=NULL);

+ *matchLen1=0;

+ *matchLen2=0;

+ }

+ start1=m1=org1=s1;

if(length1==-1) {

limit1=NULL;

} else {

limit1=s1+length1;

}

- start2=s2;

+ start2=m2=org2=s2;

if(length2==-1) {

limit2=NULL;

} else {

@@ -577,15 +606,59 @@ u_strcmpFold(const UChar *s1, int32_t length1,

* either variable c1, c2 is -1 only if the corresponding string is finished

if(c1==c2) {

+ const UChar *next1, *next2;

if(c1<0) {

- return 0; /* c1==c2==-1 indicating end of strings */

+ cmpRes=0; /* c1==c2==-1 indicating end of strings */

+ break;

+ }

+ /*

+ * Note: Move the match positions in both strings at the same time

+ * only when corresponding code point(s) in the original strings

+ * are fully consumed. For example, when comparing s1="Fust" and

+ * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches

+ * the first code point in the case-folded data. But the second "s"

+ * has no matching code point in s1, so this implementation returns

+ * 2 as the prefix match length ("Fu").

+ */

+ next1=next2=NULL;

+ if(level1==0) {

+ next1=s1;

+ } else if(s1==limit1) {

+ /* Note: This implementation only uses a single level of stack.

+ * If this code needs to be changed to use multiple levels

+ * of stacks, the code above should check if the current

+ * code is at the end of all stacks.

+ */

+ U_ASSERT(level1==1);

+ /* is s1 at the end of the current stack? */

+ next1=stack1[0].s;

+ }

+ if (next1!=NULL) {

+ if(level2==0) {

+ next2=s2;

+ } else if(s2==limit2) {

+ U_ASSERT(level2==1);

+ /* is s2 at the end of the current stack? */

+ next2=stack2[0].s;

+ }

+ if(next2!=NULL) {

+ m1=next1;

+ m2=next2;

+ }

}

c1=c2=-1; /* make us fetch new code units */

continue;

} else if(c1<0) {

- return -1; /* string 1 ends before string 2 */

+ cmpRes=-1; /* string 1 ends before string 2 */

+ break;

} else if(c2<0) {

- return 1; /* string 2 ends before string 1 */

+ cmpRes=1; /* string 2 ends before string 1 */

+ break;

}

/* c1!=c2 && c1>=0 && c2>=0 */

@@ -644,6 +717,7 @@ u_strcmpFold(const UChar *s1, int32_t length1,

* the decomposition would replace the entire code point

--s2;

+ --m2;

c2=*(s2-1);

}

@@ -689,6 +763,7 @@ u_strcmpFold(const UChar *s1, int32_t length1,

* the decomposition would replace the entire code point

--s1;

+ --m2;

c1=*(s1-1);

}

@@ -757,8 +832,24 @@ u_strcmpFold(const UChar *s1, int32_t length1,

}

- return c1-c2;

+ cmpRes=c1-c2;

+ break;

}

+ if(matchLen1) {

+ *matchLen1=m1-org1;

+ *matchLen2=m2-org2;

+ }

+ return cmpRes;

+/* internal function */

+U_CFUNC int32_t

+u_strcmpFold(const UChar *s1, int32_t length1,

+ const UChar *s2, int32_t length2,

+ uint32_t options,

+ UErrorCode *pErrorCode) {

+ return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode);

}

/* public API functions */

@@ -804,3 +895,14 @@ u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {

options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),

&errorCode);

}

+/* internal API - detect length of shared prefix */

+U_CAPI void

+u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,

+ const UChar *s2, int32_t length2,

+ uint32_t options,

+ int32_t *matchLen1, int32_t *matchLen2,

+ UErrorCode *pErrorCode) {

+ _cmpFold(s1, length1, s2, length2, options,

+ matchLen1, matchLen2, pErrorCode);

« no previous file with comments | « source/common/ustr_imp.h ('k') | source/common/ustrenum.h » ('j') | no next file with comments »