Index: source/common/ustrcase.cpp |
diff --git a/source/common/ustrcase.cpp b/source/common/ustrcase.cpp |
index e687267df868c723c8f8670523fa36f733d59aab..4697160e810e08c52a21ca5b7fe2d3cf4a0db73f 100644 |
--- a/source/common/ustrcase.cpp |
+++ b/source/common/ustrcase.cpp |
@@ -1,7 +1,7 @@ |
/* |
******************************************************************************* |
* |
-* Copyright (C) 2001-2014, International Business Machines |
+* Copyright (C) 2001-2015, International Business Machines |
* Corporation and others. All Rights Reserved. |
* |
******************************************************************************* |
@@ -28,6 +28,7 @@ |
#include "cmemory.h" |
#include "ucase.h" |
#include "ustr_imp.h" |
+#include "uassert.h" |
U_NAMESPACE_USE |
@@ -463,17 +464,39 @@ struct CmpEquivLevel { |
}; |
typedef struct CmpEquivLevel CmpEquivLevel; |
-/* internal function */ |
-U_CFUNC int32_t |
-u_strcmpFold(const UChar *s1, int32_t length1, |
- const UChar *s2, int32_t length2, |
- uint32_t options, |
- UErrorCode *pErrorCode) { |
+/** |
+ * Internal implementation code comparing string with case fold. |
+ * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch(). |
+ * |
+ * @param s1 input string 1 |
+ * @param length1 length of string 1, or -1 (NULL terminated) |
+ * @param s2 input string 2 |
+ * @param length2 length of string 2, or -1 (NULL terminated) |
+ * @param options compare options |
+ * @param matchLen1 (output) length of partial prefix match in s1 |
+ * @param matchLen2 (output) length of partial prefix match in s2 |
+ * @param pErrorCode receives error status |
+ * @return The result of comparison |
+ */ |
+static int32_t _cmpFold( |
+ const UChar *s1, int32_t length1, |
+ const UChar *s2, int32_t length2, |
+ uint32_t options, |
+ int32_t *matchLen1, int32_t *matchLen2, |
+ UErrorCode *pErrorCode) { |
+ int32_t cmpRes = 0; |
+ |
const UCaseProps *csp; |
/* current-level start/limit - s1/s2 as current */ |
const UChar *start1, *start2, *limit1, *limit2; |
+ /* points to the original start address */ |
+ const UChar *org1, *org2; |
+ |
+ /* points to the end of match + 1 */ |
+ const UChar *m1, *m2; |
+ |
/* case folding variables */ |
const UChar *p; |
int32_t length; |
@@ -502,14 +525,20 @@ u_strcmpFold(const UChar *s1, int32_t length1, |
} |
/* initialize */ |
- start1=s1; |
+ if(matchLen1) { |
+ U_ASSERT(matchLen2 !=NULL); |
+ *matchLen1=0; |
+ *matchLen2=0; |
+ } |
+ |
+ start1=m1=org1=s1; |
if(length1==-1) { |
limit1=NULL; |
} else { |
limit1=s1+length1; |
} |
- start2=s2; |
+ start2=m2=org2=s2; |
if(length2==-1) { |
limit2=NULL; |
} else { |
@@ -577,15 +606,59 @@ u_strcmpFold(const UChar *s1, int32_t length1, |
* either variable c1, c2 is -1 only if the corresponding string is finished |
*/ |
if(c1==c2) { |
+ const UChar *next1, *next2; |
+ |
if(c1<0) { |
- return 0; /* c1==c2==-1 indicating end of strings */ |
+ cmpRes=0; /* c1==c2==-1 indicating end of strings */ |
+ break; |
+ } |
+ |
+ /* |
+ * Note: Move the match positions in both strings at the same time |
+ * only when corresponding code point(s) in the original strings |
+ * are fully consumed. For example, when comparing s1="Fust" and |
+ * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches |
+ * the first code point in the case-folded data. But the second "s" |
+ * has no matching code point in s1, so this implementation returns |
+ * 2 as the prefix match length ("Fu"). |
+ */ |
+ next1=next2=NULL; |
+ if(level1==0) { |
+ next1=s1; |
+ } else if(s1==limit1) { |
+ /* Note: This implementation only use a single level of stack. |
+ * If this code needs to be changed to use multiple levels |
+ * of stacks, the code above should check if the current |
+ * code is at the end of all stacks. |
+ */ |
+ U_ASSERT(level1==1); |
+ |
+ /* is s1 at the end of the current stack? */ |
+ next1=stack1[0].s; |
+ } |
+ |
+ if (next1!=NULL) { |
+ if(level2==0) { |
+ next2=s2; |
+ } else if(s2==limit2) { |
+ U_ASSERT(level2==1); |
+ |
+ /* is s2 at the end of the current stack? */ |
+ next2=stack2[0].s; |
+ } |
+ if(next2!=NULL) { |
+ m1=next1; |
+ m2=next2; |
+ } |
} |
c1=c2=-1; /* make us fetch new code units */ |
continue; |
} else if(c1<0) { |
- return -1; /* string 1 ends before string 2 */ |
+ cmpRes=-1; /* string 1 ends before string 2 */ |
+ break; |
} else if(c2<0) { |
- return 1; /* string 2 ends before string 1 */ |
+ cmpRes=1; /* string 2 ends before string 1 */ |
+ break; |
} |
/* c1!=c2 && c1>=0 && c2>=0 */ |
@@ -644,6 +717,7 @@ u_strcmpFold(const UChar *s1, int32_t length1, |
* the decomposition would replace the entire code point |
*/ |
--s2; |
+ --m2; |
c2=*(s2-1); |
} |
} |
@@ -689,6 +763,7 @@ u_strcmpFold(const UChar *s1, int32_t length1, |
* the decomposition would replace the entire code point |
*/ |
--s1; |
+ --m2; |
c1=*(s1-1); |
} |
} |
@@ -757,8 +832,24 @@ u_strcmpFold(const UChar *s1, int32_t length1, |
} |
} |
- return c1-c2; |
+ cmpRes=c1-c2; |
+ break; |
} |
+ |
+ if(matchLen1) { |
+ *matchLen1=m1-org1; |
+ *matchLen2=m2-org2; |
+ } |
+ return cmpRes; |
+} |
+ |
+/* internal function */ |
+U_CFUNC int32_t |
+u_strcmpFold(const UChar *s1, int32_t length1, |
+ const UChar *s2, int32_t length2, |
+ uint32_t options, |
+ UErrorCode *pErrorCode) { |
+ return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode); |
} |
/* public API functions */ |
@@ -804,3 +895,14 @@ u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { |
options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), |
&errorCode); |
} |
+ |
+/* internal API - detect length of shared prefix */ |
+U_CAPI void |
+u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, |
+ const UChar *s2, int32_t length2, |
+ uint32_t options, |
+ int32_t *matchLen1, int32_t *matchLen2, |
+ UErrorCode *pErrorCode) { |
+ _cmpFold(s1, length1, s2, length2, options, |
+ matchLen1, matchLen2, pErrorCode); |
+} |