OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 1999-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: unistr_case.cpp |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:2 |
| 12 * |
| 13 * created on: 2004aug19 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * Case-mapping functions moved here from unistr.cpp |
| 17 */ |
| 18 |
| 19 #include "unicode/utypes.h" |
| 20 #include "unicode/putil.h" |
| 21 #include "unicode/locid.h" |
| 22 #include "cstring.h" |
| 23 #include "cmemory.h" |
| 24 #include "unicode/ustring.h" |
| 25 #include "unicode/unistr.h" |
| 26 #include "unicode/uchar.h" |
| 27 #include "unicode/ubrk.h" |
| 28 #include "ustr_imp.h" |
| 29 #include "uhash.h" |
| 30 |
| 31 U_NAMESPACE_BEGIN |
| 32 |
| 33 //======================================== |
| 34 // Read-only implementation |
| 35 //======================================== |
| 36 |
| 37 int8_t |
| 38 UnicodeString::doCaseCompare(int32_t start, |
| 39 int32_t length, |
| 40 const UChar *srcChars, |
| 41 int32_t srcStart, |
| 42 int32_t srcLength, |
| 43 uint32_t options) const |
| 44 { |
| 45 // compare illegal string values |
| 46 // treat const UChar *srcChars==NULL as an empty string |
| 47 if(isBogus()) { |
| 48 return -1; |
| 49 } |
| 50 |
| 51 // pin indices to legal values |
| 52 pinIndices(start, length); |
| 53 |
| 54 if(srcChars == NULL) { |
| 55 srcStart = srcLength = 0; |
| 56 } |
| 57 |
| 58 // get the correct pointer |
| 59 const UChar *chars = getArrayStart(); |
| 60 |
| 61 chars += start; |
| 62 srcChars += srcStart; |
| 63 |
| 64 if(chars != srcChars) { |
| 65 UErrorCode errorCode=U_ZERO_ERROR; |
| 66 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, |
| 67 options|U_COMPARE_IGNORE_CASE, &errorCode); |
| 68 if(result!=0) { |
| 69 return (int8_t)(result >> 24 | 1); |
| 70 } |
| 71 } else { |
| 72 // get the srcLength if necessary |
| 73 if(srcLength < 0) { |
| 74 srcLength = u_strlen(srcChars + srcStart); |
| 75 } |
| 76 if(length != srcLength) { |
| 77 return (int8_t)((length - srcLength) >> 24 | 1); |
| 78 } |
| 79 } |
| 80 return 0; |
| 81 } |
| 82 |
| 83 //======================================== |
| 84 // Write implementation |
| 85 //======================================== |
| 86 |
| 87 /* |
| 88 * Implement argument checking and buffer handling |
| 89 * for string case mapping as a common function. |
| 90 */ |
| 91 |
| 92 UnicodeString & |
| 93 UnicodeString::caseMap(BreakIterator *titleIter, |
| 94 const char *locale, |
| 95 uint32_t options, |
| 96 int32_t toWhichCase) { |
| 97 if(isEmpty() || !isWritable()) { |
| 98 // nothing to do |
| 99 return *this; |
| 100 } |
| 101 |
| 102 const UCaseProps *csp=ucase_getSingleton(); |
| 103 |
| 104 // We need to allocate a new buffer for the internal string case mapping funct
ion. |
| 105 // This is very similar to how doReplace() keeps the old array pointer |
| 106 // and deletes the old array itself after it is done. |
| 107 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new a
rray. |
| 108 UChar oldStackBuffer[US_STACKBUF_SIZE]; |
| 109 UChar *oldArray; |
| 110 int32_t oldLength; |
| 111 |
| 112 if(fFlags&kUsingStackBuffer) { |
| 113 // copy the stack buffer contents because it will be overwritten |
| 114 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength); |
| 115 oldArray = oldStackBuffer; |
| 116 oldLength = fShortLength; |
| 117 } else { |
| 118 oldArray = getArrayStart(); |
| 119 oldLength = length(); |
| 120 } |
| 121 |
| 122 int32_t capacity; |
| 123 if(oldLength <= US_STACKBUF_SIZE) { |
| 124 capacity = US_STACKBUF_SIZE; |
| 125 } else { |
| 126 capacity = oldLength + 20; |
| 127 } |
| 128 int32_t *bufferToDelete = 0; |
| 129 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { |
| 130 return *this; |
| 131 } |
| 132 |
| 133 // Case-map, and if the result is too long, then reallocate and repeat. |
| 134 UErrorCode errorCode; |
| 135 int32_t newLength; |
| 136 do { |
| 137 errorCode = U_ZERO_ERROR; |
| 138 if(toWhichCase==TO_LOWER) { |
| 139 newLength = ustr_toLower(csp, getArrayStart(), getCapacity(), |
| 140 oldArray, oldLength, |
| 141 locale, &errorCode); |
| 142 } else if(toWhichCase==TO_UPPER) { |
| 143 newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(), |
| 144 oldArray, oldLength, |
| 145 locale, &errorCode); |
| 146 } else if(toWhichCase==TO_TITLE) { |
| 147 #if UCONFIG_NO_BREAK_ITERATION |
| 148 errorCode=U_UNSUPPORTED_ERROR; |
| 149 #else |
| 150 newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(), |
| 151 oldArray, oldLength, |
| 152 (UBreakIterator *)titleIter, locale, options, &er
rorCode); |
| 153 #endif |
| 154 } else { |
| 155 newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(), |
| 156 oldArray, oldLength, |
| 157 options, |
| 158 &errorCode); |
| 159 } |
| 160 setLength(newLength); |
| 161 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, ne
wLength, FALSE)); |
| 162 |
| 163 if (bufferToDelete) { |
| 164 uprv_free(bufferToDelete); |
| 165 } |
| 166 if(U_FAILURE(errorCode)) { |
| 167 setToBogus(); |
| 168 } |
| 169 return *this; |
| 170 } |
| 171 |
| 172 UnicodeString & |
| 173 UnicodeString::toLower() { |
| 174 return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER); |
| 175 } |
| 176 |
| 177 UnicodeString & |
| 178 UnicodeString::toLower(const Locale &locale) { |
| 179 return caseMap(0, locale.getName(), 0, TO_LOWER); |
| 180 } |
| 181 |
| 182 UnicodeString & |
| 183 UnicodeString::toUpper() { |
| 184 return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER); |
| 185 } |
| 186 |
| 187 UnicodeString & |
| 188 UnicodeString::toUpper(const Locale &locale) { |
| 189 return caseMap(0, locale.getName(), 0, TO_UPPER); |
| 190 } |
| 191 |
| 192 #if !UCONFIG_NO_BREAK_ITERATION |
| 193 |
| 194 UnicodeString & |
| 195 UnicodeString::toTitle(BreakIterator *titleIter) { |
| 196 return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE); |
| 197 } |
| 198 |
| 199 UnicodeString & |
| 200 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { |
| 201 return caseMap(titleIter, locale.getName(), 0, TO_TITLE); |
| 202 } |
| 203 |
| 204 UnicodeString & |
| 205 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t
options) { |
| 206 return caseMap(titleIter, locale.getName(), options, TO_TITLE); |
| 207 } |
| 208 |
| 209 #endif |
| 210 |
| 211 UnicodeString & |
| 212 UnicodeString::foldCase(uint32_t options) { |
| 213 /* The Locale parameter isn't used. Use "" instead. */ |
| 214 return caseMap(0, "", options, FOLD_CASE); |
| 215 } |
| 216 |
| 217 U_NAMESPACE_END |
| 218 |
| 219 // Defined here to reduce dependencies on break iterator |
| 220 U_CAPI int32_t U_EXPORT2 |
| 221 uhash_hashCaselessUnicodeString(const UHashTok key) { |
| 222 U_NAMESPACE_USE |
| 223 const UnicodeString *str = (const UnicodeString*) key.pointer; |
| 224 if (str == NULL) { |
| 225 return 0; |
| 226 } |
| 227 // Inefficient; a better way would be to have a hash function in |
| 228 // UnicodeString that does case folding on the fly. |
| 229 UnicodeString copy(*str); |
| 230 return copy.foldCase().hashCode(); |
| 231 } |
| 232 |
| 233 // Defined here to reduce dependencies on break iterator |
| 234 U_CAPI UBool U_EXPORT2 |
| 235 uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) { |
| 236 U_NAMESPACE_USE |
| 237 const UnicodeString *str1 = (const UnicodeString*) key1.pointer; |
| 238 const UnicodeString *str2 = (const UnicodeString*) key2.pointer; |
| 239 if (str1 == str2) { |
| 240 return TRUE; |
| 241 } |
| 242 if (str1 == NULL || str2 == NULL) { |
| 243 return FALSE; |
| 244 } |
| 245 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; |
| 246 } |
| 247 |
OLD | NEW |