OLD | NEW |
(Empty) | |
| 1 /** |
| 2 *******************************************************************************
***** |
| 3 * Copyright (C) 2006-2009, International Business Machines Corporation and othe
rs. * |
| 4 * All Rights Reserved.
* |
| 5 *******************************************************************************
***** |
| 6 */ |
| 7 |
| 8 #include "unicode/utypes.h" |
| 9 |
| 10 #if !UCONFIG_NO_BREAK_ITERATION |
| 11 |
| 12 #include "brkeng.h" |
| 13 #include "dictbe.h" |
| 14 #include "triedict.h" |
| 15 #include "unicode/uchar.h" |
| 16 #include "unicode/uniset.h" |
| 17 #include "unicode/chariter.h" |
| 18 #include "unicode/ures.h" |
| 19 #include "unicode/udata.h" |
| 20 #include "unicode/putil.h" |
| 21 #include "unicode/ustring.h" |
| 22 #include "unicode/uscript.h" |
| 23 #include "uvector.h" |
| 24 #include "umutex.h" |
| 25 #include "uresimp.h" |
| 26 #include "ubrkimpl.h" |
| 27 |
| 28 U_NAMESPACE_BEGIN |
| 29 |
| 30 /* |
| 31 ****************************************************************** |
| 32 */ |
| 33 |
| 34 LanguageBreakEngine::LanguageBreakEngine() { |
| 35 } |
| 36 |
| 37 LanguageBreakEngine::~LanguageBreakEngine() { |
| 38 } |
| 39 |
| 40 /* |
| 41 ****************************************************************** |
| 42 */ |
| 43 |
| 44 LanguageBreakFactory::LanguageBreakFactory() { |
| 45 } |
| 46 |
| 47 LanguageBreakFactory::~LanguageBreakFactory() { |
| 48 } |
| 49 |
| 50 /* |
| 51 ****************************************************************** |
| 52 */ |
| 53 |
| 54 UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { |
| 55 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i
) { |
| 56 fHandled[i] = 0; |
| 57 } |
| 58 } |
| 59 |
| 60 UnhandledEngine::~UnhandledEngine() { |
| 61 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i
) { |
| 62 if (fHandled[i] != 0) { |
| 63 delete fHandled[i]; |
| 64 } |
| 65 } |
| 66 } |
| 67 |
| 68 UBool |
| 69 UnhandledEngine::handles(UChar32 c, int32_t breakType) const { |
| 70 return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHan
dled[0])) |
| 71 && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); |
| 72 } |
| 73 |
| 74 int32_t |
| 75 UnhandledEngine::findBreaks( UText *text, |
| 76 int32_t startPos, |
| 77 int32_t endPos, |
| 78 UBool reverse, |
| 79 int32_t breakType, |
| 80 UStack &/*foundBreaks*/ ) const { |
| 81 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled
[0]))) { |
| 82 UChar32 c = utext_current32(text); |
| 83 if (reverse) { |
| 84 while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[bre
akType]->contains(c)) { |
| 85 c = utext_previous32(text); |
| 86 } |
| 87 } |
| 88 else { |
| 89 while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[break
Type]->contains(c)) { |
| 90 utext_next32(text); // TODO: recast loop to work wit
h post-increment operations. |
| 91 c = utext_current32(text); |
| 92 } |
| 93 } |
| 94 } |
| 95 return 0; |
| 96 } |
| 97 |
| 98 void |
| 99 UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { |
| 100 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled
[0]))) { |
| 101 if (fHandled[breakType] == 0) { |
| 102 fHandled[breakType] = new UnicodeSet(); |
| 103 if (fHandled[breakType] == 0) { |
| 104 return; |
| 105 } |
| 106 } |
| 107 if (!fHandled[breakType]->contains(c)) { |
| 108 UErrorCode status = U_ZERO_ERROR; |
| 109 // Apply the entire script of the character. |
| 110 int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); |
| 111 fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, sta
tus); |
| 112 } |
| 113 } |
| 114 } |
| 115 |
| 116 /* |
| 117 ****************************************************************** |
| 118 */ |
| 119 |
| 120 ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { |
| 121 fEngines = 0; |
| 122 } |
| 123 |
| 124 ICULanguageBreakFactory::~ICULanguageBreakFactory() { |
| 125 if (fEngines != 0) { |
| 126 delete fEngines; |
| 127 } |
| 128 } |
| 129 |
| 130 U_NAMESPACE_END |
| 131 U_CDECL_BEGIN |
| 132 static void U_CALLCONV _deleteEngine(void *obj) { |
| 133 delete (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj; |
| 134 } |
| 135 U_CDECL_END |
| 136 U_NAMESPACE_BEGIN |
| 137 |
| 138 const LanguageBreakEngine * |
| 139 ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { |
| 140 UBool needsInit; |
| 141 int32_t i; |
| 142 const LanguageBreakEngine *lbe = NULL; |
| 143 UErrorCode status = U_ZERO_ERROR; |
| 144 |
| 145 // TODO: The global mutex should not be used. |
| 146 // The global mutex should only be used for short periods. |
| 147 // A ICULanguageBreakFactory specific mutex should be used. |
| 148 umtx_lock(NULL); |
| 149 needsInit = (UBool)(fEngines == NULL); |
| 150 if (!needsInit) { |
| 151 i = fEngines->size(); |
| 152 while (--i >= 0) { |
| 153 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); |
| 154 if (lbe != NULL && lbe->handles(c, breakType)) { |
| 155 break; |
| 156 } |
| 157 lbe = NULL; |
| 158 } |
| 159 } |
| 160 umtx_unlock(NULL); |
| 161 |
| 162 if (lbe != NULL) { |
| 163 return lbe; |
| 164 } |
| 165 |
| 166 if (needsInit) { |
| 167 UStack *engines = new UStack(_deleteEngine, NULL, status); |
| 168 if (U_SUCCESS(status) && engines == NULL) { |
| 169 status = U_MEMORY_ALLOCATION_ERROR; |
| 170 } |
| 171 else if (U_FAILURE(status)) { |
| 172 delete engines; |
| 173 engines = NULL; |
| 174 } |
| 175 else { |
| 176 umtx_lock(NULL); |
| 177 if (fEngines == NULL) { |
| 178 fEngines = engines; |
| 179 engines = NULL; |
| 180 } |
| 181 umtx_unlock(NULL); |
| 182 delete engines; |
| 183 } |
| 184 } |
| 185 |
| 186 if (fEngines == NULL) { |
| 187 return NULL; |
| 188 } |
| 189 |
| 190 // We didn't find an engine the first time through, or there was no |
| 191 // stack. Create an engine. |
| 192 const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); |
| 193 |
| 194 // Now get the lock, and see if someone else has created it in the |
| 195 // meantime |
| 196 umtx_lock(NULL); |
| 197 i = fEngines->size(); |
| 198 while (--i >= 0) { |
| 199 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); |
| 200 if (lbe != NULL && lbe->handles(c, breakType)) { |
| 201 break; |
| 202 } |
| 203 lbe = NULL; |
| 204 } |
| 205 if (lbe == NULL && newlbe != NULL) { |
| 206 fEngines->push((void *)newlbe, status); |
| 207 lbe = newlbe; |
| 208 newlbe = NULL; |
| 209 } |
| 210 umtx_unlock(NULL); |
| 211 |
| 212 delete newlbe; |
| 213 |
| 214 return lbe; |
| 215 } |
| 216 |
| 217 const LanguageBreakEngine * |
| 218 ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { |
| 219 UErrorCode status = U_ZERO_ERROR; |
| 220 UScriptCode code = uscript_getScript(c, &status); |
| 221 if (U_SUCCESS(status)) { |
| 222 const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType); |
| 223 if (dict != NULL) { |
| 224 const LanguageBreakEngine *engine = NULL; |
| 225 switch(code) { |
| 226 case USCRIPT_THAI: |
| 227 engine = new ThaiBreakEngine(dict, status); |
| 228 break; |
| 229 default: |
| 230 break; |
| 231 } |
| 232 if (engine == NULL) { |
| 233 delete dict; |
| 234 } |
| 235 else if (U_FAILURE(status)) { |
| 236 delete engine; |
| 237 engine = NULL; |
| 238 } |
| 239 return engine; |
| 240 } |
| 241 } |
| 242 return NULL; |
| 243 } |
| 244 |
| 245 const CompactTrieDictionary * |
| 246 ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakTy
pe*/) { |
| 247 UErrorCode status = U_ZERO_ERROR; |
| 248 // Open root from brkitr tree. |
| 249 char dictnbuff[256]; |
| 250 char ext[4]={'\0'}; |
| 251 |
| 252 UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); |
| 253 b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); |
| 254 b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status); |
| 255 int32_t dictnlength = 0; |
| 256 const UChar *dictfname = ures_getString(b, &dictnlength, &status); |
| 257 if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) { |
| 258 dictnlength = 0; |
| 259 status = U_BUFFER_OVERFLOW_ERROR; |
| 260 } |
| 261 if (U_SUCCESS(status) && dictfname) { |
| 262 UChar* extStart=u_strchr(dictfname, 0x002e); |
| 263 int len = 0; |
| 264 if(extStart!=NULL){ |
| 265 len = (int)(extStart-dictfname); |
| 266 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the
buff |
| 267 u_UCharsToChars(dictfname, dictnbuff, len); |
| 268 } |
| 269 dictnbuff[len]=0; // nul terminate |
| 270 } |
| 271 ures_close(b); |
| 272 UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status); |
| 273 if (U_SUCCESS(status)) { |
| 274 const CompactTrieDictionary *dict = new CompactTrieDictionary( |
| 275 file, status); |
| 276 if (U_SUCCESS(status) && dict == NULL) { |
| 277 status = U_MEMORY_ALLOCATION_ERROR; |
| 278 } |
| 279 if (U_FAILURE(status)) { |
| 280 delete dict; |
| 281 dict = NULL; |
| 282 } |
| 283 return dict; |
| 284 } |
| 285 return NULL; |
| 286 } |
| 287 |
| 288 U_NAMESPACE_END |
| 289 |
| 290 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |