| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ****************************************************************************** | |
| 3 * Copyright (C) 1996-2012, International Business Machines * | |
| 4 * Corporation and others. All Rights Reserved. * | |
| 5 ****************************************************************************** | |
| 6 */ | |
| 7 | |
| 8 /** | |
| 9 * \file | |
| 10 * \brief Originally, added as C++ API for Collation data used to compute minLen
gthInChars | |
| 11 * \internal | |
| 12 */ | |
| 13 | |
| 14 /* | |
| 15 * Note: This module was included in ICU 4.0.1 as @internal technology preview f
or supporting | |
| 16 * Boyer-Moore string search API. For now, only SSearchTest depends on this modu
le. I temporarily | |
| 17 * moved the module from i18n directory to intltest, because we have no plan to
publish this | |
| 18 * as public API. (2012-12-18 yoshito) | |
| 19 */ | |
| 20 | |
| 21 #ifndef COLL_DATA_H | |
| 22 #define COLL_DATA_H | |
| 23 | |
| 24 #include "unicode/utypes.h" | |
| 25 | |
| 26 #if !UCONFIG_NO_COLLATION | |
| 27 | |
| 28 #include "unicode/ucol.h" | |
| 29 #include "unicode/unistr.h" | |
| 30 | |
| 31 /** | |
| 32 * The number of <code>uint32_t</code> entries in the internal CE buffer (<code>ceBuffer</code>) of a <code>CEList</code> object. | |
| 33 */ | |
| 34 #define CELIST_BUFFER_SIZE 4 | |
| 35 | |
| 36 /** | |
| 37 * \def INSTRUMENT_CELIST | |
| 38 * Define this to enable the <code>CEList</code> objects to collect | |
| 39 * statistics. It is not defined in this header. | |
| 40 */ | |
| 41 | |
| 42 /** | |
| 43 * The initial size of the list in a <code>StringList</code> object. | |
| 44 */ | |
| 45 #define STRING_LIST_BUFFER_SIZE 16 | |
| 46 | |
| 47 U_NAMESPACE_USE | |
| 48 | |
| 49 /** | |
| 50 * This object holds a list of CEs generated from a particular | |
| 51 * <code>UnicodeString</code> | |
| 52 * | |
| 53 */ | |
| 54 class CEList | |
| 55 { | |
| 56 public: | |
| 57 /** | |
| 58 * Construct a <code>CEList</code> object. | |
| 59 * | |
| 60 * @param coll - the Collator used to collect the CEs. | |
| 61 * @param string - the string for which to collect the CEs. | |
| 62 * @param status - will be set if any errors occur. | |
| 63 * | |
| 64 * Note: if on return, status is set to an error code, | |
| 65 * the only safe thing to do with this object is to call | |
| 66 * the destructor. | |
| 67 */ | |
| 68 CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); | |
| 69 | |
| 70 /** | |
| 71 * The destructor. | |
| 72 */ | |
| 73 ~CEList(); | |
| 74 | |
| 75 /** | |
| 76 * Return the number of CEs in the list. | |
| 77 * | |
| 78 * @return the number of CEs in the list. | |
| 79 */ | |
| 80 int32_t size() const; | |
| 81 | |
| 82 /** | |
| 83 * Get a particular CE from the list. | |
| 84 * | |
| 85 * @param index - the index of the CE to return | |
| 86 * | |
| 87 * @return the CE, or <code>0</code> if <code>index</code> is out of range | |
| 88 */ | |
| 89 uint32_t get(int32_t index) const; | |
| 90 | |
| 91 /** | |
| 92 * Check if the CEs in another <code>CEList</code> match the | |
| 93 * suffix of this list starting at a given offset. | |
| 94 * | |
| 95 * @param offset - the offset of the suffix | |
| 96 * @param other - the other <code>CEList</code> | |
| 97 * | |
| 98 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise. | |
| 99 */ | |
| 100 UBool matchesAt(int32_t offset, const CEList *other) const; | |
| 101 | |
| 102 /** | |
| 103 * The index operator. | |
| 104 * | |
| 105 * @param index - the index | |
| 106 * | |
| 107 * @return a reference to the given CE in the list (a non-const reference, although the operator itself is const) | |
| 108 */ | |
| 109 uint32_t &operator[](int32_t index) const; | |
| 110 | |
| 111 private: | |
| 112 void add(uint32_t ce, UErrorCode &status); /* internal helper; presumably appends ce to the list - confirm in the implementation */ | |
| 113 | |
| 114 uint32_t ceBuffer[CELIST_BUFFER_SIZE]; /* fixed-size internal CE buffer */ | |
| 115 uint32_t *ces; /* presumably the CE storage currently in use - confirm in the implementation */ | |
| 116 int32_t listMax; /* presumably the current capacity of the list - confirm */ | |
| 117 int32_t listSize; /* presumably the number of CEs stored - confirm */ | |
| 118 }; | |
| 119 | |
| 120 /** | |
| 121 * StringList | |
| 122 * | |
| 123 * This object holds a list of <code>UnicodeString</code> objects. | |
| 124 */ | |
| 125 class StringList | |
| 126 { | |
| 127 public: | |
| 128 /** | |
| 129 * Construct an empty <code>StringList</code> | |
| 130 * | |
| 131 * @param status - will be set if any errors occur. | |
| 132 * | |
| 133 * Note: if on return, status is set to an error code, | |
| 134 * the only safe thing to do with this object is to call | |
| 135 * the destructor. | |
| 136 */ | |
| 137 StringList(UErrorCode &status); | |
| 138 | |
| 139 /** | |
| 140 * The destructor. | |
| 141 */ | |
| 142 ~StringList(); | |
| 143 | |
| 144 /** | |
| 145 * Add a string to the list. | |
| 146 * | |
| 147 * @param string - the string to add | |
| 148 * @param status - will be set if any errors occur. | |
| 149 */ | |
| 150 void add(const UnicodeString *string, UErrorCode &status); | |
| 151 | |
| 152 /** | |
| 153 * Add an array of <code>UChar</code>s to the list. | |
| 154 * | |
| 155 * @param chars - the address of the array of <code>UChar</code>s | |
| 156 * @param count - the number of <code>UChar</code>s in the array | |
| 157 * @param status - will be set if any errors occur. | |
| 158 */ | |
| 159 void add(const UChar *chars, int32_t count, UErrorCode &status); | |
| 160 | |
| 161 /** | |
| 162 * Get a particular string from the list. | |
| 163 * | |
| 164 * @param index - the index of the string | |
| 165 * | |
| 166 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code> | |
| 167 * if <code>index</code> is out of bounds. | |
| 168 */ | |
| 169 const UnicodeString *get(int32_t index) const; | |
| 170 | |
| 171 /** | |
| 172 * Get the number of strings in the list. | |
| 173 * | |
| 174 * @return the number of strings in the list. | |
| 175 */ | |
| 176 int32_t size() const; | |
| 177 | |
| 178 private: | |
| 179 UnicodeString *strings; /* presumably the array holding the listed strings - confirm in the implementation */ | |
| 180 int32_t listMax; /* presumably the current capacity of the list - confirm */ | |
| 181 int32_t listSize; /* presumably the number of strings stored - confirm */ | |
| 182 }; | |
| 183 | |
| 184 | |
| 185 /* | |
| 186 * Forward references to internal classes. | |
| 187 */ | |
| 188 class StringToCEsMap; | |
| 189 class CEToStringsMap; | |
| 190 | |
| 191 /** | |
| 192 * CollData | |
| 193 * | |
| 194 * This class holds the Collator-specific data needed to | |
| 195 * compute the length of the shortest string that can | |
| 196 * generate a particular list of CEs. | |
| 197 * | |
| 198 * <code>CollData</code> objects are quite expensive to compute. Because | |
| 199 * of this, they are cached. When you call <code>CollData::open</code> it | |
| 200 * returns a reference counted cached object. When you call <code>CollData::clos
e</code> | |
| 201 * the reference count on the object is decremented but the object is not delete
d. | |
| 202 * | |
| 203 * If you do not need to reuse any unreferenced objects in the cache, you can ca
ll | |
| 204 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>Co
llData</code> | |
| 205 * objects, you can call <code>CollData::freeCollDataCache</code> | |
| 206 */ | |
| 207 class CollData | |
| 208 { | |
| 209 public: | |
| 210 /** | |
| 211 * Construct a <code>CollData</code> object. | |
| 212 * NOTE(review): this class declares no open()/close() or cache functions, so the caching text in the class comment looks stale - confirm. | |
| 213 * @param collator - the collator | |
| 214 * @param status - will be set if any errors occur. | |
| 215 */ | |
| 216 CollData(UCollator *collator, UErrorCode &status); | |
| 217 | |
| 218 /** | |
| 219 * The destructor. | |
| 220 */ | |
| 221 ~CollData(); | |
| 222 | |
| 223 /** | |
| 224 * Get the <code>UCollator</code> object used to create this object. | |
| 225 * The object returned may not be the exact object that was used to | |
| 226 * create this object, but it will have the same behavior. | |
| 227 */ | |
| 228 UCollator *getCollator() const; | |
| 229 | |
| 230 /** | |
| 231 * Get a list of all the strings which generate a list | |
| 232 * of CEs starting with a given CE. | |
| 233 * | |
| 234 * @param ce - the CE (NOTE(review): declared <code>int32_t</code>, while <code>CEList</code> exposes CEs as <code>uint32_t</code> - confirm) | |
| 235 * | |
| 236 * @return a <code>StringList</code> object containing all | |
| 237 * the strings, or <code>NULL</code> if there are | |
| 238 * no such strings. | |
| 239 */ | |
| 240 const StringList *getStringList(int32_t ce) const; | |
| 241 | |
| 242 /** | |
| 243 * Get a list of the CEs generated by a particular string. | |
| 244 * | |
| 245 * @param string - the string | |
| 246 * | |
| 247 * @return a <code>CEList</code> object containing the CEs. You | |
| 248 * must call <code>freeCEList</code> when you are finished | |
| 249 * using the <code>CEList</code>. | |
| 250 */ | |
| 251 const CEList *getCEList(const UnicodeString *string) const; | |
| 252 | |
| 253 /** | |
| 254 * Release a <code>CEList</code> returned by <code>getCEList</code>. | |
| 255 * | |
| 256 * @param list - the <code>CEList</code> to free. | |
| 257 */ | |
| 258 void freeCEList(const CEList *list); | |
| 259 | |
| 260 /** | |
| 261 * Return the length of the shortest string that will generate | |
| 262 * the given list of CEs. | |
| 263 * | |
| 264 * @param ces - the CEs | |
| 265 * @param offset - the offset of the first CE in the list to use. | |
| 266 * | |
| 267 * @return the length of the shortest string. | |
| 268 */ | |
| 269 int32_t minLengthInChars(const CEList *ces, int32_t offset) const; | |
| 270 | |
| 271 | |
| 272 /** | |
| 273 * Return the length of the shortest string that will generate | |
| 274 * the given list of CEs. | |
| 275 * | |
| 276 * Note: the algorithm used to do this computation is recursive. To | |
| 277 * limit the amount of recursion, a "history" list is used to record | |
| 278 * the best answer starting at a particular offset in the list of CEs. | |
| 279 * If the same offset is visited again during the recursion, the answer | |
| 280 * in the history list is used. | |
| 281 * | |
| 282 * @param ces - the CEs | |
| 283 * @param offset - the offset of the first CE in the list to use. | |
| 284 * @param history - the history list. Must be at least as long as | |
| 285 * the number of CEs in the <code>CEList</code> | |
| 286 * | |
| 287 * @return the length of the shortest string. | |
| 288 */ | |
| 289 int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history)
const; | |
| 290 | |
| 291 private: | |
| 292 UCollator *coll; /* presumably the collator returned by getCollator() - confirm in the implementation */ | |
| 293 CEToStringsMap *ceToCharsStartingWith; /* presumably the map behind getStringList() - confirm */ | |
| 294 | |
| 295 uint32_t minHan; /* NOTE(review): meaning not visible in this header; presumably a Han range bound - confirm */ | |
| 296 uint32_t maxHan; /* NOTE(review): meaning not visible in this header; presumably a Han range bound - confirm */ | |
| 297 | |
| 298 uint32_t jamoLimits[4]; /* NOTE(review): meaning of the four entries is not visible in this header - confirm */ | |
| 299 }; | |
| 300 | |
| 301 #endif // #if !UCONFIG_NO_COLLATION | |
| 302 #endif // #ifndef COLL_DATA_H | |
| OLD | NEW |