OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (c) 2001-2008, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * Date Name Description |
| 7 * 08/10/2001 aliu Creation. |
| 8 ********************************************************************** |
| 9 */ |
| 10 #ifndef _TRANSREG_H |
| 11 #define _TRANSREG_H |
| 12 |
| 13 #include "unicode/utypes.h" |
| 14 |
| 15 #if !UCONFIG_NO_TRANSLITERATION |
| 16 |
| 17 #include "unicode/uobject.h" |
| 18 #include "unicode/translit.h" |
| 19 #include "hash.h" |
| 20 #include "uvector.h" |
| 21 |
| 22 U_NAMESPACE_BEGIN |
| 23 |
| 24 class TransliteratorEntry; |
| 25 class TransliteratorSpec; |
| 26 class UnicodeString; |
| 27 |
| 28 //------------------------------------------------------------------ |
| 29 // TransliteratorAlias |
| 30 //------------------------------------------------------------------ |
| 31 |
| 32 /** |
| 33 * A TransliteratorAlias object is returned by get() if the given ID |
| 34 * actually translates into something else. The caller then invokes |
| 35 * the create() method on the alias to create the actual |
| 36 * transliterator, and deletes the alias. |
| 37 * |
| 38 * Why all the shenanigans? To prevent circular calls between |
| 39 * the registry code and the transliterator code that deadlock. |
| 40 */ |
| 41 class TransliteratorAlias : public UMemory { |
| 42 public: |
| 43 /** |
| 44 * Construct a simple alias (type == SIMPLE) |
| 45 * @param aliasID the given id. |
| 46 */ |
| 47 TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compound
Filter); |
| 48 |
| 49 /** |
| 50 * Construct a compound RBT alias (type == COMPOUND) |
| 51 */ |
| 52 TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, |
| 53 UVector* adoptedTransliterators, |
| 54 const UnicodeSet* compoundFilter); |
| 55 |
| 56 /** |
| 57 * Construct a rules alias (type == RULES) |
| 58 */ |
| 59 TransliteratorAlias(const UnicodeString& theID, |
| 60 const UnicodeString& rules, |
| 61 UTransDirection dir); |
| 62 |
| 63 ~TransliteratorAlias(); |
| 64 |
| 65 /** |
| 66 * The whole point of create() is that the caller must invoke |
| 67 * it when the registry mutex is NOT held, to prevent deadlock. |
| 68 * It may only be called once. |
| 69 * |
| 70 * Note: Only call create() if isRuleBased() returns FALSE. |
| 71 * |
| 72 * This method must be called *outside* of the TransliteratorRegistry |
| 73 * mutex. |
| 74 */ |
| 75 Transliterator* create(UParseError&, UErrorCode&); |
| 76 |
| 77 /** |
| 78 * Return TRUE if this alias is rule-based. If so, the caller |
| 79 * must call parse() on it, then call TransliteratorRegistry::reget(). |
| 80 */ |
| 81 UBool isRuleBased() const; |
| 82 |
| 83 /** |
| 84 * If isRuleBased() returns TRUE, then the caller must call this |
| 85 * method, followed by TransliteratorRegistry::reget(). The latter |
| 86 * method must be called inside the TransliteratorRegistry mutex. |
| 87 * |
| 88 * Note: Only call parse() if isRuleBased() returns TRUE. |
| 89 * |
| 90 * This method must be called *outside* of the TransliteratorRegistry |
| 91 * mutex, because it can instantiate Transliterators embedded in |
| 92 * the rules via the "&Latin-Arabic()" syntax. |
| 93 */ |
| 94 void parse(TransliteratorParser& parser, |
| 95 UParseError& pe, UErrorCode& ec) const; |
| 96 |
| 97 private: |
| 98 // We actually come in three flavors (NOTE(review): aliasID, trans and idSplitPoint below appear to be stale names for the aliasesOrRules, transes and direction members -- confirm against the implementation): |
| 99 // 1. Simple alias |
| 100 // Here aliasID is the alias string. Everything else is |
| 101 // null, zero, empty. |
| 102 // 2. CompoundRBT |
| 103 // Here ID is the ID, aliasID is the idBlock, trans is the |
| 104 // contained RBT, and idSplitPoint is the offset in aliasID |
| 105 // where the contained RBT goes. compoundFilter is the |
| 106 // compound filter, and it is _not_ owned. |
| 107 // 3. Rules |
| 108 // Here ID is the ID, aliasID is the rules string. |
| 109 // idSplitPoint is the UTransDirection. |
| 110 UnicodeString ID; |
| 111 UnicodeString aliasesOrRules; |
| 112 UVector* transes; // owned |
| 113 const UnicodeSet* compoundFilter; // alias |
| 114 UTransDirection direction; |
| 115 enum { SIMPLE, COMPOUND, RULES } type; |
| 116 |
| 117 TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of
this class |
| 118 TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid
copying of this class |
| 119 }; |
| 120 |
| 121 |
| 122 /** |
| 123 * A registry of system transliterators. This is the data structure |
| 124 * that implements the mapping between transliterator IDs and the data |
| 125 * or function pointers used to create the corresponding |
| 126 * transliterators. There is one instance of the registry that is |
| 127 * created statically. |
| 128 * |
| 129 * The registry consists of a dynamic component -- a hashtable -- and |
| 130 * a static component -- locale resource bundles. The dynamic store |
| 131 * is semantically overlaid on the static store, so the static mapping |
| 132 * can be dynamically overridden. |
| 133 * |
| 134 * This is an internal class that is only used by Transliterator. |
| 135 * Transliterator maintains one static instance of this class and |
| 136 * delegates all registry-related operations to it. |
| 137 * |
| 138 * @author Alan Liu |
| 139 */ |
| 140 class TransliteratorRegistry : public UMemory { |
| 141 |
| 142 public: |
| 143 |
| 144 /** |
| 145 * Constructor |
| 146 * @param status Output param set to success/failure code. |
| 147 */ |
| 148 TransliteratorRegistry(UErrorCode& status); |
| 149 |
| 150 /** |
| 151 * Nonvirtual destructor -- this class is not subclassable. |
| 152 */ |
| 153 ~TransliteratorRegistry(); |
| 154 |
| 155 //------------------------------------------------------------------ |
| 156 // Basic public API |
| 157 //------------------------------------------------------------------ |
| 158 |
| 159 /** |
| 160 * Given a simple ID (forward direction, no inline filter, not |
| 161 * compound) attempt to instantiate it from the registry. Return |
| 162 * 0 on failure. |
| 163 * |
| 164 * Return a non-NULL aliasReturn value if the ID points to an alias. |
| 165 * We cannot instantiate it ourselves because the alias may contain |
| 166 * filters or compounds, which we do not understand. Caller should |
| 167 * make aliasReturn NULL before calling. |
| 168 * @param ID the given ID |
| 169 * @param aliasReturn output param to receive TransliteratorAlias; |
| 170 * should be NULL on entry |
| 171 * (NOTE(review): no parseError parameter is declared here; a |
| 172 * UParseError is taken by TransliteratorAlias::create()/parse().) |
| 173 * @param status Output param set to success/failure code. |
| 174 */ |
| 175 Transliterator* get(const UnicodeString& ID, |
| 176 TransliteratorAlias*& aliasReturn, |
| 177 UErrorCode& status); |
| 178 |
| 179 /** |
| 180 * The caller must call this after calling get(), if [a] calling get() |
| 181 * returns an alias, and [b] the alias is rule based. In that |
| 182 * situation the caller must call alias->parse() to do the parsing |
| 183 * OUTSIDE THE REGISTRY MUTEX, then call this method to retry |
| 184 * instantiating the transliterator. |
| 185 * |
| 186 * Note: Another alias might be returned by this method. |
| 187 * |
| 188 * This method (like all public methods of this class) must be called |
| 189 * from within the TransliteratorRegistry mutex. |
| 190 * |
| 191 * @param aliasReturn output param to receive TransliteratorAlias; |
| 192 * should be NULL on entry |
| 193 */ |
| 194 Transliterator* reget(const UnicodeString& ID, |
| 195 TransliteratorParser& parser, |
| 196 TransliteratorAlias*& aliasReturn, |
| 197 UErrorCode& status); |
| 198 |
| 199 /** |
| 200 * Register a prototype (adopted). This adds an entry to the |
| 201 * dynamic store, or replaces an existing entry. Any entry in the |
| 202 * underlying static locale resource store is masked. |
| 203 */ |
| 204 void put(Transliterator* adoptedProto, |
| 205 UBool visible, |
| 206 UErrorCode& ec); |
| 207 |
| 208 /** |
| 209 * Register an ID and a factory function pointer. This adds an |
| 210 * entry to the dynamic store, or replaces an existing entry. Any |
| 211 * entry in the underlying static locale resource store is masked. |
| 212 */ |
| 213 void put(const UnicodeString& ID, |
| 214 Transliterator::Factory factory, |
| 215 Transliterator::Token context, |
| 216 UBool visible, |
| 217 UErrorCode& ec); |
| 218 |
| 219 /** |
| 220 * Register an ID and a resource name. This adds an entry to the |
| 221 * dynamic store, or replaces an existing entry. Any entry in the |
| 222 * underlying static locale resource store is masked. |
| 223 */ |
| 224 void put(const UnicodeString& ID, |
| 225 const UnicodeString& resourceName, |
| 226 UTransDirection dir, |
| 227 UBool readonlyResourceAlias, |
| 228 UBool visible, |
| 229 UErrorCode& ec); |
| 230 |
| 231 /** |
| 232 * Register an ID and an alias ID. This adds an entry to the |
| 233 * dynamic store, or replaces an existing entry. Any entry in the |
| 234 * underlying static locale resource store is masked. |
| 235 */ |
| 236 void put(const UnicodeString& ID, |
| 237 const UnicodeString& alias, |
| 238 UBool readonlyAliasAlias, |
| 239 UBool visible, |
| 240 UErrorCode& ec); |
| 241 |
| 242 /** |
| 243 * Unregister an ID. This removes an entry from the dynamic store |
| 244 * if there is one. The static locale resource store is |
| 245 * unaffected. |
| 246 * @param ID the given ID. |
| 247 */ |
| 248 void remove(const UnicodeString& ID); |
| 249 |
| 250 //------------------------------------------------------------------ |
| 251 // Public ID and spec management |
| 252 //------------------------------------------------------------------ |
| 253 |
| 254 /** |
| 255 * Return a StringEnumeration over the IDs currently registered |
| 256 * with the system. |
| 257 * @internal |
| 258 */ |
| 259 StringEnumeration* getAvailableIDs() const; |
| 260 |
| 261 /** |
| 262 * == OBSOLETE - remove in ICU 3.4 == |
| 263 * Return the number of IDs currently registered with the system. |
| 264 * To retrieve the actual IDs, call getAvailableID(i) with |
| 265 * i from 0 to countAvailableIDs() - 1. |
| 266 * @return the number of IDs currently registered with the system. |
| 267 * @internal |
| 268 */ |
| 269 int32_t countAvailableIDs(void) const; |
| 270 |
| 271 /** |
| 272 * == OBSOLETE - remove in ICU 3.4 == |
| 273 * Return the index-th available ID. index must be between 0 |
| 274 * and countAvailableIDs() - 1, inclusive. If index is out of |
| 275 * range, the result of getAvailableID(0) is returned. |
| 276 * @param index the given index. |
| 277 * @return the index-th available ID. index must be between 0 |
| 278 * and countAvailableIDs() - 1, inclusive. If index is out of |
| 279 * range, the result of getAvailableID(0) is returned. |
| 280 * @internal |
| 281 */ |
| 282 const UnicodeString& getAvailableID(int32_t index) const; |
| 283 |
| 284 /** |
| 285 * Return the number of registered source specifiers. |
| 286 * @return the number of registered source specifiers. |
| 287 */ |
| 288 int32_t countAvailableSources(void) const; |
| 289 |
| 290 /** |
| 291 * Return a registered source specifier. |
| 292 * @param index which specifier to return, from 0 to n-1, where |
| 293 * n = countAvailableSources() |
| 294 * @param result fill-in parameter to receive the source specifier. |
| 295 * If index is out of range, result will be empty. |
| 296 * @return reference to result |
| 297 */ |
| 298 UnicodeString& getAvailableSource(int32_t index, |
| 299 UnicodeString& result) const; |
| 300 |
| 301 /** |
| 302 * Return the number of registered target specifiers for a given |
| 303 * source specifier. |
| 304 * @param source the given source specifier. |
| 305 * @return the number of registered target specifiers for a given |
| 306 * source specifier. |
| 307 */ |
| 308 int32_t countAvailableTargets(const UnicodeString& source) const; |
| 309 |
| 310 /** |
| 311 * Return a registered target specifier for a given source. |
| 312 * @param index which specifier to return, from 0 to n-1, where |
| 313 * n = countAvailableTargets(source) |
| 314 * @param source the source specifier |
| 315 * @param result fill-in parameter to receive the target specifier. |
| 316 * If source is invalid or if index is out of range, result will |
| 317 * be empty. |
| 318 * @return reference to result |
| 319 */ |
| 320 UnicodeString& getAvailableTarget(int32_t index, |
| 321 const UnicodeString& source, |
| 322 UnicodeString& result) const; |
| 323 |
| 324 /** |
| 325 * Return the number of registered variant specifiers for a given |
| 326 * source-target pair. There is always at least one variant: If |
| 327 * just source-target is registered, then the single variant |
| 328 * NO_VARIANT is returned. If source-target/variant is registered |
| 329 * then that variant is returned. |
| 330 * @param source the source specifiers |
| 331 * @param target the target specifiers |
| 332 * @return the number of registered variant specifiers for a given |
| 333 * source-target pair. |
| 334 */ |
| 335 int32_t countAvailableVariants(const UnicodeString& source, |
| 336 const UnicodeString& target) const; |
| 337 |
| 338 /** |
| 339 * Return a registered variant specifier for a given source-target |
| 340 * pair. If NO_VARIANT is one of the variants, then it will be |
| 341 * at index 0. |
| 342 * @param index which specifier to return, from 0 to n-1, where |
| 343 * n = countAvailableVariants(source, target) |
| 344 * @param source the source specifier |
| 345 * @param target the target specifier |
| 346 * @param result fill-in parameter to receive the variant |
| 347 * specifier. If source is invalid or if target is invalid or if |
| 348 * index is out of range, result will be empty. |
| 349 * @return reference to result |
| 350 */ |
| 351 UnicodeString& getAvailableVariant(int32_t index, |
| 352 const UnicodeString& source, |
| 353 const UnicodeString& target, |
| 354 UnicodeString& result) const; |
| 355 |
| 356 private: |
| 357 |
| 358 //---------------------------------------------------------------- |
| 359 // Private implementation |
| 360 //---------------------------------------------------------------- |
| 361 |
| 362 TransliteratorEntry* find(const UnicodeString& ID); |
| 363 |
| 364 TransliteratorEntry* find(UnicodeString& source, |
| 365 UnicodeString& target, |
| 366 UnicodeString& variant); |
| 367 |
| 368 TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, |
| 369 const TransliteratorSpec& trg, |
| 370 const UnicodeString& variant) const; |
| 371 |
| 372 TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, |
| 373 const TransliteratorSpec& trg, |
| 374 const UnicodeString& variant); |
| 375 |
| 376 static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpe
n, |
| 377 const TransliteratorSpec& specToFind, |
| 378 const UnicodeString& variant, |
| 379 UTransDirection direction); |
| 380 |
| 381 void registerEntry(const UnicodeString& source, |
| 382 const UnicodeString& target, |
| 383 const UnicodeString& variant, |
| 384 TransliteratorEntry* adopted, |
| 385 UBool visible); |
| 386 |
| 387 void registerEntry(const UnicodeString& ID, |
| 388 TransliteratorEntry* adopted, |
| 389 UBool visible); |
| 390 |
| 391 void registerEntry(const UnicodeString& ID, |
| 392 const UnicodeString& source, |
| 393 const UnicodeString& target, |
| 394 const UnicodeString& variant, |
| 395 TransliteratorEntry* adopted, |
| 396 UBool visible); |
| 397 |
| 398 void registerSTV(const UnicodeString& source, |
| 399 const UnicodeString& target, |
| 400 const UnicodeString& variant); |
| 401 |
| 402 void removeSTV(const UnicodeString& source, |
| 403 const UnicodeString& target, |
| 404 const UnicodeString& variant); |
| 405 |
| 406 Transliterator* instantiateEntry(const UnicodeString& ID, |
| 407 TransliteratorEntry *entry, |
| 408 TransliteratorAlias*& aliasReturn, |
| 409 UErrorCode& status); |
| 410 |
| 411 /** |
| 412 * A StringEnumeration over the registered IDs in this object. |
| 413 */ |
| 414 class Enumeration : public StringEnumeration { |
| 415 public: |
| 416 Enumeration(const TransliteratorRegistry& reg); |
| 417 virtual ~Enumeration(); |
| 418 virtual int32_t count(UErrorCode& status) const; |
| 419 virtual const UnicodeString* snext(UErrorCode& status); |
| 420 virtual void reset(UErrorCode& status); |
| 421 static UClassID U_EXPORT2 getStaticClassID(); |
| 422 virtual UClassID getDynamicClassID() const; |
| 423 private: |
| 424 int32_t index; |
| 425 const TransliteratorRegistry& reg; |
| 426 }; |
| 427 friend class Enumeration; |
| 428 |
| 429 private: |
| 430 |
| 431 /** |
| 432 * Dynamic registry mapping full IDs to Entry objects. This |
| 433 * contains both public and internal entities. The visibility is |
| 434 * controlled by whether an entry is listed in availableIDs and |
| 435 * specDAG or not. |
| 436 */ |
| 437 Hashtable registry; |
| 438 |
| 439 /** |
| 440 * DAG of visible IDs by spec. Hashtable: source => (Hashtable: |
| 441 * target => (UVector: variant)) The UVector of variants is never |
| 442 * empty. For a source-target with no variant, the special |
| 443 * variant NO_VARIANT (the empty string) is stored in slot zero of |
| 444 * the UVector. |
| 445 */ |
| 446 Hashtable specDAG; |
| 447 |
| 448 /** |
| 449 * Vector of public full IDs. |
| 450 */ |
| 451 UVector availableIDs; |
| 452 |
| 453 TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copyi
ng of this class |
| 454 TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // f
orbid copying of this class |
| 455 }; |
| 456 |
| 457 U_NAMESPACE_END |
| 458 |
| 459 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
| 460 |
| 461 #endif |
| 462 //eof |
OLD | NEW |