OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 1999-2007, International Business Machines Corporation |
| 4 * and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * Date Name Description |
| 7 * 11/17/99 aliu Creation. |
| 8 ********************************************************************** |
| 9 */ |
| 10 #ifndef RBT_SET_H |
| 11 #define RBT_SET_H |
| 12 |
| 13 #include "unicode/utypes.h" |
| 14 |
| 15 #if !UCONFIG_NO_TRANSLITERATION |
| 16 |
| 17 #include "unicode/uobject.h" |
| 18 #include "unicode/utrans.h" |
| 19 #include "uvector.h" |
| 20 |
| 21 U_NAMESPACE_BEGIN |
| 22 |
| 23 class Replaceable; |
| 24 class TransliterationRule; |
| 25 class TransliterationRuleData; |
| 26 class UnicodeFilter; |
| 27 class UnicodeString; |
| 28 class UnicodeSet; |
| 29 |
| 30 /** |
| 31 * A set of rules for a <code>RuleBasedTransliterator</code>. |
| 32 * @author Alan Liu |
| 33 */ |
| 34 class TransliterationRuleSet : public UMemory { |
| 35 /** |
| 36 * Vector of rules, in the order added. This is used while the |
| 37 * rule set is getting built. After that, freeze() reorders and |
| 38 * indexes the rules into rules[]. Any given rule is stored once |
| 39 * in ruleVector, and one or more times in rules[]. ruleVector |
| 40 * owns and deletes the rules. |
| 41 */ |
| 42 UVector* ruleVector; |
| 43 |
| 44 /** |
| 45 * Sorted and indexed table of rules. This is created by freeze() |
| 46 * from the rules in ruleVector. It contains alias pointers to |
| 47 * the rules in ruleVector. It is zero before freeze() is called |
| 48 * and non-zero thereafter. |
| 49 */ |
| 50 TransliterationRule** rules; |
| 51 |
| 52 /** |
| 53 * Index table. For text having a first character c, compute x = c&0xFF. |
| 54 * Now use rules[index[x]..index[x+1]-1]. This index table is created by |
| 55 * freeze(). Before freeze() is called it contains garbage. |
| 56 */ |
| 57 int32_t index[257]; |
| 58 |
| 59 /** |
| 60 * Length of the longest preceding context |
| 61 */ |
| 62 int32_t maxContextLength; |
| 63 |
| 64 public: |
| 65 |
| 66 /** |
| 67 * Construct a new empty rule set. |
| 68 * @param status Output parameter filled in with success or failure status. |
| 69 */ |
| 70 TransliterationRuleSet(UErrorCode& status); |
| 71 |
| 72 /** |
| 73 * Copy constructor. |
| 74 */ |
| 75 TransliterationRuleSet(const TransliterationRuleSet&); |
| 76 |
| 77 /** |
| 78 * Destructor. |
| 79 */ |
| 80 virtual ~TransliterationRuleSet(); |
| 81 |
| 82 /** |
| 83 * Change the data object that this rule set belongs to. Used |
| 84 * internally by the TransliterationRuleData copy constructor. |
| 85 * @param data the new data value to be set. |
| 86 */ |
| 87 void setData(const TransliterationRuleData* data); |
| 88 |
| 89 /** |
| 90 * Return the maximum context length. |
| 91 * @return the length of the longest preceding context. |
| 92 */ |
| 93 virtual int32_t getMaximumContextLength(void) const; |
| 94 |
| 95 /** |
| 96 * Add a rule to this set. Rules are added in order, and order is |
| 97 * significant. The last call to this method must be followed by |
| 98 * a call to <code>freeze()</code> before the rule set is used. |
| 99 * This method must <em>not</em> be called after freeze() has been |
| 100 * called. |
| 101 * |
| 102 * @param adoptedRule the rule to add |
| 103 */ |
| 104 virtual void addRule(TransliterationRule* adoptedRule, |
| 105 UErrorCode& status); |
| 106 |
| 107 /** |
| 108 * Check this for masked rules and index it to optimize performance. |
| 109 * The sequence of operations is: (1) add rules to a set using |
| 110 * <code>addRule()</code>; (2) freeze the set using |
| 111 * <code>freeze()</code>; (3) use the rule set. If |
| 112 * <code>addRule()</code> is called after calling this method, it |
| 113 * invalidates this object, and this method must be called again. |
| 114 * That is, <code>freeze()</code> may be called multiple times, |
| 115 * although for optimal performance it shouldn't be. |
| 116 * @param parseError A UParseError (passed by reference) to receive |
| 117 * information about any errors that occurred. |
| 118 * @param status Output parameter filled in with success or failure status. |
| 119 */ |
| 120 virtual void freeze(UParseError& parseError, UErrorCode& status); |
| 121 |
| 122 /** |
| 123 * Transliterate the given text with the given UTransPosition |
| 124 * indices. Return TRUE if the transliteration should continue |
| 125 * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). |
| 126 * Note that FALSE is only ever returned if isIncremental is TRUE. |
| 127 * @param text the text to be transliterated |
| 128 * @param index the position indices, which will be updated |
| 129 * @param isIncremental if TRUE, assume new text may be inserted |
| 130 * at index.limit, and return FALSE if there is a partial match. |
| 131 * @return TRUE unless a U_PARTIAL_MATCH has been obtained, |
| 132 * indicating that transliteration should stop until more text |
| 133 * arrives. |
| 134 */ |
| 135 UBool transliterate(Replaceable& text, |
| 136 UTransPosition& index, |
| 137 UBool isIncremental); |
| 138 |
| 139 /** |
| 140 * Create rule strings that represent this rule set. |
| 141 * @param result string to receive the rule strings. Current |
| 142 * contents will be deleted. |
| 143 * @param escapeUnprintable if TRUE, unprintable characters are escaped |
| 144 * @return A reference to 'result'. |
| 145 */ |
| 146 virtual UnicodeString& toRules(UnicodeString& result, |
| 147 UBool escapeUnprintable) const; |
| 148 |
| 149 /** |
| 150 * Return the set of all characters that may be modified |
| 151 * (getTarget=false) or emitted (getTarget=true) by this set. |
| 152 */ |
| 153 UnicodeSet& getSourceTargetSet(UnicodeSet& result, |
| 154 UBool getTarget) const; |
| 155 |
| 156 private: |
| 157 |
| 158 TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // f
orbid copying of this class |
| 159 }; |
| 160 |
| 161 U_NAMESPACE_END |
| 162 |
| 163 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
| 164 |
| 165 #endif |
OLD | NEW |