OLD | NEW |
(Empty) | |
| 1 // |
| 2 // rbbirb.h |
| 3 // |
| 4 // Copyright (C) 2002-2008, International Business Machines Corporation and oth
ers. |
| 5 // All Rights Reserved. |
| 6 // |
| 7 // This file contains declarations for several classes from the |
| 8 // Rule Based Break Iterator rule builder. |
| 9 // |
| 10 |
| 11 |
| 12 #ifndef RBBIRB_H |
| 13 #define RBBIRB_H |
| 14 |
| 15 #include "unicode/utypes.h" |
| 16 #include "unicode/uobject.h" |
| 17 #include "unicode/rbbi.h" |
| 18 #include "unicode/uniset.h" |
| 19 #include "unicode/parseerr.h" |
| 20 #include "uhash.h" |
| 21 #include "uvector.h" |
| 22 #include "unicode/symtable.h"// For UnicodeSet parsing: SymbolTable is the interface that |
| 23 // looks up references to $variables within a set. |
| 24 |
| 25 |
| 26 |
| 27 U_NAMESPACE_BEGIN |
| 28 |
// Forward declarations. RBBIRuleScanner, RBBINode, RBBISetBuilder and RBBITableBuilder
// are used only through pointers by the declarations that follow, so their full
// definitions are not needed in this header.
// NOTE(review): RBBIRuleTableEl is not referenced anywhere else in the header as shown.
| 29 class RBBIRuleScanner; |
| 30 struct RBBIRuleTableEl; |
| 31 class RBBISetBuilder; |
| 32 class RBBINode; |
| 33 class RBBITableBuilder; |
| 34 |
| 35 |
| 36 |
| 37 //------------------------------------------------------------------------------
-- |
| 38 // |
| 39 // RBBISymbolTable. Implements SymbolTable interface that is used by the |
| 40 // UnicodeSet parser to resolve references to $variables. |
| 41 // |
| 42 //------------------------------------------------------------------------------
-- |
// RBBISymbolTableEntry pairs a UnicodeString key with an RBBINode pointer.
// The copy constructor and copy assignment operator are declared private,
// so code outside the class cannot copy an entry.
// NOTE(review): whether the destructor releases `val` is not visible in this
// header - confirm in the implementation.
| 43 class RBBISymbolTableEntry : public UMemory { // The symbol table hash table con
tains one |
| 44 public: // of these structs for each ent
ry. |
| 45 RBBISymbolTableEntry(); |
| 46 UnicodeString key; |
| 47 RBBINode *val; |
| 48 ~RBBISymbolTableEntry(); |
| 49 |
| 50 private: |
| 51 RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying o
f this class |
| 52 RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbi
d copying of this class |
| 53 }; |
| 54 |
| 55 |
// RBBISymbolTable derives from UMemory and SymbolTable. lookup(), lookupMatcher()
// and parseReference() are the functions inherited from SymbolTable; lookupNode()
// and addEntry() are additions of this class.
// NOTE(review): fRules is a reference member, so the UnicodeString it is bound to
// must outlive the table - presumably the constructor's fRules argument; confirm.
| 56 class RBBISymbolTable : public UMemory, public SymbolTable { |
| 57 private: |
| 58 const UnicodeString &fRules; |
| 59 UHashtable *fHashTable; |
| 60 RBBIRuleScanner *fRuleScanner; |
| 61 |
| 62 // These next two fields are part of the mechanism for passing references to |
| 63 // already-constructed UnicodeSets back to the UnicodeSet constructor |
| 64 // when the pattern includes $variable references. |
| 65 const UnicodeString ffffString; // = "\uffff" |
| 66 UnicodeSet *fCachedSetLookup; |
| 67 |
| 68 public: |
| 69 // API inherited from class SymbolTable |
| 70 virtual const UnicodeString* lookup(const UnicodeString& s) const; |
| 71 virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const; |
| 72 virtual UnicodeString parseReference(const UnicodeString& text, |
| 73 ParsePosition& pos, int32_t limit) cons
t; |
| 74 |
| 75 // Additional Functions |
| 76 RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &
status); |
| 77 virtual ~RBBISymbolTable(); |
| 78 |
| 79 virtual RBBINode *lookupNode(const UnicodeString &key) const; |
| 80 virtual void addEntry (const UnicodeString &key, RBBINode *val, UError
Code &err); |
| 81 |
| 82 #ifdef RBBI_DEBUG |
| 83 virtual void rbbiSymtablePrint() const; |
| 84 #else |
| 85 // A do-nothing macro for non-debug builds. Member funcs can't be
empty |
| 86 // or the call sites won't compile. |
| 87 int32_t fFakeField; |
// NOTE(review): the expansion below already ends in ';', so a call written as
// "rbbiSymtablePrint();" yields an extra empty statement.
| 88 #define rbbiSymtablePrint() fFakeField=0; |
| 89 #endif |
| 90 |
| 91 private: |
| 92 RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this cla
ss |
| 93 RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying
of this class |
| 94 }; |
| 95 |
| 96 |
| 97 //------------------------------------------------------------------------------
-- |
| 98 // |
| 99 // class RBBIRuleBuilder The top-level class handling RBBI rule compiling
. |
| 100 // |
| 101 //------------------------------------------------------------------------------
-- |
// NOTE(review): fRules is a reference member and fStatus / fParseError are raw
// pointers. Presumably they refer to the constructor's arguments, in which case
// those objects must outlive the builder - confirm in the implementation.
| 102 class RBBIRuleBuilder : public UMemory { |
| 103 public: |
| 104 |
| 105 // Create a rule based break iterator from a set of rules. |
| 106 // This function is the main entry point into the rule builder. The |
| 107 // public ICU API for creating RBBIs uses this function to do the actual w
ork. |
| 108 // |
| 109 static BreakIterator * createRuleBasedBreakIterator( const UnicodeString
&rules, |
| 110 UParseError *parseError, |
| 111 UErrorCode &status); |
| 112 |
| 113 public: |
| 114 // The "public" functions and data members that appear below are accessed |
| 115 // (and shared) by the various parts that make up the rule builder. They |
| 116 // are NOT intended to be accessed by anything outside of the |
| 117 // rule builder implementation. |
| 118 RBBIRuleBuilder(const UnicodeString &rules, |
| 119 UParseError *parseErr, |
| 120 UErrorCode &status |
| 121 ); |
| 122 |
| 123 virtual ~RBBIRuleBuilder(); |
| 124 char *fDebugEnv; // controls debug trace out
put |
| 125 UErrorCode *fStatus; // Error reporting. Keepin
g status |
| 126 UParseError *fParseError; // here avoids passing it
everywhere. |
| 127 const UnicodeString &fRules; // The rule string that we
are compiling |
| 128 |
| 129 RBBIRuleScanner *fScanner; // The scanner. |
| 130 RBBINode *fForwardTree; // The parse trees, generat
ed by the scanner, |
| 131 RBBINode *fReverseTree; // then manipulated by su
bsequent steps. |
| 132 RBBINode *fSafeFwdTree; |
| 133 RBBINode *fSafeRevTree; |
| 134 |
| 135 RBBINode **fDefaultTree; // For rules not qualified
with a ! |
| 136 // the tree they
belong to. |
| 137 |
| 138 UBool fChainRules; // True for chained Unicode
TR style rules. |
| 139 // False for traditional re
gexp rules. |
| 140 |
| 141 UBool fLBCMNoChain; // True: suppress chaining
of rules on |
| 142 // chars with LineBreak p
roperty == CM. |
| 143 |
| 144 UBool fLookAheadHardBreak; // True: Look ahead mat
ches cause an |
| 145 // immediate break, no cont
inuing for the |
| 146 // longest match. |
| 147 |
| 148 RBBISetBuilder *fSetBuilder; // Set and Character Catego
ry builder. |
| 149 UVector *fUSetNodes; // Vector of all uset nodes
. |
| 150 |
| 151 RBBITableBuilder *fForwardTables; // State transition tables |
| 152 RBBITableBuilder *fReverseTables; |
| 153 RBBITableBuilder *fSafeFwdTables; |
| 154 RBBITableBuilder *fSafeRevTables; |
| 155 |
| 156 UVector *fRuleStatusVals; // The values that can be r
eturned |
| 157 // from getRuleStatus(). |
| 158 |
| 159 RBBIDataHeader *flattenData(); // Create the flattened (ru
ntime format) |
| 160 // data tables. |
| 161 private: |
| 162 RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this cla
ss |
| 163 RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying
of this class |
| 164 }; |
| 165 |
| 166 |
| 167 |
| 168 |
| 169 //---------------------------------------------------------------------------- |
| 170 // |
| 171 // RBBISetTableEl is an entry in the hash table of UnicodeSets that have |
| 172 // been encountered. The val Node will be of nodetype uset |
| 173 // and contain pointers to the actual UnicodeSets. |
| 174 // The Key is the source string for initializing the set. |
| 175 // |
| 176 // The hash table is used to avoid creating duplicate |
| 177 // unnamed (not $var references) UnicodeSets. |
| 178 // |
| 179 // Memory Management: |
| 180 // The Hash Table owns these RBBISetTableEl structs and |
| 181 // the key strings. It does NOT own the val nodes. |
| 182 // |
| 183 //---------------------------------------------------------------------------- |
// NOTE(review): no constructor or destructor is declared, so the struct itself
// never releases `key` or `val`; whatever owns an element must do so.
| 184 struct RBBISetTableEl { |
| 185 UnicodeString *key; |
| 186 RBBINode *val; |
| 187 }; |
| 188 |
| 189 |
| 190 //---------------------------------------------------------------------------- |
| 191 // |
| 192 // RBBIDebugPrintf Printf equivalent, for debugging output. |
| 193 // Conditional compilation of the implementation lets us |
| 194 // get rid of the stdio dependency in environments where it |
| 195 // is unavailable. |
| 196 // |
| 197 //---------------------------------------------------------------------------- |
// With RBBI_DEBUG defined, RBBIDebugPrintf and RBBIDebugPuts expand to printf and puts.
// Otherwise RBBIDebugPuts(arg) expands to nothing and RBBIDebugPrintf is left
// undefined, so any use of RBBIDebugPrintf must itself sit inside #ifdef RBBI_DEBUG.
// NOTE(review): <stdio.h> is included between U_NAMESPACE_BEGIN and U_NAMESPACE_END;
// if that macro pair opens a real namespace, consider moving the include outside it.
| 198 #ifdef RBBI_DEBUG |
| 199 #include <stdio.h> |
| 200 #define RBBIDebugPrintf printf |
| 201 #define RBBIDebugPuts puts |
| 202 #else |
| 203 #undef RBBIDebugPrintf |
| 204 #define RBBIDebugPuts(arg) |
| 205 #endif |
| 206 |
| 207 U_NAMESPACE_END |
| 208 #endif |
| 209 |
| 210 |
| 211 |
OLD | NEW |