OLD | NEW |
(Empty) | |
| 1 // |
| 2 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class |
| 3 // |
| 4 /* |
| 5 *************************************************************************** |
| 6 * Copyright (C) 2002-2006 International Business Machines Corporation * |
| 7 * and others. All rights reserved. * |
| 8 *************************************************************************** |
| 9 */ |
| 10 |
| 11 #include "unicode/utypes.h" |
| 12 |
| 13 #if !UCONFIG_NO_BREAK_ITERATION |
| 14 |
| 15 #include "unicode/unistr.h" |
| 16 #include "unicode/uniset.h" |
| 17 #include "unicode/uchar.h" |
| 18 #include "unicode/parsepos.h" |
| 19 |
| 20 #include "umutex.h" |
| 21 |
| 22 #include "rbbirb.h" |
| 23 #include "rbbinode.h" |
| 24 |
| 25 |
| 26 // |
| 27 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the content
s |
| 28 // when the hash table is deleted. |
| 29 // |
| 30 U_CDECL_BEGIN |
| 31 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { |
| 32 U_NAMESPACE_QUALIFIER RBBISymbolTableEntry *px = (U_NAMESPACE_QUALIFIER RBBI
SymbolTableEntry *)p; |
| 33 delete px; |
| 34 } |
| 35 U_CDECL_END |
| 36 |
| 37 |
| 38 |
| 39 U_NAMESPACE_BEGIN |
| 40 |
| 41 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules
, UErrorCode &status) |
| 42 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) |
| 43 { |
| 44 fHashTable = NULL; |
| 45 fCachedSetLookup = NULL; |
| 46 |
| 47 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString,
NULL, &status); |
| 48 // uhash_open checks status |
| 49 if (U_FAILURE(status)) { |
| 50 return; |
| 51 } |
| 52 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); |
| 53 } |
| 54 |
| 55 |
| 56 |
| 57 RBBISymbolTable::~RBBISymbolTable() |
| 58 { |
| 59 uhash_close(fHashTable); |
| 60 } |
| 61 |
| 62 |
| 63 // |
| 64 // RBBISymbolTable::lookup This function from the abstract symbol table i
nteface |
| 65 // looks up a variable name and returns a Unicode
String |
| 66 // containing the substitution text. |
| 67 // |
| 68 // The variable name does NOT include the leading
$. |
| 69 // |
| 70 const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const |
| 71 { |
| 72 RBBISymbolTableEntry *el; |
| 73 RBBINode *varRefNode; |
| 74 RBBINode *exprNode; |
| 75 RBBINode *usetNode; |
| 76 const UnicodeString *retString; |
| 77 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const |
| 78 |
| 79 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); |
| 80 if (el == NULL) { |
| 81 return NULL; |
| 82 } |
| 83 |
| 84 varRefNode = el->val; |
| 85 exprNode = varRefNode->fLeftChild; // Root node of expression for vari
able |
| 86 if (exprNode->fType == RBBINode::setRef) { |
| 87 // The $variable refers to a single UnicodeSet |
| 88 // return the ffffString, which will subsequently be interpreted as a |
| 89 // stand-in character for the set by RBBISymbolTable::lookupMatcher() |
| 90 usetNode = exprNode->fLeftChild; |
| 91 This->fCachedSetLookup = usetNode->fInputSet; |
| 92 retString = &ffffString; |
| 93 } |
| 94 else |
| 95 { |
| 96 // The variable refers to something other than just a set. |
| 97 // return the original source string for the expression |
| 98 retString = &exprNode->fText; |
| 99 This->fCachedSetLookup = NULL; |
| 100 } |
| 101 return retString; |
| 102 } |
| 103 |
| 104 |
| 105 |
| 106 // |
| 107 // RBBISymbolTable::lookupMatcher This function from the abstract symbol tabl
e |
| 108 // interface maps a single stand-in character
to a |
| 109 // pointer to a Unicode Set. The Unicode Set
code uses this |
| 110 // mechanism to get all references to the same
$variable |
| 111 // name to refer to a single common Unicode Se
t instance. |
| 112 // |
| 113 // This implementation cheats a little, and does not maintain a map of stand-
in chars |
| 114 // to sets. Instead, it takes advantage of the fact that the UnicodeSet |
| 115 // constructor will always call this function right after calling lookup(), |
| 116 // and we just need to remember what set to return between these two calls. |
| 117 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const |
| 118 { |
| 119 UnicodeSet *retVal = NULL; |
| 120 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const |
| 121 if (ch == 0xffff) { |
| 122 retVal = fCachedSetLookup; |
| 123 This->fCachedSetLookup = 0; |
| 124 } |
| 125 return retVal; |
| 126 } |
| 127 |
| 128 // |
| 129 // RBBISymbolTable::parseReference This function from the abstract symbol tabl
e interface |
| 130 // looks for a $variable name in the source te
xt. |
| 131 // It does not look it up, only scans for it. |
| 132 // It is used by the UnicodeSet parser. |
| 133 // |
| 134 // This implementation is lifted pretty much v
erbatim |
| 135 // from the rules based transliterator impleme
ntation. |
| 136 // I didn't see an obvious way of sharing it. |
| 137 // |
| 138 UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, |
| 139 ParsePosition& pos, int32_t limi
t) const |
| 140 { |
| 141 int32_t start = pos.getIndex(); |
| 142 int32_t i = start; |
| 143 UnicodeString result; |
| 144 while (i < limit) { |
| 145 UChar c = text.charAt(i); |
| 146 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { |
| 147 break; |
| 148 } |
| 149 ++i; |
| 150 } |
| 151 if (i == start) { // No valid name chars |
| 152 return result; // Indicate failure with empty string |
| 153 } |
| 154 pos.setIndex(i); |
| 155 text.extractBetween(start, i, result); |
| 156 return result; |
| 157 } |
| 158 |
| 159 |
| 160 |
| 161 // |
| 162 // RBBISymbolTable::lookupNode Given a key (a variable name), return the |
| 163 // corresponding RBBI Node. If there is no ent
ry |
| 164 // in the table for this name, return NULL. |
| 165 // |
| 166 RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ |
| 167 |
| 168 RBBINode *retNode = NULL; |
| 169 RBBISymbolTableEntry *el; |
| 170 |
| 171 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); |
| 172 if (el != NULL) { |
| 173 retNode = el->val; |
| 174 } |
| 175 return retNode; |
| 176 } |
| 177 |
| 178 |
| 179 // |
| 180 // RBBISymbolTable::addEntry Add a new entry to the symbol table. |
| 181 // Indicate an error if the name already exists
- |
| 182 // this will only occur in the case of duplic
ate |
| 183 // variable assignments. |
| 184 // |
| 185 void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *
val, UErrorCode &err) { |
| 186 RBBISymbolTableEntry *e; |
| 187 /* test for buffer overflows */ |
| 188 if (U_FAILURE(err)) { |
| 189 return; |
| 190 } |
| 191 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); |
| 192 if (e != NULL) { |
| 193 err = U_BRK_VARIABLE_REDFINITION; |
| 194 return; |
| 195 } |
| 196 |
| 197 e = new RBBISymbolTableEntry; |
| 198 if (e == NULL) { |
| 199 err = U_MEMORY_ALLOCATION_ERROR; |
| 200 return; |
| 201 } |
| 202 e->key = key; |
| 203 e->val = val; |
| 204 uhash_put( fHashTable, &e->key, e, &err); |
| 205 } |
| 206 |
| 207 |
| 208 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} |
| 209 |
| 210 RBBISymbolTableEntry::~RBBISymbolTableEntry() { |
| 211 // The "val" of a symbol table entry is a variable reference node. |
| 212 // The l. child of the val is the rhs expression from the assignment. |
| 213 // Unlike other node types, children of variable reference nodes are not |
| 214 // automatically recursively deleted. We do it manually here. |
| 215 delete val->fLeftChild; |
| 216 val->fLeftChild = NULL; |
| 217 |
| 218 delete val; |
| 219 |
| 220 // Note: the key UnicodeString is destructed by virtue of being in the objec
t by value. |
| 221 } |
| 222 |
| 223 |
//
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//                                        Only compiled when RBBI_DEBUG is defined.
//
//  Two passes are made over the hash table: the first prints each variable's
//  name, node pointer and source text, the second prints each variable's
//  parsed expression tree.
//
#ifdef RBBI_DEBUG
void RBBISymbolTable::rbbiSymtablePrint() const {
    // NOTE(review): the column spacing inside these literals is reproduced as
    // it appears in the review rendering, which seems to collapse runs of
    // spaces - confirm against the checked-in file before applying.
    RBBIDebugPrintf("Variable Definitions\n"
           "Name Node Val String Val\n"
           "----------------------------------------------------------------------\n");

    const UHashElement *element = NULL;
    int32_t             cursor  = -1;

    // Pass 1: one table row per variable.
    while ((element = uhash_nextElement(fHashTable, &cursor)) != NULL) {
        RBBISymbolTableEntry *entry = (RBBISymbolTableEntry *)element->value.pointer;

        RBBI_DEBUG_printUnicodeString(entry->key, 15);
        RBBIDebugPrintf(" %8p ", (void *)entry->val);
        RBBI_DEBUG_printUnicodeString(entry->val->fLeftChild->fText);
        RBBIDebugPrintf("\n");
    }

    // Pass 2: restart the iteration and print each variable's expression tree.
    RBBIDebugPrintf("\nParsed Variable Definitions\n");
    cursor = -1;
    while ((element = uhash_nextElement(fHashTable, &cursor)) != NULL) {
        RBBISymbolTableEntry *entry = (RBBISymbolTableEntry *)element->value.pointer;

        RBBI_DEBUG_printUnicodeString(entry->key);
        entry->val->fLeftChild->printTree(TRUE);
        RBBIDebugPrintf("\n");
    }
}
#endif
| 262 |
| 263 |
| 264 |
| 265 |
| 266 |
| 267 U_NAMESPACE_END |
| 268 |
| 269 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |