OLD | NEW |
(Empty) | |
| 1 // |
| 2 // file: rbbirb.cpp |
| 3 // |
| 4 // Copyright (C) 2002-2008, International Business Machines Corporation and oth
ers. |
| 5 // All Rights Reserved. |
| 6 // |
| 7 // This file contains the RBBIRuleBuilder class implementation. This is the ma
in class for |
| 8 // building (compiling) break rules into the tables required by the runtime |
| 9 // RBBI engine. |
| 10 // |
| 11 |
| 12 #include "unicode/utypes.h" |
| 13 |
| 14 #if !UCONFIG_NO_BREAK_ITERATION |
| 15 |
| 16 #include "unicode/brkiter.h" |
| 17 #include "unicode/rbbi.h" |
| 18 #include "unicode/ubrk.h" |
| 19 #include "unicode/unistr.h" |
| 20 #include "unicode/uniset.h" |
| 21 #include "unicode/uchar.h" |
| 22 #include "unicode/uchriter.h" |
| 23 #include "unicode/parsepos.h" |
| 24 #include "unicode/parseerr.h" |
| 25 #include "cmemory.h" |
| 26 #include "cstring.h" |
| 27 |
| 28 #include "rbbirb.h" |
| 29 #include "rbbinode.h" |
| 30 |
| 31 #include "rbbiscan.h" |
| 32 #include "rbbisetb.h" |
| 33 #include "rbbitblb.h" |
| 34 #include "rbbidata.h" |
| 35 |
| 36 |
| 37 U_NAMESPACE_BEGIN |
| 38 |
| 39 |
| 40 //------------------------------------------------------------------------------
---------- |
| 41 // |
| 42 // Constructor. |
| 43 // |
| 44 //------------------------------------------------------------------------------
---------- |
| 45 RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, |
| 46 UParseError *parseErr, |
| 47 UErrorCode &status) |
| 48 : fRules(rules) |
| 49 { |
| 50 fStatus = &status; // status is checked below |
| 51 fParseError = parseErr; |
| 52 fDebugEnv = NULL; |
| 53 #ifdef RBBI_DEBUG |
| 54 fDebugEnv = getenv("U_RBBIDEBUG"); |
| 55 #endif |
| 56 |
| 57 |
| 58 fForwardTree = NULL; |
| 59 fReverseTree = NULL; |
| 60 fSafeFwdTree = NULL; |
| 61 fSafeRevTree = NULL; |
| 62 fDefaultTree = &fForwardTree; |
| 63 fForwardTables = NULL; |
| 64 fReverseTables = NULL; |
| 65 fSafeFwdTables = NULL; |
| 66 fSafeRevTables = NULL; |
| 67 fRuleStatusVals = NULL; |
| 68 fChainRules = FALSE; |
| 69 fLBCMNoChain = FALSE; |
| 70 fLookAheadHardBreak = FALSE; |
| 71 fUSetNodes = NULL; |
| 72 fRuleStatusVals = NULL; |
| 73 fScanner = NULL; |
| 74 fSetBuilder = NULL; |
| 75 if (parseErr) { |
| 76 uprv_memset(parseErr, 0, sizeof(UParseError)); |
| 77 } |
| 78 |
| 79 if (U_FAILURE(status)) { |
| 80 return; |
| 81 } |
| 82 |
| 83 fUSetNodes = new UVector(status); // bcos status gets overwritten h
ere |
| 84 fRuleStatusVals = new UVector(status); |
| 85 fScanner = new RBBIRuleScanner(this); |
| 86 fSetBuilder = new RBBISetBuilder(this); |
| 87 if (U_FAILURE(status)) { |
| 88 return; |
| 89 } |
| 90 if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals =
= 0) { |
| 91 status = U_MEMORY_ALLOCATION_ERROR; |
| 92 } |
| 93 } |
| 94 |
| 95 |
| 96 |
| 97 //------------------------------------------------------------------------------
---------- |
| 98 // |
| 99 // Destructor |
| 100 // |
| 101 //------------------------------------------------------------------------------
---------- |
| 102 RBBIRuleBuilder::~RBBIRuleBuilder() { |
| 103 |
| 104 int i; |
| 105 for (i=0; ; i++) { |
| 106 RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i); |
| 107 if (n==NULL) { |
| 108 break; |
| 109 } |
| 110 delete n; |
| 111 } |
| 112 |
| 113 delete fUSetNodes; |
| 114 delete fSetBuilder; |
| 115 delete fForwardTables; |
| 116 delete fReverseTables; |
| 117 delete fSafeFwdTables; |
| 118 delete fSafeRevTables; |
| 119 |
| 120 delete fForwardTree; |
| 121 delete fReverseTree; |
| 122 delete fSafeFwdTree; |
| 123 delete fSafeRevTree; |
| 124 delete fScanner; |
| 125 delete fRuleStatusVals; |
| 126 } |
| 127 |
| 128 |
| 129 |
| 130 |
| 131 |
| 132 //------------------------------------------------------------------------------
---------- |
| 133 // |
| 134 // flattenData() - Collect up the compiled RBBI rule data and put it into |
| 135 // the format for saving in ICU data files, |
| 136 // which is also the format needed by the RBBI runtime engine
. |
| 137 // |
| 138 //------------------------------------------------------------------------------
---------- |
// Round i up to the nearest multiple of 8; a value that is already a
// multiple of 8 is returned unchanged.
static int32_t align8(int32_t i) {
    return (i + 7) & ~7;
}
| 140 |
| 141 RBBIDataHeader *RBBIRuleBuilder::flattenData() { |
| 142 int32_t i; |
| 143 |
| 144 if (U_FAILURE(*fStatus)) { |
| 145 return NULL; |
| 146 } |
| 147 |
| 148 // Remove comments and whitespace from the rules to make it smaller. |
| 149 UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRule
s(fRules)); |
| 150 |
| 151 // Calculate the size of each section in the data. |
| 152 // Sizes here are padded up to a multiple of 8 for better memory alignment
. |
| 153 // Sections sizes actually stored in the header are for the actual data |
| 154 // without the padding. |
| 155 // |
| 156 int32_t headerSize = align8(sizeof(RBBIDataHeader)); |
| 157 int32_t forwardTableSize = align8(fForwardTables->getTableSize()); |
| 158 int32_t reverseTableSize = align8(fReverseTables->getTableSize()); |
| 159 int32_t safeFwdTableSize = align8(fSafeFwdTables->getTableSize()); |
| 160 int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize()); |
| 161 int32_t trieSize = align8(fSetBuilder->getTrieSize()); |
| 162 int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)
); |
| 163 int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar
)); |
| 164 |
| 165 int32_t totalSize = headerSize + forwardTableSize + reverseTableSize |
| 166 + safeFwdTableSize + safeRevTableSize |
| 167 + statusTableSize + trieSize + rulesSize; |
| 168 |
| 169 RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize); |
| 170 if (data == NULL) { |
| 171 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 172 return NULL; |
| 173 } |
| 174 uprv_memset(data, 0, totalSize); |
| 175 |
| 176 |
| 177 data->fMagic = 0xb1a0; |
| 178 data->fFormatVersion[0] = 3; |
| 179 data->fFormatVersion[1] = 1; |
| 180 data->fFormatVersion[2] = 0; |
| 181 data->fFormatVersion[3] = 0; |
| 182 data->fLength = totalSize; |
| 183 data->fCatCount = fSetBuilder->getNumCharCategories(); |
| 184 |
| 185 data->fFTable = headerSize; |
| 186 data->fFTableLen = forwardTableSize; |
| 187 data->fRTable = data->fFTable + forwardTableSize; |
| 188 data->fRTableLen = reverseTableSize; |
| 189 data->fSFTable = data->fRTable + reverseTableSize; |
| 190 data->fSFTableLen = safeFwdTableSize; |
| 191 data->fSRTable = data->fSFTable + safeFwdTableSize; |
| 192 data->fSRTableLen = safeRevTableSize; |
| 193 |
| 194 data->fTrie = data->fSRTable + safeRevTableSize; |
| 195 data->fTrieLen = fSetBuilder->getTrieSize(); |
| 196 data->fStatusTable = data->fTrie + trieSize; |
| 197 data->fStatusTableLen= statusTableSize; |
| 198 data->fRuleSource = data->fStatusTable + statusTableSize; |
| 199 data->fRuleSourceLen = strippedRules.length() * sizeof(UChar); |
| 200 |
| 201 uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); |
| 202 |
| 203 fForwardTables->exportTable((uint8_t *)data + data->fFTable); |
| 204 fReverseTables->exportTable((uint8_t *)data + data->fRTable); |
| 205 fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable); |
| 206 fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable); |
| 207 fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie); |
| 208 |
| 209 int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable)
; |
| 210 for (i=0; i<fRuleStatusVals->size(); i++) { |
| 211 ruleStatusTable[i] = fRuleStatusVals->elementAti(i); |
| 212 } |
| 213 |
| 214 strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSiz
e/2+1, *fStatus); |
| 215 |
| 216 return data; |
| 217 } |
| 218 |
| 219 |
| 220 |
| 221 |
| 222 |
| 223 |
| 224 //------------------------------------------------------------------------------
---------- |
| 225 // |
| 226 // createRuleBasedBreakIterator construct from source rules that are passed
in |
| 227 // in a UnicodeString |
| 228 // |
| 229 //------------------------------------------------------------------------------
---------- |
| 230 BreakIterator * |
| 231 RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, |
| 232 UParseError *parseError, |
| 233 UErrorCode &status) |
| 234 { |
| 235 // status checked below |
| 236 |
| 237 // |
| 238 // Read the input rules, generate a parse tree, symbol table, |
| 239 // and list of all Unicode Sets referenced by the rules. |
| 240 // |
| 241 RBBIRuleBuilder builder(rules, parseError, status); |
| 242 if (U_FAILURE(status)) { // status checked here bcos build below doesn't |
| 243 return NULL; |
| 244 } |
| 245 builder.fScanner->parse(); |
| 246 |
| 247 // |
| 248 // UnicodeSet processing. |
| 249 // Munge the Unicode Sets to create a set of character categories. |
| 250 // Generate the mapping tables (TRIE) from input 32-bit characters to |
| 251 // the character categories. |
| 252 // |
| 253 builder.fSetBuilder->build(); |
| 254 |
| 255 |
| 256 // |
| 257 // Generate the DFA state transition table. |
| 258 // |
| 259 builder.fForwardTables = new RBBITableBuilder(&builder, &builder.fForwardTre
e); |
| 260 builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTre
e); |
| 261 builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTre
e); |
| 262 builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTre
e); |
| 263 if (U_SUCCESS(status) |
| 264 && (builder.fForwardTables == NULL || builder.fReverseTables == NULL || |
| 265 builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL)) |
| 266 { |
| 267 status = U_MEMORY_ALLOCATION_ERROR; |
| 268 } |
| 269 |
| 270 // Before building the tables, check to make sure the status is ok. |
| 271 if (U_FAILURE(status)) { |
| 272 delete builder.fForwardTables; builder.fForwardTables = NULL; |
| 273 delete builder.fReverseTables; builder.fReverseTables = NULL; |
| 274 delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL; |
| 275 delete builder.fSafeRevTables; builder.fSafeRevTables = NULL; |
| 276 return NULL; |
| 277 } |
| 278 |
| 279 builder.fForwardTables->build(); |
| 280 builder.fReverseTables->build(); |
| 281 builder.fSafeFwdTables->build(); |
| 282 builder.fSafeRevTables->build(); |
| 283 |
| 284 #ifdef RBBI_DEBUG |
| 285 if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) { |
| 286 builder.fForwardTables->printRuleStatusTable(); |
| 287 } |
| 288 #endif |
| 289 |
| 290 // |
| 291 // Package up the compiled data into a memory image |
| 292 // in the run-time format. |
| 293 // |
| 294 RBBIDataHeader *data = builder.flattenData(); // returns NULL if error |
| 295 if (U_FAILURE(*builder.fStatus)) { |
| 296 return NULL; |
| 297 } |
| 298 |
| 299 |
| 300 // |
| 301 // Clean up the compiler related stuff |
| 302 // |
| 303 |
| 304 |
| 305 // |
| 306 // Create a break iterator from the compiled rules. |
| 307 // (Identical to creation from stored pre-compiled rules) |
| 308 // |
| 309 // status is checked after init in construction. |
| 310 RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status); |
| 311 if (U_FAILURE(status)) { |
| 312 delete This; |
| 313 This = NULL; |
| 314 } |
| 315 else if(This == NULL) { // test for NULL |
| 316 status = U_MEMORY_ALLOCATION_ERROR; |
| 317 } |
| 318 return This; |
| 319 } |
| 320 |
| 321 U_NAMESPACE_END |
| 322 |
| 323 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |