OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (c) 2002-2009, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 */ |
| 7 // |
| 8 // rbbitblb.cpp |
| 9 // |
| 10 |
| 11 |
| 12 #include "unicode/utypes.h" |
| 13 |
| 14 #if !UCONFIG_NO_BREAK_ITERATION |
| 15 |
| 16 #include "unicode/unistr.h" |
| 17 #include "rbbitblb.h" |
| 18 #include "rbbirb.h" |
| 19 #include "rbbisetb.h" |
| 20 #include "rbbidata.h" |
| 21 #include "cstring.h" |
| 22 #include "uassert.h" |
| 23 #include "cmemory.h" |
| 24 |
| 25 U_NAMESPACE_BEGIN |
| 26 |
| 27 RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode) : |
| 28 fTree(*rootNode) { |
| 29 fRB = rb; |
| 30 fStatus = fRB->fStatus; |
| 31 UErrorCode status = U_ZERO_ERROR; |
| 32 fDStates = new UVector(status); |
| 33 if (U_FAILURE(*fStatus)) { |
| 34 return; |
| 35 } |
| 36 if (U_FAILURE(status)) { |
| 37 *fStatus = status; |
| 38 return; |
| 39 } |
| 40 if (fDStates == NULL) { |
| 41 *fStatus = U_MEMORY_ALLOCATION_ERROR;; |
| 42 } |
| 43 } |
| 44 |
| 45 |
| 46 |
| 47 RBBITableBuilder::~RBBITableBuilder() { |
| 48 int i; |
| 49 for (i=0; i<fDStates->size(); i++) { |
| 50 delete (RBBIStateDescriptor *)fDStates->elementAt(i); |
| 51 } |
| 52 delete fDStates; |
| 53 } |
| 54 |
| 55 |
| 56 //----------------------------------------------------------------------------- |
| 57 // |
| 58 // RBBITableBuilder::build - This is the main function for building the DFA
state transtion |
| 59 // table from the RBBI rules parse tree. |
| 60 // |
| 61 //----------------------------------------------------------------------------- |
| 62 void RBBITableBuilder::build() { |
| 63 |
| 64 if (U_FAILURE(*fStatus)) { |
| 65 return; |
| 66 } |
| 67 |
| 68 // If there were no rules, just return. This situation can easily arise |
| 69 // for the reverse rules. |
| 70 if (fTree==NULL) { |
| 71 return; |
| 72 } |
| 73 |
| 74 // |
| 75 // Walk through the tree, replacing any references to $variables with a copy
of the |
| 76 // parse tree for the substition expression. |
| 77 // |
| 78 fTree = fTree->flattenVariables(); |
| 79 #ifdef RBBI_DEBUG |
| 80 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { |
| 81 RBBIDebugPuts("Parse tree after flattening variable references."); |
| 82 fTree->printTree(TRUE); |
| 83 } |
| 84 #endif |
| 85 |
| 86 // |
| 87 // If the rules contained any references to {bof} |
| 88 // add a {bof} <cat> <former root of tree> to the |
| 89 // tree. Means that all matches must start out with the |
| 90 // {bof} fake character. |
| 91 // |
| 92 if (fRB->fSetBuilder->sawBOF()) { |
| 93 RBBINode *bofTop = new RBBINode(RBBINode::opCat); |
| 94 RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); |
| 95 // Delete and exit if memory allocation failed. |
| 96 if (bofTop == NULL || bofLeaf == NULL) { |
| 97 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 98 delete bofTop; |
| 99 delete bofLeaf; |
| 100 return; |
| 101 } |
| 102 bofTop->fLeftChild = bofLeaf; |
| 103 bofTop->fRightChild = fTree; |
| 104 bofLeaf->fParent = bofTop; |
| 105 bofLeaf->fVal = 2; // Reserved value for {bof}. |
| 106 fTree = bofTop; |
| 107 } |
| 108 |
| 109 // |
| 110 // Add a unique right-end marker to the expression. |
| 111 // Appears as a cat-node, left child being the original tree, |
| 112 // right child being the end marker. |
| 113 // |
| 114 RBBINode *cn = new RBBINode(RBBINode::opCat); |
| 115 // Exit if memory allocation failed. |
| 116 if (cn == NULL) { |
| 117 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 118 return; |
| 119 } |
| 120 cn->fLeftChild = fTree; |
| 121 fTree->fParent = cn; |
| 122 cn->fRightChild = new RBBINode(RBBINode::endMark); |
| 123 // Delete and exit if memory allocation failed. |
| 124 if (cn->fRightChild == NULL) { |
| 125 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 126 delete cn; |
| 127 return; |
| 128 } |
| 129 cn->fRightChild->fParent = cn; |
| 130 fTree = cn; |
| 131 |
| 132 // |
| 133 // Replace all references to UnicodeSets with the tree for the equivalent |
| 134 // expression. |
| 135 // |
| 136 fTree->flattenSets(); |
| 137 #ifdef RBBI_DEBUG |
| 138 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { |
| 139 RBBIDebugPuts("Parse tree after flattening Unicode Set references."); |
| 140 fTree->printTree(TRUE); |
| 141 } |
| 142 #endif |
| 143 |
| 144 |
| 145 // |
| 146 // calculate the functions nullable, firstpos, lastpos and followpos on |
| 147 // nodes in the parse tree. |
| 148 // See the alogrithm description in Aho. |
| 149 // Understanding how this works by looking at the code alone will be |
| 150 // nearly impossible. |
| 151 // |
| 152 calcNullable(fTree); |
| 153 calcFirstPos(fTree); |
| 154 calcLastPos(fTree); |
| 155 calcFollowPos(fTree); |
| 156 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { |
| 157 RBBIDebugPuts("\n"); |
| 158 printPosSets(fTree); |
| 159 } |
| 160 |
| 161 // |
| 162 // For "chained" rules, modify the followPos sets |
| 163 // |
| 164 if (fRB->fChainRules) { |
| 165 calcChainedFollowPos(fTree); |
| 166 } |
| 167 |
| 168 // |
| 169 // BOF (start of input) test fixup. |
| 170 // |
| 171 if (fRB->fSetBuilder->sawBOF()) { |
| 172 bofFixup(); |
| 173 } |
| 174 |
| 175 // |
| 176 // Build the DFA state transition tables. |
| 177 // |
| 178 buildStateTable(); |
| 179 flagAcceptingStates(); |
| 180 flagLookAheadStates(); |
| 181 flagTaggedStates(); |
| 182 |
| 183 // |
| 184 // Update the global table of rule status {tag} values |
| 185 // The rule builder has a global vector of status values that are common |
| 186 // for all tables. Merge the ones from this table into the global set. |
| 187 // |
| 188 mergeRuleStatusVals(); |
| 189 |
| 190 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();
}; |
| 191 } |
| 192 |
| 193 |
| 194 |
| 195 //----------------------------------------------------------------------------- |
| 196 // |
| 197 // calcNullable. Impossible to explain succinctly. See Aho, section 3.9 |
| 198 // |
| 199 //----------------------------------------------------------------------------- |
| 200 void RBBITableBuilder::calcNullable(RBBINode *n) { |
| 201 if (n == NULL) { |
| 202 return; |
| 203 } |
| 204 if (n->fType == RBBINode::setRef || |
| 205 n->fType == RBBINode::endMark ) { |
| 206 // These are non-empty leaf node types. |
| 207 n->fNullable = FALSE; |
| 208 return; |
| 209 } |
| 210 |
| 211 if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) { |
| 212 // Lookahead marker node. It's a leaf, so no recursion on children. |
| 213 // It's nullable because it does not match any literal text from the inp
ut stream. |
| 214 n->fNullable = TRUE; |
| 215 return; |
| 216 } |
| 217 |
| 218 |
| 219 // The node is not a leaf. |
| 220 // Calculate nullable on its children. |
| 221 calcNullable(n->fLeftChild); |
| 222 calcNullable(n->fRightChild); |
| 223 |
| 224 // Apply functions from table 3.40 in Aho |
| 225 if (n->fType == RBBINode::opOr) { |
| 226 n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable; |
| 227 } |
| 228 else if (n->fType == RBBINode::opCat) { |
| 229 n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable; |
| 230 } |
| 231 else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) { |
| 232 n->fNullable = TRUE; |
| 233 } |
| 234 else { |
| 235 n->fNullable = FALSE; |
| 236 } |
| 237 } |
| 238 |
| 239 |
| 240 |
| 241 |
| 242 //----------------------------------------------------------------------------- |
| 243 // |
| 244 // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 |
| 245 // |
| 246 //----------------------------------------------------------------------------- |
| 247 void RBBITableBuilder::calcFirstPos(RBBINode *n) { |
| 248 if (n == NULL) { |
| 249 return; |
| 250 } |
| 251 if (n->fType == RBBINode::leafChar || |
| 252 n->fType == RBBINode::endMark || |
| 253 n->fType == RBBINode::lookAhead || |
| 254 n->fType == RBBINode::tag) { |
| 255 // These are non-empty leaf node types. |
| 256 // Note: In order to maintain the sort invariant on the set, |
| 257 // this function should only be called on a node whose set is |
| 258 // empty to start with. |
| 259 n->fFirstPosSet->addElement(n, *fStatus); |
| 260 return; |
| 261 } |
| 262 |
| 263 // The node is not a leaf. |
| 264 // Calculate firstPos on its children. |
| 265 calcFirstPos(n->fLeftChild); |
| 266 calcFirstPos(n->fRightChild); |
| 267 |
| 268 // Apply functions from table 3.40 in Aho |
| 269 if (n->fType == RBBINode::opOr) { |
| 270 setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); |
| 271 setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); |
| 272 } |
| 273 else if (n->fType == RBBINode::opCat) { |
| 274 setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); |
| 275 if (n->fLeftChild->fNullable) { |
| 276 setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); |
| 277 } |
| 278 } |
| 279 else if (n->fType == RBBINode::opStar || |
| 280 n->fType == RBBINode::opQuestion || |
| 281 n->fType == RBBINode::opPlus) { |
| 282 setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); |
| 283 } |
| 284 } |
| 285 |
| 286 |
| 287 |
| 288 //----------------------------------------------------------------------------- |
| 289 // |
| 290 // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 |
| 291 // |
| 292 //----------------------------------------------------------------------------- |
| 293 void RBBITableBuilder::calcLastPos(RBBINode *n) { |
| 294 if (n == NULL) { |
| 295 return; |
| 296 } |
| 297 if (n->fType == RBBINode::leafChar || |
| 298 n->fType == RBBINode::endMark || |
| 299 n->fType == RBBINode::lookAhead || |
| 300 n->fType == RBBINode::tag) { |
| 301 // These are non-empty leaf node types. |
| 302 // Note: In order to maintain the sort invariant on the set, |
| 303 // this function should only be called on a node whose set is |
| 304 // empty to start with. |
| 305 n->fLastPosSet->addElement(n, *fStatus); |
| 306 return; |
| 307 } |
| 308 |
| 309 // The node is not a leaf. |
| 310 // Calculate lastPos on its children. |
| 311 calcLastPos(n->fLeftChild); |
| 312 calcLastPos(n->fRightChild); |
| 313 |
| 314 // Apply functions from table 3.40 in Aho |
| 315 if (n->fType == RBBINode::opOr) { |
| 316 setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); |
| 317 setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); |
| 318 } |
| 319 else if (n->fType == RBBINode::opCat) { |
| 320 setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); |
| 321 if (n->fRightChild->fNullable) { |
| 322 setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); |
| 323 } |
| 324 } |
| 325 else if (n->fType == RBBINode::opStar || |
| 326 n->fType == RBBINode::opQuestion || |
| 327 n->fType == RBBINode::opPlus) { |
| 328 setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); |
| 329 } |
| 330 } |
| 331 |
| 332 |
| 333 |
| 334 //----------------------------------------------------------------------------- |
| 335 // |
| 336 // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 |
| 337 // |
| 338 //----------------------------------------------------------------------------- |
| 339 void RBBITableBuilder::calcFollowPos(RBBINode *n) { |
| 340 if (n == NULL || |
| 341 n->fType == RBBINode::leafChar || |
| 342 n->fType == RBBINode::endMark) { |
| 343 return; |
| 344 } |
| 345 |
| 346 calcFollowPos(n->fLeftChild); |
| 347 calcFollowPos(n->fRightChild); |
| 348 |
| 349 // Aho rule #1 |
| 350 if (n->fType == RBBINode::opCat) { |
| 351 RBBINode *i; // is 'i' in Aho's description |
| 352 uint32_t ix; |
| 353 |
| 354 UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet; |
| 355 |
| 356 for (ix=0; ix<(uint32_t)LastPosOfLeftChild->size(); ix++) { |
| 357 i = (RBBINode *)LastPosOfLeftChild->elementAt(ix); |
| 358 setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet); |
| 359 } |
| 360 } |
| 361 |
| 362 // Aho rule #2 |
| 363 if (n->fType == RBBINode::opStar || |
| 364 n->fType == RBBINode::opPlus) { |
| 365 RBBINode *i; // again, n and i are the names from Aho's description. |
| 366 uint32_t ix; |
| 367 |
| 368 for (ix=0; ix<(uint32_t)n->fLastPosSet->size(); ix++) { |
| 369 i = (RBBINode *)n->fLastPosSet->elementAt(ix); |
| 370 setAdd(i->fFollowPos, n->fFirstPosSet); |
| 371 } |
| 372 } |
| 373 |
| 374 |
| 375 |
| 376 } |
| 377 |
| 378 |
| 379 //----------------------------------------------------------------------------- |
| 380 // |
| 381 // calcChainedFollowPos. Modify the previously calculated followPos sets |
| 382 // to implement rule chaining. NOT described by Aho |
| 383 // |
| 384 //----------------------------------------------------------------------------- |
| 385 void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) { |
| 386 |
| 387 UVector endMarkerNodes(*fStatus); |
| 388 UVector leafNodes(*fStatus); |
| 389 int32_t i; |
| 390 |
| 391 if (U_FAILURE(*fStatus)) { |
| 392 return; |
| 393 } |
| 394 |
| 395 // get a list of all endmarker nodes. |
| 396 tree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); |
| 397 |
| 398 // get a list all leaf nodes |
| 399 tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus); |
| 400 if (U_FAILURE(*fStatus)) { |
| 401 return; |
| 402 } |
| 403 |
| 404 // Get all nodes that can be the start a match, which is FirstPosition() |
| 405 // of the portion of the tree corresponding to user-written rules. |
| 406 // See the tree description in bofFixup(). |
| 407 RBBINode *userRuleRoot = tree; |
| 408 if (fRB->fSetBuilder->sawBOF()) { |
| 409 userRuleRoot = tree->fLeftChild->fRightChild; |
| 410 } |
| 411 U_ASSERT(userRuleRoot != NULL); |
| 412 UVector *matchStartNodes = userRuleRoot->fFirstPosSet; |
| 413 |
| 414 |
| 415 // Iteratate over all leaf nodes, |
| 416 // |
| 417 int32_t endNodeIx; |
| 418 int32_t startNodeIx; |
| 419 |
| 420 for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) { |
| 421 RBBINode *tNode = (RBBINode *)leafNodes.elementAt(endNodeIx); |
| 422 RBBINode *endNode = NULL; |
| 423 |
| 424 // Identify leaf nodes that correspond to overall rule match positions. |
| 425 // These include an endMarkerNode in their followPos sets. |
| 426 for (i=0; i<endMarkerNodes.size(); i++) { |
| 427 if (tNode->fFollowPos->contains(endMarkerNodes.elementAt(i))) { |
| 428 endNode = tNode; |
| 429 break; |
| 430 } |
| 431 } |
| 432 if (endNode == NULL) { |
| 433 // node wasn't an end node. Try again with the next. |
| 434 continue; |
| 435 } |
| 436 |
| 437 // We've got a node that can end a match. |
| 438 |
| 439 // Line Break Specific hack: If this node's val correspond to the $CM c
har class, |
| 440 // don't chain from it. |
| 441 // TODO: Add rule syntax for this behavior, get specifics out of here a
nd |
| 442 // into the rule file. |
| 443 if (fRB->fLBCMNoChain) { |
| 444 UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); |
| 445 if (c != -1) { |
| 446 // c == -1 occurs with sets containing only the {eof} marker str
ing. |
| 447 ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_
LINE_BREAK); |
| 448 if (cLBProp == U_LB_COMBINING_MARK) { |
| 449 continue; |
| 450 } |
| 451 } |
| 452 } |
| 453 |
| 454 |
| 455 // Now iterate over the nodes that can start a match, looking for ones |
| 456 // with the same char class as our ending node. |
| 457 RBBINode *startNode; |
| 458 for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++
) { |
| 459 startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); |
| 460 if (startNode->fType != RBBINode::leafChar) { |
| 461 continue; |
| 462 } |
| 463 |
| 464 if (endNode->fVal == startNode->fVal) { |
| 465 // The end val (character class) of one possible match is the |
| 466 // same as the start of another. |
| 467 |
| 468 // Add all nodes from the followPos of the start node to the |
| 469 // followPos set of the end node, which will have the effect of |
| 470 // letting matches transition from a match state at endNode |
| 471 // to the second char of a match starting with startNode. |
| 472 setAdd(endNode->fFollowPos, startNode->fFollowPos); |
| 473 } |
| 474 } |
| 475 } |
| 476 } |
| 477 |
| 478 |
| 479 //----------------------------------------------------------------------------- |
| 480 // |
| 481 // bofFixup. Fixup for state tables that include {bof} beginning of input t
esting. |
| 482 // Do an swizzle similar to chaining, modifying the followPos set
of |
| 483 // the bofNode to include the followPos nodes from other {bot} no
des |
| 484 // scattered through the tree. |
| 485 // |
| 486 // This function has much in common with calcChainedFollowPos(). |
| 487 // |
| 488 //----------------------------------------------------------------------------- |
| 489 void RBBITableBuilder::bofFixup() { |
| 490 |
| 491 if (U_FAILURE(*fStatus)) { |
| 492 return; |
| 493 } |
| 494 |
| 495 // The parse tree looks like this ... |
| 496 // fTree root ---> <cat> |
| 497 // / \ . |
| 498 // <cat> <#end node> |
| 499 // / \ . |
| 500 // <bofNode> rest |
| 501 // of tree |
| 502 // |
| 503 // We will be adding things to the followPos set of the <bofNode> |
| 504 // |
| 505 RBBINode *bofNode = fTree->fLeftChild->fLeftChild; |
| 506 U_ASSERT(bofNode->fType == RBBINode::leafChar); |
| 507 U_ASSERT(bofNode->fVal == 2); |
| 508 |
| 509 // Get all nodes that can be the start a match of the user-written rules |
| 510 // (excluding the fake bofNode) |
| 511 // We want the nodes that can start a match in the |
| 512 // part labeled "rest of tree" |
| 513 // |
| 514 UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet; |
| 515 |
| 516 RBBINode *startNode; |
| 517 int startNodeIx; |
| 518 for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { |
| 519 startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); |
| 520 if (startNode->fType != RBBINode::leafChar) { |
| 521 continue; |
| 522 } |
| 523 |
| 524 if (startNode->fVal == bofNode->fVal) { |
| 525 // We found a leaf node corresponding to a {bof} that was |
| 526 // explicitly written into a rule. |
| 527 // Add everything from the followPos set of this node to the |
| 528 // followPos set of the fake bofNode at the start of the tree. |
| 529 // |
| 530 setAdd(bofNode->fFollowPos, startNode->fFollowPos); |
| 531 } |
| 532 } |
| 533 } |
| 534 |
| 535 //----------------------------------------------------------------------------- |
| 536 // |
| 537 // buildStateTable() Determine the set of runtime DFA states and the |
| 538 // transition tables for these states, by the algorithm |
| 539 // of fig. 3.44 in Aho. |
| 540 // |
| 541 // Most of the comments are quotes of Aho's psuedo-code. |
| 542 // |
| 543 //----------------------------------------------------------------------------- |
| 544 void RBBITableBuilder::buildStateTable() { |
| 545 if (U_FAILURE(*fStatus)) { |
| 546 return; |
| 547 } |
| 548 RBBIStateDescriptor *failState; |
| 549 // Set it to NULL to avoid uninitialized warning |
| 550 RBBIStateDescriptor *initialState = NULL; |
| 551 // |
| 552 // Add a dummy state 0 - the stop state. Not from Aho. |
| 553 int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1; |
| 554 failState = new RBBIStateDescriptor(lastInputSymbol, fStatus); |
| 555 if (failState == NULL) { |
| 556 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 557 goto ExitBuildSTdeleteall; |
| 558 } |
| 559 failState->fPositions = new UVector(*fStatus); |
| 560 if (failState->fPositions == NULL) { |
| 561 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 562 } |
| 563 if (failState->fPositions == NULL || U_FAILURE(*fStatus)) { |
| 564 goto ExitBuildSTdeleteall; |
| 565 } |
| 566 fDStates->addElement(failState, *fStatus); |
| 567 if (U_FAILURE(*fStatus)) { |
| 568 goto ExitBuildSTdeleteall; |
| 569 } |
| 570 |
| 571 // initially, the only unmarked state in Dstates is firstpos(root), |
| 572 // where toot is the root of the syntax tree for (r)#; |
| 573 initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus); |
| 574 if (initialState == NULL) { |
| 575 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 576 } |
| 577 if (U_FAILURE(*fStatus)) { |
| 578 goto ExitBuildSTdeleteall; |
| 579 } |
| 580 initialState->fPositions = new UVector(*fStatus); |
| 581 if (initialState->fPositions == NULL) { |
| 582 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 583 } |
| 584 if (U_FAILURE(*fStatus)) { |
| 585 goto ExitBuildSTdeleteall; |
| 586 } |
| 587 setAdd(initialState->fPositions, fTree->fFirstPosSet); |
| 588 fDStates->addElement(initialState, *fStatus); |
| 589 if (U_FAILURE(*fStatus)) { |
| 590 goto ExitBuildSTdeleteall; |
| 591 } |
| 592 |
| 593 // while there is an unmarked state T in Dstates do begin |
| 594 for (;;) { |
| 595 RBBIStateDescriptor *T = NULL; |
| 596 int32_t tx; |
| 597 for (tx=1; tx<fDStates->size(); tx++) { |
| 598 RBBIStateDescriptor *temp; |
| 599 temp = (RBBIStateDescriptor *)fDStates->elementAt(tx); |
| 600 if (temp->fMarked == FALSE) { |
| 601 T = temp; |
| 602 break; |
| 603 } |
| 604 } |
| 605 if (T == NULL) { |
| 606 break; |
| 607 } |
| 608 |
| 609 // mark T; |
| 610 T->fMarked = TRUE; |
| 611 |
| 612 // for each input symbol a do begin |
| 613 int32_t a; |
| 614 for (a = 1; a<=lastInputSymbol; a++) { |
| 615 // let U be the set of positions that are in followpos(p) |
| 616 // for some position p in T |
| 617 // such that the symbol at position p is a; |
| 618 UVector *U = NULL; |
| 619 RBBINode *p; |
| 620 int32_t px; |
| 621 for (px=0; px<T->fPositions->size(); px++) { |
| 622 p = (RBBINode *)T->fPositions->elementAt(px); |
| 623 if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) { |
| 624 if (U == NULL) { |
| 625 U = new UVector(*fStatus); |
| 626 if (U == NULL) { |
| 627 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 628 goto ExitBuildSTdeleteall; |
| 629 } |
| 630 } |
| 631 setAdd(U, p->fFollowPos); |
| 632 } |
| 633 } |
| 634 |
| 635 // if U is not empty and not in DStates then |
| 636 int32_t ux = 0; |
| 637 UBool UinDstates = FALSE; |
| 638 if (U != NULL) { |
| 639 U_ASSERT(U->size() > 0); |
| 640 int ix; |
| 641 for (ix=0; ix<fDStates->size(); ix++) { |
| 642 RBBIStateDescriptor *temp2; |
| 643 temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix); |
| 644 if (setEquals(U, temp2->fPositions)) { |
| 645 delete U; |
| 646 U = temp2->fPositions; |
| 647 ux = ix; |
| 648 UinDstates = TRUE; |
| 649 break; |
| 650 } |
| 651 } |
| 652 |
| 653 // Add U as an unmarked state to Dstates |
| 654 if (!UinDstates) |
| 655 { |
| 656 RBBIStateDescriptor *newState = new RBBIStateDescriptor(last
InputSymbol, fStatus); |
| 657 if (newState == NULL) { |
| 658 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 659 } |
| 660 if (U_FAILURE(*fStatus)) { |
| 661 goto ExitBuildSTdeleteall; |
| 662 } |
| 663 newState->fPositions = U; |
| 664 fDStates->addElement(newState, *fStatus); |
| 665 if (U_FAILURE(*fStatus)) { |
| 666 return; |
| 667 } |
| 668 ux = fDStates->size()-1; |
| 669 } |
| 670 |
| 671 // Dtran[T, a] := U; |
| 672 T->fDtran->setElementAt(ux, a); |
| 673 } |
| 674 } |
| 675 } |
| 676 return; |
| 677 // delete local pointers only if error occured. |
| 678 ExitBuildSTdeleteall: |
| 679 delete initialState; |
| 680 delete failState; |
| 681 } |
| 682 |
| 683 |
| 684 |
| 685 //----------------------------------------------------------------------------- |
| 686 // |
| 687 // flagAcceptingStates Identify accepting states. |
| 688 // First get a list of all of the end marker nodes. |
| 689 // Then, for each state s, |
| 690 // if s contains one of the end marker nodes in its
list of tree positions then |
| 691 // s is an accepting state. |
| 692 // |
| 693 //----------------------------------------------------------------------------- |
| 694 void RBBITableBuilder::flagAcceptingStates() { |
| 695 if (U_FAILURE(*fStatus)) { |
| 696 return; |
| 697 } |
| 698 UVector endMarkerNodes(*fStatus); |
| 699 RBBINode *endMarker; |
| 700 int32_t i; |
| 701 int32_t n; |
| 702 |
| 703 if (U_FAILURE(*fStatus)) { |
| 704 return; |
| 705 } |
| 706 |
| 707 fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); |
| 708 if (U_FAILURE(*fStatus)) { |
| 709 return; |
| 710 } |
| 711 |
| 712 for (i=0; i<endMarkerNodes.size(); i++) { |
| 713 endMarker = (RBBINode *)endMarkerNodes.elementAt(i); |
| 714 for (n=0; n<fDStates->size(); n++) { |
| 715 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt
(n); |
| 716 if (sd->fPositions->indexOf(endMarker) >= 0) { |
| 717 // Any non-zero value for fAccepting means this is an accepting
node. |
| 718 // The value is what will be returned to the user as the break s
tatus. |
| 719 // If no other value was specified, force it to -1. |
| 720 |
| 721 if (sd->fAccepting==0) { |
| 722 // State hasn't been marked as accepting yet. Do it now. |
| 723 sd->fAccepting = endMarker->fVal; |
| 724 if (sd->fAccepting == 0) { |
| 725 sd->fAccepting = -1; |
| 726 } |
| 727 } |
| 728 if (sd->fAccepting==-1 && endMarker->fVal != 0) { |
| 729 // Both lookahead and non-lookahead accepting for this state
. |
| 730 // Favor the look-ahead. Expedient for line break. |
| 731 // TODO: need a more elegant resolution for conflicting rul
es. |
| 732 sd->fAccepting = endMarker->fVal; |
| 733 } |
| 734 // implicit else: |
| 735 // if sd->fAccepting already had a value other than 0 or -1, lea
ve it be. |
| 736 |
| 737 // If the end marker node is from a look-ahead rule, set |
| 738 // the fLookAhead field or this state also. |
| 739 if (endMarker->fLookAheadEnd) { |
| 740 // TODO: don't change value if already set? |
| 741 // TODO: allow for more than one active look-ahead rule in
engine. |
| 742 // Make value here an index to a side array in engine
? |
| 743 sd->fLookAhead = sd->fAccepting; |
| 744 } |
| 745 } |
| 746 } |
| 747 } |
| 748 } |
| 749 |
| 750 |
| 751 //----------------------------------------------------------------------------- |
| 752 // |
| 753 // flagLookAheadStates Very similar to flagAcceptingStates, above. |
| 754 // |
| 755 //----------------------------------------------------------------------------- |
| 756 void RBBITableBuilder::flagLookAheadStates() { |
| 757 if (U_FAILURE(*fStatus)) { |
| 758 return; |
| 759 } |
| 760 UVector lookAheadNodes(*fStatus); |
| 761 RBBINode *lookAheadNode; |
| 762 int32_t i; |
| 763 int32_t n; |
| 764 |
| 765 fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus); |
| 766 if (U_FAILURE(*fStatus)) { |
| 767 return; |
| 768 } |
| 769 for (i=0; i<lookAheadNodes.size(); i++) { |
| 770 lookAheadNode = (RBBINode *)lookAheadNodes.elementAt(i); |
| 771 |
| 772 for (n=0; n<fDStates->size(); n++) { |
| 773 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt
(n); |
| 774 if (sd->fPositions->indexOf(lookAheadNode) >= 0) { |
| 775 sd->fLookAhead = lookAheadNode->fVal; |
| 776 } |
| 777 } |
| 778 } |
| 779 } |
| 780 |
| 781 |
| 782 |
| 783 |
| 784 //----------------------------------------------------------------------------- |
| 785 // |
| 786 // flagTaggedStates |
| 787 // |
| 788 //----------------------------------------------------------------------------- |
| 789 void RBBITableBuilder::flagTaggedStates() { |
| 790 if (U_FAILURE(*fStatus)) { |
| 791 return; |
| 792 } |
| 793 UVector tagNodes(*fStatus); |
| 794 RBBINode *tagNode; |
| 795 int32_t i; |
| 796 int32_t n; |
| 797 |
| 798 if (U_FAILURE(*fStatus)) { |
| 799 return; |
| 800 } |
| 801 fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus); |
| 802 if (U_FAILURE(*fStatus)) { |
| 803 return; |
| 804 } |
| 805 for (i=0; i<tagNodes.size(); i++) { // For each tag node t
(all of 'em) |
| 806 tagNode = (RBBINode *)tagNodes.elementAt(i); |
| 807 |
| 808 for (n=0; n<fDStates->size(); n++) { // For each state
s (row in the state table) |
| 809 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt
(n); |
| 810 if (sd->fPositions->indexOf(tagNode) >= 0) { // if s include
the tag node t |
| 811 sortedAdd(&sd->fTagVals, tagNode->fVal); |
| 812 } |
| 813 } |
| 814 } |
| 815 } |
| 816 |
| 817 |
| 818 |
| 819 |
| 820 //----------------------------------------------------------------------------- |
| 821 // |
| 822 // mergeRuleStatusVals |
| 823 // |
| 824 // Update the global table of rule status {tag} values |
| 825 // The rule builder has a global vector of status values that are common |
| 826 // for all tables. Merge the ones from this table into the global set. |
| 827 // |
| 828 //----------------------------------------------------------------------------- |
| 829 void RBBITableBuilder::mergeRuleStatusVals() { |
| 830 // |
| 831 // The basic outline of what happens here is this... |
| 832 // |
| 833 // for each state in this state table |
| 834 // if the status tag list for this state is in the global statuses lis
t |
| 835 // record where and |
| 836 // continue with the next state |
| 837 // else |
| 838 // add the tag list for this state to the global list. |
| 839 // |
| 840 int i; |
| 841 int n; |
| 842 |
| 843 // Pre-set a single tag of {0} into the table. |
| 844 // We will need this as a default, for rule sets with no explicit tagging. |
| 845 if (fRB->fRuleStatusVals->size() == 0) { |
| 846 fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in gr
oup |
| 847 fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus); // and our si
ngle status of zero |
| 848 } |
| 849 |
| 850 // For each state |
| 851 for (n=0; n<fDStates->size(); n++) { |
| 852 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); |
| 853 UVector *thisStatesTagValues = sd->fTagVals; |
| 854 if (thisStatesTagValues == NULL) { |
| 855 // No tag values are explicitly associated with this state. |
| 856 // Set the default tag value. |
| 857 sd->fTagsIdx = 0; |
| 858 continue; |
| 859 } |
| 860 |
| 861 // There are tag(s) associated with this state. |
| 862 // fTagsIdx will be the index into the global tag list for this state'
s tag values. |
| 863 // Initial value of -1 flags that we haven't got it set yet. |
| 864 sd->fTagsIdx = -1; |
| 865 int32_t thisTagGroupStart = 0; // indexes into the global rule status
vals list |
| 866 int32_t nextTagGroupStart = 0; |
| 867 |
| 868 // Loop runs once per group of tags in the global list |
| 869 while (nextTagGroupStart < fRB->fRuleStatusVals->size()) { |
| 870 thisTagGroupStart = nextTagGroupStart; |
| 871 nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupSt
art) + 1; |
| 872 if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(
thisTagGroupStart)) { |
| 873 // The number of tags for this state is different from |
| 874 // the number of tags in this group from the global list. |
| 875 // Continue with the next group from the global list. |
| 876 continue; |
| 877 } |
| 878 // The lengths match, go ahead and compare the actual tag values |
| 879 // between this state and the group from the global list. |
| 880 for (i=0; i<thisStatesTagValues->size(); i++) { |
| 881 if (thisStatesTagValues->elementAti(i) != |
| 882 fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i)
) { |
| 883 // Mismatch. |
| 884 break; |
| 885 } |
| 886 } |
| 887 |
| 888 if (i == thisStatesTagValues->size()) { |
| 889 // We found a set of tag values in the global list that match |
| 890 // those for this state. Use them. |
| 891 sd->fTagsIdx = thisTagGroupStart; |
| 892 break; |
| 893 } |
| 894 } |
| 895 |
| 896 if (sd->fTagsIdx == -1) { |
| 897 // No suitable entry in the global tag list already. Add one |
| 898 sd->fTagsIdx = fRB->fRuleStatusVals->size(); |
| 899 fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStat
us); |
| 900 for (i=0; i<thisStatesTagValues->size(); i++) { |
| 901 fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti
(i), *fStatus); |
| 902 } |
| 903 } |
| 904 } |
| 905 } |
| 906 |
| 907 |
| 908 |
| 909 |
| 910 |
| 911 |
| 912 |
| 913 //----------------------------------------------------------------------------- |
| 914 // |
| 915 // sortedAdd Add a value to a vector of sorted values (ints). |
| 916 // Do not replicate entries; if the value is already there, do not |
| 917 // add a second one. |
| 918 // Lazily create the vector if it does not already exist. |
| 919 // |
| 920 //----------------------------------------------------------------------------- |
| 921 void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) { |
| 922 int32_t i; |
| 923 |
| 924 if (*vector == NULL) { |
| 925 *vector = new UVector(*fStatus); |
| 926 } |
| 927 if (*vector == NULL || U_FAILURE(*fStatus)) { |
| 928 return; |
| 929 } |
| 930 UVector *vec = *vector; |
| 931 int32_t vSize = vec->size(); |
| 932 for (i=0; i<vSize; i++) { |
| 933 int32_t valAtI = vec->elementAti(i); |
| 934 if (valAtI == val) { |
| 935 // The value is already in the vector. Don't add it again. |
| 936 return; |
| 937 } |
| 938 if (valAtI > val) { |
| 939 break; |
| 940 } |
| 941 } |
| 942 vec->insertElementAt(val, i, *fStatus); |
| 943 } |
| 944 |
| 945 |
| 946 |
| 947 //----------------------------------------------------------------------------- |
| 948 // |
| 949 // setAdd Set operation on UVector |
| 950 // dest = dest union source |
| 951 // Elements may only appear once and must be sorted. |
| 952 // |
| 953 //----------------------------------------------------------------------------- |
| 954 void RBBITableBuilder::setAdd(UVector *dest, UVector *source) { |
| 955 int32_t destOriginalSize = dest->size(); |
| 956 int32_t sourceSize = source->size(); |
| 957 int32_t di = 0; |
| 958 MaybeStackArray<void *, 16> destArray, sourceArray; // Handle small cases w
ithout malloc |
| 959 void **destPtr, **sourcePtr; |
| 960 void **destLim, **sourceLim; |
| 961 |
| 962 if (destOriginalSize > destArray.getCapacity()) { |
| 963 if (destArray.resize(destOriginalSize) == NULL) { |
| 964 return; |
| 965 } |
| 966 } |
| 967 destPtr = destArray.getAlias(); |
| 968 destLim = destPtr + destOriginalSize; // destArray.getArrayLimit()? |
| 969 |
| 970 if (sourceSize > sourceArray.getCapacity()) { |
| 971 if (sourceArray.resize(sourceSize) == NULL) { |
| 972 return; |
| 973 } |
| 974 } |
| 975 sourcePtr = sourceArray.getAlias(); |
| 976 sourceLim = sourcePtr + sourceSize; // sourceArray.getArrayLimit()? |
| 977 |
| 978 // Avoid multiple "get element" calls by getting the contents into arrays |
| 979 (void) dest->toArray(destPtr); |
| 980 (void) source->toArray(sourcePtr); |
| 981 |
| 982 dest->setSize(sourceSize+destOriginalSize, *fStatus); |
| 983 |
| 984 while (sourcePtr < sourceLim && destPtr < destLim) { |
| 985 if (*destPtr == *sourcePtr) { |
| 986 dest->setElementAt(*sourcePtr++, di++); |
| 987 destPtr++; |
| 988 } |
| 989 // This check is required for machines with segmented memory, like i5/OS
. |
| 990 // Direct pointer comparison is not recommended. |
| 991 else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) { |
| 992 dest->setElementAt(*destPtr++, di++); |
| 993 } |
| 994 else { /* *sourcePtr < *destPtr */ |
| 995 dest->setElementAt(*sourcePtr++, di++); |
| 996 } |
| 997 } |
| 998 |
| 999 // At most one of these two cleanup loops will execute |
| 1000 while (destPtr < destLim) { |
| 1001 dest->setElementAt(*destPtr++, di++); |
| 1002 } |
| 1003 while (sourcePtr < sourceLim) { |
| 1004 dest->setElementAt(*sourcePtr++, di++); |
| 1005 } |
| 1006 |
| 1007 dest->setSize(di, *fStatus); |
| 1008 } |
| 1009 |
| 1010 |
| 1011 |
| 1012 //----------------------------------------------------------------------------- |
| 1013 // |
| 1014 // setEqual Set operation on UVector. |
| 1015 // Compare for equality. |
| 1016 // Elements must be sorted. |
| 1017 // |
| 1018 //----------------------------------------------------------------------------- |
| 1019 UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) { |
| 1020 return a->equals(*b); |
| 1021 } |
| 1022 |
| 1023 |
| 1024 //----------------------------------------------------------------------------- |
| 1025 // |
| 1026 // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followp
os |
| 1027 // for each node in the tree. |
| 1028 // |
| 1029 //----------------------------------------------------------------------------- |
| 1030 #ifdef RBBI_DEBUG |
| 1031 void RBBITableBuilder::printPosSets(RBBINode *n) { |
| 1032 if (n==NULL) { |
| 1033 return; |
| 1034 } |
| 1035 n->printNode(); |
| 1036 RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"TRUE":"FALSE"); |
| 1037 |
| 1038 RBBIDebugPrintf(" firstpos: "); |
| 1039 printSet(n->fFirstPosSet); |
| 1040 |
| 1041 RBBIDebugPrintf(" lastpos: "); |
| 1042 printSet(n->fLastPosSet); |
| 1043 |
| 1044 RBBIDebugPrintf(" followpos: "); |
| 1045 printSet(n->fFollowPos); |
| 1046 |
| 1047 printPosSets(n->fLeftChild); |
| 1048 printPosSets(n->fRightChild); |
| 1049 } |
| 1050 #endif |
| 1051 |
| 1052 |
| 1053 |
| 1054 //----------------------------------------------------------------------------- |
| 1055 // |
| 1056 // getTableSize() Calculate the size of the runtime form of this |
| 1057 // state transition table. |
| 1058 // |
| 1059 //----------------------------------------------------------------------------- |
| 1060 int32_t RBBITableBuilder::getTableSize() const { |
| 1061 int32_t size = 0; |
| 1062 int32_t numRows; |
| 1063 int32_t numCols; |
| 1064 int32_t rowSize; |
| 1065 |
| 1066 if (fTree == NULL) { |
| 1067 return 0; |
| 1068 } |
| 1069 |
| 1070 size = sizeof(RBBIStateTable) - 4; // The header, with no rows to the
table. |
| 1071 |
| 1072 numRows = fDStates->size(); |
| 1073 numCols = fRB->fSetBuilder->getNumCharCategories(); |
| 1074 |
| 1075 // Note The declaration of RBBIStateTableRow is for a table of two columns
. |
| 1076 // Therefore we subtract two from numCols when determining |
| 1077 // how much storage to add to a row for the total columns. |
| 1078 rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2); |
| 1079 size += numRows * rowSize; |
| 1080 return size; |
| 1081 } |
| 1082 |
| 1083 |
| 1084 |
| 1085 //----------------------------------------------------------------------------- |
| 1086 // |
| 1087 // exportTable() export the state transition table in the format required |
| 1088 // by the runtime engine. getTableSize() bytes of memory |
| 1089 // must be available at the output address "where". |
| 1090 // |
| 1091 //----------------------------------------------------------------------------- |
| 1092 void RBBITableBuilder::exportTable(void *where) { |
| 1093 RBBIStateTable *table = (RBBIStateTable *)where; |
| 1094 uint32_t state; |
| 1095 int col; |
| 1096 |
| 1097 if (U_FAILURE(*fStatus) || fTree == NULL) { |
| 1098 return; |
| 1099 } |
| 1100 |
| 1101 if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff || |
| 1102 fDStates->size() > 0x7fff) { |
| 1103 *fStatus = U_BRK_INTERNAL_ERROR; |
| 1104 return; |
| 1105 } |
| 1106 |
| 1107 table->fRowLen = sizeof(RBBIStateTableRow) + |
| 1108 sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCate
gories() - 2); |
| 1109 table->fNumStates = fDStates->size(); |
| 1110 table->fFlags = 0; |
| 1111 if (fRB->fLookAheadHardBreak) { |
| 1112 table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK; |
| 1113 } |
| 1114 if (fRB->fSetBuilder->sawBOF()) { |
| 1115 table->fFlags |= RBBI_BOF_REQUIRED; |
| 1116 } |
| 1117 table->fReserved = 0; |
| 1118 |
| 1119 for (state=0; state<table->fNumStates; state++) { |
| 1120 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(sta
te); |
| 1121 RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + sta
te*table->fRowLen); |
| 1122 U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767); |
| 1123 U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767); |
| 1124 row->fAccepting = (int16_t)sd->fAccepting; |
| 1125 row->fLookAhead = (int16_t)sd->fLookAhead; |
| 1126 row->fTagIdx = (int16_t)sd->fTagsIdx; |
| 1127 for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) { |
| 1128 row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col); |
| 1129 } |
| 1130 } |
| 1131 } |
| 1132 |
| 1133 |
| 1134 |
| 1135 //----------------------------------------------------------------------------- |
| 1136 // |
| 1137 // printSet Debug function. Print the contents of a UVector |
| 1138 // |
| 1139 //----------------------------------------------------------------------------- |
| 1140 #ifdef RBBI_DEBUG |
| 1141 void RBBITableBuilder::printSet(UVector *s) { |
| 1142 int32_t i; |
| 1143 for (i=0; i<s->size(); i++) { |
| 1144 void *v = s->elementAt(i); |
| 1145 RBBIDebugPrintf("%10p", v); |
| 1146 } |
| 1147 RBBIDebugPrintf("\n"); |
| 1148 } |
| 1149 #endif |
| 1150 |
| 1151 |
| 1152 //----------------------------------------------------------------------------- |
| 1153 // |
| 1154 // printStates Debug Function. Dump the fully constructed state transition
table. |
| 1155 // |
| 1156 //----------------------------------------------------------------------------- |
| 1157 #ifdef RBBI_DEBUG |
| 1158 void RBBITableBuilder::printStates() { |
| 1159 int c; // input "character" |
| 1160 int n; // state number |
| 1161 |
| 1162 RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); |
| 1163 RBBIDebugPrintf(" | Acc LA Tag"); |
| 1164 for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { |
| 1165 RBBIDebugPrintf(" %2d", c); |
| 1166 } |
| 1167 RBBIDebugPrintf("\n"); |
| 1168 RBBIDebugPrintf(" |---------------"); |
| 1169 for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { |
| 1170 RBBIDebugPrintf("---"); |
| 1171 } |
| 1172 RBBIDebugPrintf("\n"); |
| 1173 |
| 1174 for (n=0; n<fDStates->size(); n++) { |
| 1175 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); |
| 1176 RBBIDebugPrintf(" %3d | " , n); |
| 1177 RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTag
sIdx); |
| 1178 for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { |
| 1179 RBBIDebugPrintf(" %2d", sd->fDtran->elementAti(c)); |
| 1180 } |
| 1181 RBBIDebugPrintf("\n"); |
| 1182 } |
| 1183 RBBIDebugPrintf("\n\n"); |
| 1184 } |
| 1185 #endif |
| 1186 |
| 1187 |
| 1188 |
| 1189 //----------------------------------------------------------------------------- |
| 1190 // |
| 1191 // printRuleStatusTable Debug Function. Dump the common rule status table |
| 1192 // |
| 1193 //----------------------------------------------------------------------------- |
| 1194 #ifdef RBBI_DEBUG |
| 1195 void RBBITableBuilder::printRuleStatusTable() { |
| 1196 int32_t thisRecord = 0; |
| 1197 int32_t nextRecord = 0; |
| 1198 int i; |
| 1199 UVector *tbl = fRB->fRuleStatusVals; |
| 1200 |
| 1201 RBBIDebugPrintf("index | tags \n"); |
| 1202 RBBIDebugPrintf("-------------------\n"); |
| 1203 |
| 1204 while (nextRecord < tbl->size()) { |
| 1205 thisRecord = nextRecord; |
| 1206 nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1; |
| 1207 RBBIDebugPrintf("%4d ", thisRecord); |
| 1208 for (i=thisRecord+1; i<nextRecord; i++) { |
| 1209 RBBIDebugPrintf(" %5d", tbl->elementAti(i)); |
| 1210 } |
| 1211 RBBIDebugPrintf("\n"); |
| 1212 } |
| 1213 RBBIDebugPrintf("\n\n"); |
| 1214 } |
| 1215 #endif |
| 1216 |
| 1217 |
| 1218 //----------------------------------------------------------------------------- |
| 1219 // |
| 1220 // RBBIStateDescriptor Methods. This is a very struct-like class |
| 1221 // Most access is directly to the fields. |
| 1222 // |
| 1223 //----------------------------------------------------------------------------- |
| 1224 |
| 1225 RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
s) { |
| 1226 fMarked = FALSE; |
| 1227 fAccepting = 0; |
| 1228 fLookAhead = 0; |
| 1229 fTagsIdx = 0; |
| 1230 fTagVals = NULL; |
| 1231 fPositions = NULL; |
| 1232 fDtran = NULL; |
| 1233 |
| 1234 fDtran = new UVector(lastInputSymbol+1, *fStatus); |
| 1235 if (U_FAILURE(*fStatus)) { |
| 1236 return; |
| 1237 } |
| 1238 if (fDtran == NULL) { |
| 1239 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
| 1240 return; |
| 1241 } |
| 1242 fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-s
ized. |
| 1243 // It is indexed by input symbols,
and will |
| 1244 // hold the next state number for
each |
| 1245 // symbol. |
| 1246 } |
| 1247 |
| 1248 |
| 1249 RBBIStateDescriptor::~RBBIStateDescriptor() { |
| 1250 delete fPositions; |
| 1251 delete fDtran; |
| 1252 delete fTagVals; |
| 1253 fPositions = NULL; |
| 1254 fDtran = NULL; |
| 1255 fTagVals = NULL; |
| 1256 } |
| 1257 |
| 1258 U_NAMESPACE_END |
| 1259 |
| 1260 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |