OLD | NEW |
1 | |
2 // | 1 // |
3 // file: rbbiscan.cpp | 2 // file: rbbiscan.cpp |
4 // | 3 // |
5 // Copyright (C) 2002-2014, International Business Machines Corporation and oth
ers. | 4 // Copyright (C) 2002-2015, International Business Machines Corporation and oth
ers. |
6 // All Rights Reserved. | 5 // All Rights Reserved. |
7 // | 6 // |
8 // This file contains the Rule Based Break Iterator Rule Builder functions for | 7 // This file contains the Rule Based Break Iterator Rule Builder functions for |
9 // scanning the rules and assembling a parse tree. This is the first phase | 8 // scanning the rules and assembling a parse tree. This is the first phase |
10 // of compiling the rules. | 9 // of compiling the rules. |
11 // | 10 // |
12 // The overall of the rules is managed by class RBBIRuleBuilder, which will | 11 // The overall of the rules is managed by class RBBIRuleBuilder, which will |
13 // create and use an instance of this class as part of the process. | 12 // create and use an instance of this class as part of the process. |
14 // | 13 // |
15 | 14 |
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
204 pushNewNode(RBBINode::opStart); | 203 pushNewNode(RBBINode::opStart); |
205 fRuleNum++; | 204 fRuleNum++; |
206 break; | 205 break; |
207 | 206 |
208 | 207 |
209 case doExprOrOperator: | 208 case doExprOrOperator: |
210 { | 209 { |
211 fixOpStack(RBBINode::precOpCat); | 210 fixOpStack(RBBINode::precOpCat); |
212 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; | 211 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; |
213 RBBINode *orNode = pushNewNode(RBBINode::opOr); | 212 RBBINode *orNode = pushNewNode(RBBINode::opOr); |
| 213 if (U_FAILURE(*fRB->fStatus)) { |
| 214 break; |
| 215 } |
214 orNode->fLeftChild = operandNode; | 216 orNode->fLeftChild = operandNode; |
215 operandNode->fParent = orNode; | 217 operandNode->fParent = orNode; |
216 } | 218 } |
217 break; | 219 break; |
218 | 220 |
219 case doExprCatOperator: | 221 case doExprCatOperator: |
220 // concatenation operator. | 222 // concatenation operator. |
221 // For the implicit concatenation of adjacent terms in an expression tha
t are | 223 // For the implicit concatenation of adjacent terms in an expression tha
t are |
222 // not separated by any other operator. Action is invoked between the | 224 // not separated by any other operator. Action is invoked between the |
223 // actions for the two terms. | 225 // actions for the two terms. |
224 { | 226 { |
225 fixOpStack(RBBINode::precOpCat); | 227 fixOpStack(RBBINode::precOpCat); |
226 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; | 228 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; |
227 RBBINode *catNode = pushNewNode(RBBINode::opCat); | 229 RBBINode *catNode = pushNewNode(RBBINode::opCat); |
| 230 if (U_FAILURE(*fRB->fStatus)) { |
| 231 break; |
| 232 } |
228 catNode->fLeftChild = operandNode; | 233 catNode->fLeftChild = operandNode; |
229 operandNode->fParent = catNode; | 234 operandNode->fParent = catNode; |
230 } | 235 } |
231 break; | 236 break; |
232 | 237 |
233 case doLParen: | 238 case doLParen: |
234 // Open Paren. | 239 // Open Paren. |
235 // The openParen node is a dummy operation type with a low precedence, | 240 // The openParen node is a dummy operation type with a low precedence, |
236 // which has the affect of ensuring that any real binary op that | 241 // which has the affect of ensuring that any real binary op that |
237 // follows within the parens binds more tightly to the operands than | 242 // follows within the parens binds more tightly to the operands than |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
313 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeSt
ack("end of rule");} | 318 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeSt
ack("end of rule");} |
314 #endif | 319 #endif |
315 U_ASSERT(fNodeStackPtr == 1); | 320 U_ASSERT(fNodeStackPtr == 1); |
316 | 321 |
317 // If this rule includes a look-ahead '/', add a endMark node to the | 322 // If this rule includes a look-ahead '/', add a endMark node to the |
318 // expression tree. | 323 // expression tree. |
319 if (fLookAheadRule) { | 324 if (fLookAheadRule) { |
320 RBBINode *thisRule = fNodeStack[fNodeStackPtr]; | 325 RBBINode *thisRule = fNodeStack[fNodeStackPtr]; |
321 RBBINode *endNode = pushNewNode(RBBINode::endMark); | 326 RBBINode *endNode = pushNewNode(RBBINode::endMark); |
322 RBBINode *catNode = pushNewNode(RBBINode::opCat); | 327 RBBINode *catNode = pushNewNode(RBBINode::opCat); |
| 328 if (U_FAILURE(*fRB->fStatus)) { |
| 329 break; |
| 330 } |
323 fNodeStackPtr -= 2; | 331 fNodeStackPtr -= 2; |
324 catNode->fLeftChild = thisRule; | 332 catNode->fLeftChild = thisRule; |
325 catNode->fRightChild = endNode; | 333 catNode->fRightChild = endNode; |
326 fNodeStack[fNodeStackPtr] = catNode; | 334 fNodeStack[fNodeStackPtr] = catNode; |
327 endNode->fVal = fRuleNum; | 335 endNode->fVal = fRuleNum; |
328 endNode->fLookAheadEnd = TRUE; | 336 endNode->fLookAheadEnd = TRUE; |
329 } | 337 } |
330 | 338 |
331 // All rule expressions are ORed together. | 339 // All rule expressions are ORed together. |
332 // The ';' that terminates an expression really just functions as a '|'
with | 340 // The ';' that terminates an expression really just functions as a '|'
with |
333 // a low operator prededence. | 341 // a low operator prededence. |
334 // | 342 // |
335 // Each of the four sets of rules are collected separately. | 343 // Each of the four sets of rules are collected separately. |
336 // (forward, reverse, safe_forward, safe_reverse) | 344 // (forward, reverse, safe_forward, safe_reverse) |
337 // OR this rule into the appropriate group of them. | 345 // OR this rule into the appropriate group of them. |
338 // | 346 // |
339 RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefault
Tree); | 347 RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefault
Tree); |
340 | 348 |
341 if (*destRules != NULL) { | 349 if (*destRules != NULL) { |
342 // This is not the first rule encounted. | 350 // This is not the first rule encounted. |
343 // OR previous stuff (from *destRules) | 351 // OR previous stuff (from *destRules) |
344 // with the current rule expression (on the Node Stack) | 352 // with the current rule expression (on the Node Stack) |
345 // with the resulting OR expression going to *destRules | 353 // with the resulting OR expression going to *destRules |
346 // | 354 // |
347 RBBINode *thisRule = fNodeStack[fNodeStackPtr]; | 355 RBBINode *thisRule = fNodeStack[fNodeStackPtr]; |
348 RBBINode *prevRules = *destRules; | 356 RBBINode *prevRules = *destRules; |
349 RBBINode *orNode = pushNewNode(RBBINode::opOr); | 357 RBBINode *orNode = pushNewNode(RBBINode::opOr); |
| 358 if (U_FAILURE(*fRB->fStatus)) { |
| 359 break; |
| 360 } |
350 orNode->fLeftChild = prevRules; | 361 orNode->fLeftChild = prevRules; |
351 prevRules->fParent = orNode; | 362 prevRules->fParent = orNode; |
352 orNode->fRightChild = thisRule; | 363 orNode->fRightChild = thisRule; |
353 thisRule->fParent = orNode; | 364 thisRule->fParent = orNode; |
354 *destRules = orNode; | 365 *destRules = orNode; |
355 } | 366 } |
356 else | 367 else |
357 { | 368 { |
358 // This is the first rule encountered (for this direction). | 369 // This is the first rule encountered (for this direction). |
359 // Just move its parse tree from the stack to *destRules. | 370 // Just move its parse tree from the stack to *destRules. |
(...skipping 20 matching lines...) Expand all Loading... |
380 // | 391 // |
381 // Unary operands + ? * | 392 // Unary operands + ? * |
382 // These all appear after the operand to which they apply. | 393 // These all appear after the operand to which they apply. |
383 // When we hit one, the operand (may be a whole sub expression) | 394 // When we hit one, the operand (may be a whole sub expression) |
384 // will be on the top of the stack. | 395 // will be on the top of the stack. |
385 // Unary Operator becomes TOS, with the old TOS as its one child. | 396 // Unary Operator becomes TOS, with the old TOS as its one child. |
386 case doUnaryOpPlus: | 397 case doUnaryOpPlus: |
387 { | 398 { |
388 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; | 399 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; |
389 RBBINode *plusNode = pushNewNode(RBBINode::opPlus); | 400 RBBINode *plusNode = pushNewNode(RBBINode::opPlus); |
| 401 if (U_FAILURE(*fRB->fStatus)) { |
| 402 break; |
| 403 } |
390 plusNode->fLeftChild = operandNode; | 404 plusNode->fLeftChild = operandNode; |
391 operandNode->fParent = plusNode; | 405 operandNode->fParent = plusNode; |
392 } | 406 } |
393 break; | 407 break; |
394 | 408 |
395 case doUnaryOpQuestion: | 409 case doUnaryOpQuestion: |
396 { | 410 { |
397 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; | 411 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; |
398 RBBINode *qNode = pushNewNode(RBBINode::opQuestion); | 412 RBBINode *qNode = pushNewNode(RBBINode::opQuestion); |
| 413 if (U_FAILURE(*fRB->fStatus)) { |
| 414 break; |
| 415 } |
399 qNode->fLeftChild = operandNode; | 416 qNode->fLeftChild = operandNode; |
400 operandNode->fParent = qNode; | 417 operandNode->fParent = qNode; |
401 } | 418 } |
402 break; | 419 break; |
403 | 420 |
404 case doUnaryOpStar: | 421 case doUnaryOpStar: |
405 { | 422 { |
406 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; | 423 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; |
407 RBBINode *starNode = pushNewNode(RBBINode::opStar); | 424 RBBINode *starNode = pushNewNode(RBBINode::opStar); |
| 425 if (U_FAILURE(*fRB->fStatus)) { |
| 426 break; |
| 427 } |
408 starNode->fLeftChild = operandNode; | 428 starNode->fLeftChild = operandNode; |
409 operandNode->fParent = starNode; | 429 operandNode->fParent = starNode; |
410 } | 430 } |
411 break; | 431 break; |
412 | 432 |
413 case doRuleChar: | 433 case doRuleChar: |
414 // A "Rule Character" is any single character that is a literal part | 434 // A "Rule Character" is any single character that is a literal part |
415 // of the regular expression. Like a, b and c in the expression "(abc*)
| [:L:]" | 435 // of the regular expression. Like a, b and c in the expression "(abc*)
| [:L:]" |
416 // These are pretty uncommon in break rules; the terms are more commonly | 436 // These are pretty uncommon in break rules; the terms are more commonly |
417 // sets. To keep things uniform, treat these characters like as | 437 // sets. To keep things uniform, treat these characters like as |
418 // sets that just happen to contain only one character. | 438 // sets that just happen to contain only one character. |
419 { | 439 { |
420 n = pushNewNode(RBBINode::setRef); | 440 n = pushNewNode(RBBINode::setRef); |
| 441 if (U_FAILURE(*fRB->fStatus)) { |
| 442 break; |
| 443 } |
421 findSetFor(UnicodeString(fC.fChar), n); | 444 findSetFor(UnicodeString(fC.fChar), n); |
422 n->fFirstPos = fScanIndex; | 445 n->fFirstPos = fScanIndex; |
423 n->fLastPos = fNextIndex; | 446 n->fLastPos = fNextIndex; |
424 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); | 447 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); |
425 break; | 448 break; |
426 } | 449 } |
427 | 450 |
428 case doDotAny: | 451 case doDotAny: |
429 // scanned a ".", meaning match any single character. | 452 // scanned a ".", meaning match any single character. |
430 { | 453 { |
431 n = pushNewNode(RBBINode::setRef); | 454 n = pushNewNode(RBBINode::setRef); |
| 455 if (U_FAILURE(*fRB->fStatus)) { |
| 456 break; |
| 457 } |
432 findSetFor(UnicodeString(TRUE, kAny, 3), n); | 458 findSetFor(UnicodeString(TRUE, kAny, 3), n); |
433 n->fFirstPos = fScanIndex; | 459 n->fFirstPos = fScanIndex; |
434 n->fLastPos = fNextIndex; | 460 n->fLastPos = fNextIndex; |
435 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); | 461 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); |
436 break; | 462 break; |
437 } | 463 } |
438 | 464 |
439 case doSlash: | 465 case doSlash: |
440 // Scanned a '/', which identifies a look-ahead break position in a rule
. | 466 // Scanned a '/', which identifies a look-ahead break position in a rule
. |
441 n = pushNewNode(RBBINode::lookAhead); | 467 n = pushNewNode(RBBINode::lookAhead); |
| 468 if (U_FAILURE(*fRB->fStatus)) { |
| 469 break; |
| 470 } |
442 n->fVal = fRuleNum; | 471 n->fVal = fRuleNum; |
443 n->fFirstPos = fScanIndex; | 472 n->fFirstPos = fScanIndex; |
444 n->fLastPos = fNextIndex; | 473 n->fLastPos = fNextIndex; |
445 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); | 474 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); |
446 fLookAheadRule = TRUE; | 475 fLookAheadRule = TRUE; |
447 break; | 476 break; |
448 | 477 |
449 | 478 |
450 case doStartTagValue: | 479 case doStartTagValue: |
451 // Scanned a '{', the opening delimiter for a tag value within a rule. | 480 // Scanned a '{', the opening delimiter for a tag value within a rule. |
452 n = pushNewNode(RBBINode::tag); | 481 n = pushNewNode(RBBINode::tag); |
| 482 if (U_FAILURE(*fRB->fStatus)) { |
| 483 break; |
| 484 } |
453 n->fVal = 0; | 485 n->fVal = 0; |
454 n->fFirstPos = fScanIndex; | 486 n->fFirstPos = fScanIndex; |
455 n->fLastPos = fNextIndex; | 487 n->fLastPos = fNextIndex; |
456 break; | 488 break; |
457 | 489 |
458 case doTagDigit: | 490 case doTagDigit: |
459 // Just scanned a decimal digit that's part of a tag value | 491 // Just scanned a decimal digit that's part of a tag value |
460 { | 492 { |
461 n = fNodeStack[fNodeStackPtr]; | 493 n = fNodeStack[fNodeStackPtr]; |
462 uint32_t v = u_charDigitValue(fC.fChar); | 494 uint32_t v = u_charDigitValue(fC.fChar); |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
553 | 585 |
554 case doScanUnicodeSet: | 586 case doScanUnicodeSet: |
555 scanSet(); | 587 scanSet(); |
556 break; | 588 break; |
557 | 589 |
558 default: | 590 default: |
559 error(U_BRK_INTERNAL_ERROR); | 591 error(U_BRK_INTERNAL_ERROR); |
560 returnVal = FALSE; | 592 returnVal = FALSE; |
561 break; | 593 break; |
562 } | 594 } |
563 return returnVal; | 595 return returnVal && U_SUCCESS(*fRB->fStatus); |
564 } | 596 } |
565 | 597 |
566 | 598 |
567 | 599 |
568 | 600 |
569 //------------------------------------------------------------------------------ | 601 //------------------------------------------------------------------------------ |
570 // | 602 // |
571 // Error Report a rule parse error. | 603 // Error Report a rule parse error. |
572 // Only report it if no previous error has been recorded. | 604 // Only report it if no previous error has been recorded. |
573 // | 605 // |
(...skipping 470 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1044 } | 1076 } |
1045 | 1077 |
1046 } | 1078 } |
1047 | 1079 |
1048 // | 1080 // |
1049 // If there were NO user specified reverse rules, set up the equivalent of "
.*;" | 1081 // If there were NO user specified reverse rules, set up the equivalent of "
.*;" |
1050 // | 1082 // |
1051 if (fRB->fReverseTree == NULL) { | 1083 if (fRB->fReverseTree == NULL) { |
1052 fRB->fReverseTree = pushNewNode(RBBINode::opStar); | 1084 fRB->fReverseTree = pushNewNode(RBBINode::opStar); |
1053 RBBINode *operand = pushNewNode(RBBINode::setRef); | 1085 RBBINode *operand = pushNewNode(RBBINode::setRef); |
| 1086 if (U_FAILURE(*fRB->fStatus)) { |
| 1087 return; |
| 1088 } |
1054 findSetFor(UnicodeString(TRUE, kAny, 3), operand); | 1089 findSetFor(UnicodeString(TRUE, kAny, 3), operand); |
1055 fRB->fReverseTree->fLeftChild = operand; | 1090 fRB->fReverseTree->fLeftChild = operand; |
1056 operand->fParent = fRB->fReverseTree; | 1091 operand->fParent = fRB->fReverseTree; |
1057 fNodeStackPtr -= 2; | 1092 fNodeStackPtr -= 2; |
1058 } | 1093 } |
1059 | 1094 |
1060 | 1095 |
1061 // | 1096 // |
1062 // Parsing of the input RBBI rules is complete. | 1097 // Parsing of the input RBBI rules is complete. |
1063 // We now have a parse tree for the rule expressions | 1098 // We now have a parse tree for the rule expressions |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1096 | 1131 |
1097 | 1132 |
1098 | 1133 |
1099 //------------------------------------------------------------------------------ | 1134 //------------------------------------------------------------------------------ |
1100 // | 1135 // |
1101 // pushNewNode create a new RBBINode of the specified type and push it | 1136 // pushNewNode create a new RBBINode of the specified type and push it |
1102 // onto the stack of nodes. | 1137 // onto the stack of nodes. |
1103 // | 1138 // |
1104 //------------------------------------------------------------------------------ | 1139 //------------------------------------------------------------------------------ |
1105 RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { | 1140 RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { |
| 1141 if (U_FAILURE(*fRB->fStatus)) { |
| 1142 return NULL; |
| 1143 } |
1106 fNodeStackPtr++; | 1144 fNodeStackPtr++; |
1107 if (fNodeStackPtr >= kStackSize) { | 1145 if (fNodeStackPtr >= kStackSize) { |
1108 error(U_BRK_INTERNAL_ERROR); | 1146 error(U_BRK_INTERNAL_ERROR); |
1109 RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow."); | 1147 RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow."); |
1110 *fRB->fStatus = U_BRK_INTERNAL_ERROR; | 1148 *fRB->fStatus = U_BRK_INTERNAL_ERROR; |
1111 return NULL; | 1149 return NULL; |
1112 } | 1150 } |
1113 fNodeStack[fNodeStackPtr] = new RBBINode(t); | 1151 fNodeStack[fNodeStackPtr] = new RBBINode(t); |
1114 if (fNodeStack[fNodeStackPtr] == NULL) { | 1152 if (fNodeStack[fNodeStackPtr] == NULL) { |
1115 *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; | 1153 *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1185 if (fNextIndex >= i) { | 1223 if (fNextIndex >= i) { |
1186 break; | 1224 break; |
1187 } | 1225 } |
1188 nextCharLL(); | 1226 nextCharLL(); |
1189 } | 1227 } |
1190 | 1228 |
1191 if (U_SUCCESS(*fRB->fStatus)) { | 1229 if (U_SUCCESS(*fRB->fStatus)) { |
1192 RBBINode *n; | 1230 RBBINode *n; |
1193 | 1231 |
1194 n = pushNewNode(RBBINode::setRef); | 1232 n = pushNewNode(RBBINode::setRef); |
| 1233 if (U_FAILURE(*fRB->fStatus)) { |
| 1234 return; |
| 1235 } |
1195 n->fFirstPos = startPos; | 1236 n->fFirstPos = startPos; |
1196 n->fLastPos = fNextIndex; | 1237 n->fLastPos = fNextIndex; |
1197 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); | 1238 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); |
1198 // findSetFor() serves several purposes here: | 1239 // findSetFor() serves several purposes here: |
1199 // - Adopts storage for the UnicodeSet, will be responsible for dele
ting. | 1240 // - Adopts storage for the UnicodeSet, will be responsible for dele
ting. |
1200 // - Mantains collection of all sets in use, needed later for establ
ishing | 1241 // - Mantains collection of all sets in use, needed later for establ
ishing |
1201 // character categories for run time engine. | 1242 // character categories for run time engine. |
1202 // - Eliminates mulitiple instances of the same set. | 1243 // - Eliminates mulitiple instances of the same set. |
1203 // - Creates a new uset node if necessary (if this isn't a duplicate
.) | 1244 // - Creates a new uset node if necessary (if this isn't a duplicate
.) |
1204 findSetFor(n->fText, n, uset); | 1245 findSetFor(n->fText, n, uset); |
1205 } | 1246 } |
1206 | 1247 |
1207 } | 1248 } |
1208 | 1249 |
1209 U_NAMESPACE_END | 1250 U_NAMESPACE_END |
1210 | 1251 |
1211 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1252 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |