Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(207)

Side by Side Diff: source/common/rbbiscan.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/putilimp.h ('k') | source/common/rbbistbl.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1
2 // 1 //
3 // file: rbbiscan.cpp 2 // file: rbbiscan.cpp
4 // 3 //
5 // Copyright (C) 2002-2014, International Business Machines Corporation and oth ers. 4 // Copyright (C) 2002-2015, International Business Machines Corporation and oth ers.
6 // All Rights Reserved. 5 // All Rights Reserved.
7 // 6 //
8 // This file contains the Rule Based Break Iterator Rule Builder functions for 7 // This file contains the Rule Based Break Iterator Rule Builder functions for
9 // scanning the rules and assembling a parse tree. This is the first phase 8 // scanning the rules and assembling a parse tree. This is the first phase
10 // of compiling the rules. 9 // of compiling the rules.
11 // 10 //
12 // The overall of the rules is managed by class RBBIRuleBuilder, which will 11 // The overall of the rules is managed by class RBBIRuleBuilder, which will
13 // create and use an instance of this class as part of the process. 12 // create and use an instance of this class as part of the process.
14 // 13 //
15 14
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after
204 pushNewNode(RBBINode::opStart); 203 pushNewNode(RBBINode::opStart);
205 fRuleNum++; 204 fRuleNum++;
206 break; 205 break;
207 206
208 207
209 case doExprOrOperator: 208 case doExprOrOperator:
210 { 209 {
211 fixOpStack(RBBINode::precOpCat); 210 fixOpStack(RBBINode::precOpCat);
212 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 211 RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
213 RBBINode *orNode = pushNewNode(RBBINode::opOr); 212 RBBINode *orNode = pushNewNode(RBBINode::opOr);
213 if (U_FAILURE(*fRB->fStatus)) {
214 break;
215 }
214 orNode->fLeftChild = operandNode; 216 orNode->fLeftChild = operandNode;
215 operandNode->fParent = orNode; 217 operandNode->fParent = orNode;
216 } 218 }
217 break; 219 break;
218 220
219 case doExprCatOperator: 221 case doExprCatOperator:
220 // concatenation operator. 222 // concatenation operator.
221 // For the implicit concatenation of adjacent terms in an expression tha t are 223 // For the implicit concatenation of adjacent terms in an expression tha t are
222 // not separated by any other operator. Action is invoked between the 224 // not separated by any other operator. Action is invoked between the
223 // actions for the two terms. 225 // actions for the two terms.
224 { 226 {
225 fixOpStack(RBBINode::precOpCat); 227 fixOpStack(RBBINode::precOpCat);
226 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 228 RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
227 RBBINode *catNode = pushNewNode(RBBINode::opCat); 229 RBBINode *catNode = pushNewNode(RBBINode::opCat);
230 if (U_FAILURE(*fRB->fStatus)) {
231 break;
232 }
228 catNode->fLeftChild = operandNode; 233 catNode->fLeftChild = operandNode;
229 operandNode->fParent = catNode; 234 operandNode->fParent = catNode;
230 } 235 }
231 break; 236 break;
232 237
233 case doLParen: 238 case doLParen:
234 // Open Paren. 239 // Open Paren.
235 // The openParen node is a dummy operation type with a low precedence, 240 // The openParen node is a dummy operation type with a low precedence,
236 // which has the affect of ensuring that any real binary op that 241 // which has the affect of ensuring that any real binary op that
237 // follows within the parens binds more tightly to the operands than 242 // follows within the parens binds more tightly to the operands than
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
313 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeSt ack("end of rule");} 318 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeSt ack("end of rule");}
314 #endif 319 #endif
315 U_ASSERT(fNodeStackPtr == 1); 320 U_ASSERT(fNodeStackPtr == 1);
316 321
317 // If this rule includes a look-ahead '/', add a endMark node to the 322 // If this rule includes a look-ahead '/', add a endMark node to the
318 // expression tree. 323 // expression tree.
319 if (fLookAheadRule) { 324 if (fLookAheadRule) {
320 RBBINode *thisRule = fNodeStack[fNodeStackPtr]; 325 RBBINode *thisRule = fNodeStack[fNodeStackPtr];
321 RBBINode *endNode = pushNewNode(RBBINode::endMark); 326 RBBINode *endNode = pushNewNode(RBBINode::endMark);
322 RBBINode *catNode = pushNewNode(RBBINode::opCat); 327 RBBINode *catNode = pushNewNode(RBBINode::opCat);
328 if (U_FAILURE(*fRB->fStatus)) {
329 break;
330 }
323 fNodeStackPtr -= 2; 331 fNodeStackPtr -= 2;
324 catNode->fLeftChild = thisRule; 332 catNode->fLeftChild = thisRule;
325 catNode->fRightChild = endNode; 333 catNode->fRightChild = endNode;
326 fNodeStack[fNodeStackPtr] = catNode; 334 fNodeStack[fNodeStackPtr] = catNode;
327 endNode->fVal = fRuleNum; 335 endNode->fVal = fRuleNum;
328 endNode->fLookAheadEnd = TRUE; 336 endNode->fLookAheadEnd = TRUE;
329 } 337 }
330 338
331 // All rule expressions are ORed together. 339 // All rule expressions are ORed together.
332 // The ';' that terminates an expression really just functions as a '|' with 340 // The ';' that terminates an expression really just functions as a '|' with
333 // a low operator prededence. 341 // a low operator prededence.
334 // 342 //
335 // Each of the four sets of rules are collected separately. 343 // Each of the four sets of rules are collected separately.
336 // (forward, reverse, safe_forward, safe_reverse) 344 // (forward, reverse, safe_forward, safe_reverse)
337 // OR this rule into the appropriate group of them. 345 // OR this rule into the appropriate group of them.
338 // 346 //
339 RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefault Tree); 347 RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefault Tree);
340 348
341 if (*destRules != NULL) { 349 if (*destRules != NULL) {
342 // This is not the first rule encounted. 350 // This is not the first rule encounted.
343 // OR previous stuff (from *destRules) 351 // OR previous stuff (from *destRules)
344 // with the current rule expression (on the Node Stack) 352 // with the current rule expression (on the Node Stack)
345 // with the resulting OR expression going to *destRules 353 // with the resulting OR expression going to *destRules
346 // 354 //
347 RBBINode *thisRule = fNodeStack[fNodeStackPtr]; 355 RBBINode *thisRule = fNodeStack[fNodeStackPtr];
348 RBBINode *prevRules = *destRules; 356 RBBINode *prevRules = *destRules;
349 RBBINode *orNode = pushNewNode(RBBINode::opOr); 357 RBBINode *orNode = pushNewNode(RBBINode::opOr);
358 if (U_FAILURE(*fRB->fStatus)) {
359 break;
360 }
350 orNode->fLeftChild = prevRules; 361 orNode->fLeftChild = prevRules;
351 prevRules->fParent = orNode; 362 prevRules->fParent = orNode;
352 orNode->fRightChild = thisRule; 363 orNode->fRightChild = thisRule;
353 thisRule->fParent = orNode; 364 thisRule->fParent = orNode;
354 *destRules = orNode; 365 *destRules = orNode;
355 } 366 }
356 else 367 else
357 { 368 {
358 // This is the first rule encountered (for this direction). 369 // This is the first rule encountered (for this direction).
359 // Just move its parse tree from the stack to *destRules. 370 // Just move its parse tree from the stack to *destRules.
(...skipping 20 matching lines...) Expand all
380 // 391 //
381 // Unary operands + ? * 392 // Unary operands + ? *
382 // These all appear after the operand to which they apply. 393 // These all appear after the operand to which they apply.
383 // When we hit one, the operand (may be a whole sub expression) 394 // When we hit one, the operand (may be a whole sub expression)
384 // will be on the top of the stack. 395 // will be on the top of the stack.
385 // Unary Operator becomes TOS, with the old TOS as its one child. 396 // Unary Operator becomes TOS, with the old TOS as its one child.
386 case doUnaryOpPlus: 397 case doUnaryOpPlus:
387 { 398 {
388 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 399 RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
389 RBBINode *plusNode = pushNewNode(RBBINode::opPlus); 400 RBBINode *plusNode = pushNewNode(RBBINode::opPlus);
401 if (U_FAILURE(*fRB->fStatus)) {
402 break;
403 }
390 plusNode->fLeftChild = operandNode; 404 plusNode->fLeftChild = operandNode;
391 operandNode->fParent = plusNode; 405 operandNode->fParent = plusNode;
392 } 406 }
393 break; 407 break;
394 408
395 case doUnaryOpQuestion: 409 case doUnaryOpQuestion:
396 { 410 {
397 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 411 RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
398 RBBINode *qNode = pushNewNode(RBBINode::opQuestion); 412 RBBINode *qNode = pushNewNode(RBBINode::opQuestion);
413 if (U_FAILURE(*fRB->fStatus)) {
414 break;
415 }
399 qNode->fLeftChild = operandNode; 416 qNode->fLeftChild = operandNode;
400 operandNode->fParent = qNode; 417 operandNode->fParent = qNode;
401 } 418 }
402 break; 419 break;
403 420
404 case doUnaryOpStar: 421 case doUnaryOpStar:
405 { 422 {
406 RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 423 RBBINode *operandNode = fNodeStack[fNodeStackPtr--];
407 RBBINode *starNode = pushNewNode(RBBINode::opStar); 424 RBBINode *starNode = pushNewNode(RBBINode::opStar);
425 if (U_FAILURE(*fRB->fStatus)) {
426 break;
427 }
408 starNode->fLeftChild = operandNode; 428 starNode->fLeftChild = operandNode;
409 operandNode->fParent = starNode; 429 operandNode->fParent = starNode;
410 } 430 }
411 break; 431 break;
412 432
413 case doRuleChar: 433 case doRuleChar:
414 // A "Rule Character" is any single character that is a literal part 434 // A "Rule Character" is any single character that is a literal part
415 // of the regular expression. Like a, b and c in the expression "(abc*) | [:L:]" 435 // of the regular expression. Like a, b and c in the expression "(abc*) | [:L:]"
416 // These are pretty uncommon in break rules; the terms are more commonly 436 // These are pretty uncommon in break rules; the terms are more commonly
417 // sets. To keep things uniform, treat these characters like as 437 // sets. To keep things uniform, treat these characters like as
418 // sets that just happen to contain only one character. 438 // sets that just happen to contain only one character.
419 { 439 {
420 n = pushNewNode(RBBINode::setRef); 440 n = pushNewNode(RBBINode::setRef);
441 if (U_FAILURE(*fRB->fStatus)) {
442 break;
443 }
421 findSetFor(UnicodeString(fC.fChar), n); 444 findSetFor(UnicodeString(fC.fChar), n);
422 n->fFirstPos = fScanIndex; 445 n->fFirstPos = fScanIndex;
423 n->fLastPos = fNextIndex; 446 n->fLastPos = fNextIndex;
424 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); 447 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
425 break; 448 break;
426 } 449 }
427 450
428 case doDotAny: 451 case doDotAny:
429 // scanned a ".", meaning match any single character. 452 // scanned a ".", meaning match any single character.
430 { 453 {
431 n = pushNewNode(RBBINode::setRef); 454 n = pushNewNode(RBBINode::setRef);
455 if (U_FAILURE(*fRB->fStatus)) {
456 break;
457 }
432 findSetFor(UnicodeString(TRUE, kAny, 3), n); 458 findSetFor(UnicodeString(TRUE, kAny, 3), n);
433 n->fFirstPos = fScanIndex; 459 n->fFirstPos = fScanIndex;
434 n->fLastPos = fNextIndex; 460 n->fLastPos = fNextIndex;
435 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); 461 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
436 break; 462 break;
437 } 463 }
438 464
439 case doSlash: 465 case doSlash:
440 // Scanned a '/', which identifies a look-ahead break position in a rule . 466 // Scanned a '/', which identifies a look-ahead break position in a rule .
441 n = pushNewNode(RBBINode::lookAhead); 467 n = pushNewNode(RBBINode::lookAhead);
468 if (U_FAILURE(*fRB->fStatus)) {
469 break;
470 }
442 n->fVal = fRuleNum; 471 n->fVal = fRuleNum;
443 n->fFirstPos = fScanIndex; 472 n->fFirstPos = fScanIndex;
444 n->fLastPos = fNextIndex; 473 n->fLastPos = fNextIndex;
445 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); 474 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
446 fLookAheadRule = TRUE; 475 fLookAheadRule = TRUE;
447 break; 476 break;
448 477
449 478
450 case doStartTagValue: 479 case doStartTagValue:
451 // Scanned a '{', the opening delimiter for a tag value within a rule. 480 // Scanned a '{', the opening delimiter for a tag value within a rule.
452 n = pushNewNode(RBBINode::tag); 481 n = pushNewNode(RBBINode::tag);
482 if (U_FAILURE(*fRB->fStatus)) {
483 break;
484 }
453 n->fVal = 0; 485 n->fVal = 0;
454 n->fFirstPos = fScanIndex; 486 n->fFirstPos = fScanIndex;
455 n->fLastPos = fNextIndex; 487 n->fLastPos = fNextIndex;
456 break; 488 break;
457 489
458 case doTagDigit: 490 case doTagDigit:
459 // Just scanned a decimal digit that's part of a tag value 491 // Just scanned a decimal digit that's part of a tag value
460 { 492 {
461 n = fNodeStack[fNodeStackPtr]; 493 n = fNodeStack[fNodeStackPtr];
462 uint32_t v = u_charDigitValue(fC.fChar); 494 uint32_t v = u_charDigitValue(fC.fChar);
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
553 585
554 case doScanUnicodeSet: 586 case doScanUnicodeSet:
555 scanSet(); 587 scanSet();
556 break; 588 break;
557 589
558 default: 590 default:
559 error(U_BRK_INTERNAL_ERROR); 591 error(U_BRK_INTERNAL_ERROR);
560 returnVal = FALSE; 592 returnVal = FALSE;
561 break; 593 break;
562 } 594 }
563 return returnVal; 595 return returnVal && U_SUCCESS(*fRB->fStatus);
564 } 596 }
565 597
566 598
567 599
568 600
569 //------------------------------------------------------------------------------ 601 //------------------------------------------------------------------------------
570 // 602 //
571 // Error Report a rule parse error. 603 // Error Report a rule parse error.
572 // Only report it if no previous error has been recorded. 604 // Only report it if no previous error has been recorded.
573 // 605 //
(...skipping 470 matching lines...) Expand 10 before | Expand all | Expand 10 after
1044 } 1076 }
1045 1077
1046 } 1078 }
1047 1079
1048 // 1080 //
1049 // If there were NO user specified reverse rules, set up the equivalent of " .*;" 1081 // If there were NO user specified reverse rules, set up the equivalent of " .*;"
1050 // 1082 //
1051 if (fRB->fReverseTree == NULL) { 1083 if (fRB->fReverseTree == NULL) {
1052 fRB->fReverseTree = pushNewNode(RBBINode::opStar); 1084 fRB->fReverseTree = pushNewNode(RBBINode::opStar);
1053 RBBINode *operand = pushNewNode(RBBINode::setRef); 1085 RBBINode *operand = pushNewNode(RBBINode::setRef);
1086 if (U_FAILURE(*fRB->fStatus)) {
1087 return;
1088 }
1054 findSetFor(UnicodeString(TRUE, kAny, 3), operand); 1089 findSetFor(UnicodeString(TRUE, kAny, 3), operand);
1055 fRB->fReverseTree->fLeftChild = operand; 1090 fRB->fReverseTree->fLeftChild = operand;
1056 operand->fParent = fRB->fReverseTree; 1091 operand->fParent = fRB->fReverseTree;
1057 fNodeStackPtr -= 2; 1092 fNodeStackPtr -= 2;
1058 } 1093 }
1059 1094
1060 1095
1061 // 1096 //
1062 // Parsing of the input RBBI rules is complete. 1097 // Parsing of the input RBBI rules is complete.
1063 // We now have a parse tree for the rule expressions 1098 // We now have a parse tree for the rule expressions
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1096 1131
1097 1132
1098 1133
1099 //------------------------------------------------------------------------------ 1134 //------------------------------------------------------------------------------
1100 // 1135 //
1101 // pushNewNode create a new RBBINode of the specified type and push it 1136 // pushNewNode create a new RBBINode of the specified type and push it
1102 // onto the stack of nodes. 1137 // onto the stack of nodes.
1103 // 1138 //
1104 //------------------------------------------------------------------------------ 1139 //------------------------------------------------------------------------------
1105 RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { 1140 RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) {
1141 if (U_FAILURE(*fRB->fStatus)) {
1142 return NULL;
1143 }
1106 fNodeStackPtr++; 1144 fNodeStackPtr++;
1107 if (fNodeStackPtr >= kStackSize) { 1145 if (fNodeStackPtr >= kStackSize) {
1108 error(U_BRK_INTERNAL_ERROR); 1146 error(U_BRK_INTERNAL_ERROR);
1109 RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow."); 1147 RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow.");
1110 *fRB->fStatus = U_BRK_INTERNAL_ERROR; 1148 *fRB->fStatus = U_BRK_INTERNAL_ERROR;
1111 return NULL; 1149 return NULL;
1112 } 1150 }
1113 fNodeStack[fNodeStackPtr] = new RBBINode(t); 1151 fNodeStack[fNodeStackPtr] = new RBBINode(t);
1114 if (fNodeStack[fNodeStackPtr] == NULL) { 1152 if (fNodeStack[fNodeStackPtr] == NULL) {
1115 *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; 1153 *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR;
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
1185 if (fNextIndex >= i) { 1223 if (fNextIndex >= i) {
1186 break; 1224 break;
1187 } 1225 }
1188 nextCharLL(); 1226 nextCharLL();
1189 } 1227 }
1190 1228
1191 if (U_SUCCESS(*fRB->fStatus)) { 1229 if (U_SUCCESS(*fRB->fStatus)) {
1192 RBBINode *n; 1230 RBBINode *n;
1193 1231
1194 n = pushNewNode(RBBINode::setRef); 1232 n = pushNewNode(RBBINode::setRef);
1233 if (U_FAILURE(*fRB->fStatus)) {
1234 return;
1235 }
1195 n->fFirstPos = startPos; 1236 n->fFirstPos = startPos;
1196 n->fLastPos = fNextIndex; 1237 n->fLastPos = fNextIndex;
1197 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); 1238 fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
1198 // findSetFor() serves several purposes here: 1239 // findSetFor() serves several purposes here:
1199 // - Adopts storage for the UnicodeSet, will be responsible for dele ting. 1240 // - Adopts storage for the UnicodeSet, will be responsible for dele ting.
1200 // - Mantains collection of all sets in use, needed later for establ ishing 1241 // - Mantains collection of all sets in use, needed later for establ ishing
1201 // character categories for run time engine. 1242 // character categories for run time engine.
1202 // - Eliminates mulitiple instances of the same set. 1243 // - Eliminates mulitiple instances of the same set.
1203 // - Creates a new uset node if necessary (if this isn't a duplicate .) 1244 // - Creates a new uset node if necessary (if this isn't a duplicate .)
1204 findSetFor(n->fText, n, uset); 1245 findSetFor(n->fText, n, uset);
1205 } 1246 }
1206 1247
1207 } 1248 }
1208 1249
1209 U_NAMESPACE_END 1250 U_NAMESPACE_END
1210 1251
1211 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1252 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
OLDNEW
« no previous file with comments | « source/common/putilimp.h ('k') | source/common/rbbistbl.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698