Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Side by Side Diff: source/i18n/regexcmp.cpp

Issue 822213003: ICU upgrade to 54.1 step 2 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: readme: better wrapping, declspec patch dropped Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/regexcmp.h ('k') | source/i18n/regeximp.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // 1 //
2 // file: regexcmp.cpp 2 // file: regexcmp.cpp
3 // 3 //
4 // Copyright (C) 2002-2014 International Business Machines Corporation and othe rs. 4 // Copyright (C) 2002-2014 International Business Machines Corporation and othe rs.
5 // All Rights Reserved. 5 // All Rights Reserved.
6 // 6 //
7 // This file contains the ICU regular expression compiler, which is responsible 7 // This file contains the ICU regular expression compiler, which is responsible
8 // for processing a regular expression pattern into the compiled form that 8 // for processing a regular expression pattern into the compiled form that
9 // is used by the match finding engine. 9 // is used by the match finding engine.
10 // 10 //
(...skipping 283 matching lines...) Expand 10 before | Expand all | Expand 10 after
294 n *= 10; 294 n *= 10;
295 } 295 }
296 296
297 // 297 //
298 // The pattern's fFrameSize so far has accumulated the requirements for 298 // The pattern's fFrameSize so far has accumulated the requirements for
299 // storage for capture parentheses, counters, etc. that are encountered 299 // storage for capture parentheses, counters, etc. that are encountered
300 // in the pattern. Add space for the two variables that are always 300 // in the pattern. Add space for the two variables that are always
301 // present in the saved state: the input string position (int64_t) and 301 // present in the saved state: the input string position (int64_t) and
302 // the position in the compiled pattern. 302 // the position in the compiled pattern.
303 // 303 //
304 fRXPat->fFrameSize+=RESTACKFRAME_HDRCOUNT; 304 allocateStackData(RESTACKFRAME_HDRCOUNT);
305 305
306 // 306 //
307 // Optimization pass 1: NOPs, back-references, and case-folding 307 // Optimization pass 1: NOPs, back-references, and case-folding
308 // 308 //
309 stripNOPs(); 309 stripNOPs();
310 310
311 // 311 //
312 // Get bounds for the minimum and maximum length of a string that this 312 // Get bounds for the minimum and maximum length of a string that this
313 // pattern can match. Used to avoid looking for matches in strings that 313 // pattern can match. Used to avoid looking for matches in strings that
314 // are too short. 314 // are too short.
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
360 360
361 case doPatStart: 361 case doPatStart:
362 // Start of pattern compiles to: 362 // Start of pattern compiles to:
363 //0 SAVE 2 Fall back to position of FAIL 363 //0 SAVE 2 Fall back to position of FAIL
364 //1 jmp 3 364 //1 jmp 3
365 //2 FAIL Stop if we ever reach here. 365 //2 FAIL Stop if we ever reach here.
366 //3 NOP Dummy, so start of pattern looks the same as 366 //3 NOP Dummy, so start of pattern looks the same as
367 // the start of an ( grouping. 367 // the start of an ( grouping.
368 //4 NOP Resreved, will be replaced by a save if there are 368 //4 NOP Resreved, will be replaced by a save if there are
369 // OR | operators at the top level 369 // OR | operators at the top level
370 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_STATE_SAVE, 2), *fStatus) ; 370 appendOp(URX_STATE_SAVE, 2);
371 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_JMP, 3), *fStatus); 371 appendOp(URX_JMP, 3);
372 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_FAIL, 0), *fStatus); 372 appendOp(URX_FAIL, 0);
373 373
374 // Standard open nonCapture paren action emits the two NOPs and 374 // Standard open nonCapture paren action emits the two NOPs and
375 // sets up the paren stack frame. 375 // sets up the paren stack frame.
376 doParseActions(doOpenNonCaptureParen); 376 doParseActions(doOpenNonCaptureParen);
377 break; 377 break;
378 378
379 case doPatFinish: 379 case doPatFinish:
380 // We've scanned to the end of the pattern 380 // We've scanned to the end of the pattern
381 // The end of pattern compiles to: 381 // The end of pattern compiles to:
382 // URX_END 382 // URX_END
383 // which will stop the runtime match engine. 383 // which will stop the runtime match engine.
384 // Encountering end of pattern also behaves like a close paren, 384 // Encountering end of pattern also behaves like a close paren,
385 // and forces fixups of the State Save at the beginning of the compile d pattern 385 // and forces fixups of the State Save at the beginning of the compile d pattern
386 // and of any OR operations at the top level. 386 // and of any OR operations at the top level.
387 // 387 //
388 handleCloseParen(); 388 handleCloseParen();
389 if (fParenStack.size() > 0) { 389 if (fParenStack.size() > 0) {
390 // Missing close paren in pattern. 390 // Missing close paren in pattern.
391 error(U_REGEX_MISMATCHED_PAREN); 391 error(U_REGEX_MISMATCHED_PAREN);
392 } 392 }
393 393
394 // add the END operation to the compiled pattern. 394 // add the END operation to the compiled pattern.
395 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_END, 0), *fStatus); 395 appendOp(URX_END, 0);
396 396
397 // Terminate the pattern compilation state machine. 397 // Terminate the pattern compilation state machine.
398 returnVal = FALSE; 398 returnVal = FALSE;
399 break; 399 break;
400 400
401 401
402 402
403 case doOrOperator: 403 case doOrOperator:
404 // Scanning a '|', as in (A|B) 404 // Scanning a '|', as in (A|B)
405 { 405 {
406 // Generate code for any pending literals preceding the '|' 406 // Generate code for any pending literals preceding the '|'
407 fixLiterals(FALSE); 407 fixLiterals(FALSE);
408 408
409 // Insert a SAVE operation at the start of the pattern section prece ding 409 // Insert a SAVE operation at the start of the pattern section prece ding
410 // this OR at this level. This SAVE will branch the match forward 410 // this OR at this level. This SAVE will branch the match forward
411 // to the right hand side of the OR in the event that the left han d 411 // to the right hand side of the OR in the event that the left han d
412 // side fails to match and backtracks. Locate the position for th e 412 // side fails to match and backtracks. Locate the position for th e
413 // save from the location on the top of the parentheses stack. 413 // save from the location on the top of the parentheses stack.
414 int32_t savePosition = fParenStack.popi(); 414 int32_t savePosition = fParenStack.popi();
415 int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(savePosition) ; 415 int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(savePosition) ;
416 U_ASSERT(URX_TYPE(op) == URX_NOP); // original contents of reserved location 416 U_ASSERT(URX_TYPE(op) == URX_NOP); // original contents of reserved location
417 op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1); 417 op = buildOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+1);
418 fRXPat->fCompiledPat->setElementAt(op, savePosition); 418 fRXPat->fCompiledPat->setElementAt(op, savePosition);
419 419
420 // Append an JMP operation into the compiled pattern. The operand f or 420 // Append an JMP operation into the compiled pattern. The operand f or
421 // the JMP will eventually be the location following the ')' for th e 421 // the JMP will eventually be the location following the ')' for th e
422 // group. This will be patched in later, when the ')' is encounter ed. 422 // group. This will be patched in later, when the ')' is encounter ed.
423 op = URX_BUILD(URX_JMP, 0); 423 appendOp(URX_JMP, 0);
424 fRXPat->fCompiledPat->addElement(op, *fStatus);
425 424
426 // Push the position of the newly added JMP op onto the parentheses stack. 425 // Push the position of the newly added JMP op onto the parentheses stack.
427 // This registers if for fixup when this block's close paren is enco untered. 426 // This registers if for fixup when this block's close paren is enco untered.
428 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); 427 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);
429 428
430 // Append a NOP to the compiled pattern. This is the slot reserved 429 // Append a NOP to the compiled pattern. This is the slot reserved
431 // for a SAVE in the event that there is yet another '|' following 430 // for a SAVE in the event that there is yet another '|' following
432 // this one. 431 // this one.
433 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); 432 appendOp(URX_NOP, 0);
434 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); 433 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);
435 } 434 }
436 break; 435 break;
437 436
438 437
439 case doOpenCaptureParen: 438 case doOpenCaptureParen:
440 // Open Paren. 439 // Open Paren.
441 // Compile to a 440 // Compile to a
442 // - NOP, which later may be replaced by a save-state if the 441 // - NOP, which later may be replaced by a save-state if the
443 // parenthesized group gets a * quantifier, followed by 442 // parenthesized group gets a * quantifier, followed by
444 // - START_CAPTURE n where n is stack frame offset to the captu re group variables. 443 // - START_CAPTURE n where n is stack frame offset to the captu re group variables.
445 // - NOP, which may later be replaced by a save-state if there 444 // - NOP, which may later be replaced by a save-state if there
446 // is an '|' alternation within the parens. 445 // is an '|' alternation within the parens.
447 // 446 //
448 // Each capture group gets three slots in the save stack frame: 447 // Each capture group gets three slots in the save stack frame:
449 // 0: Capture Group start position (in input string being matche d.) 448 // 0: Capture Group start position (in input string being matche d.)
450 // 1: Capture Group end position. 449 // 1: Capture Group end position.
451 // 2: Start of Match-in-progress. 450 // 2: Start of Match-in-progress.
452 // The first two locations are for a completed capture group, and are 451 // The first two locations are for a completed capture group, and are
453 // referred to by back references and the like. 452 // referred to by back references and the like.
454 // The third location stores the capture start position when an START _CAPTURE is 453 // The third location stores the capture start position when an START _CAPTURE is
455 // encountered. This will be promoted to a completed capture when (and if) the corresponding 454 // encountered. This will be promoted to a completed capture when (and if) the corresponding
456 // END_CAPTURE is encountered. 455 // END_CAPTURE is encountered.
457 { 456 {
458 fixLiterals(); 457 fixLiterals();
459 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); 458 appendOp(URX_NOP, 0);
460 int32_t varsLoc = fRXPat->fFrameSize; // Reserve three slots in match stack frame. 459 int32_t varsLoc = allocateStackData(3); // Reserve three slots i n match stack frame.
461 fRXPat->fFrameSize += 3; 460 appendOp(URX_START_CAPTURE, varsLoc);
462 int32_t cop = URX_BUILD(URX_START_CAPTURE, varsLoc); 461 appendOp(URX_NOP, 0);
463 fRXPat->fCompiledPat->addElement(cop, *fStatus);
464 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
465 462
466 // On the Parentheses stack, start a new frame and add the postions 463 // On the Parentheses stack, start a new frame and add the postions
467 // of the two NOPs. Depending on what follows in the pattern, the 464 // of the two NOPs. Depending on what follows in the pattern, the
468 // NOPs may be changed to SAVE_STATE or JMP ops, with a target 465 // NOPs may be changed to SAVE_STATE or JMP ops, with a target
469 // address of the end of the parenthesized group. 466 // address of the end of the parenthesized group.
470 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state 467 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state
471 fParenStack.push(capturing, *fStatus); // Fra me type. 468 fParenStack.push(capturing, *fStatus); // Fra me type.
472 fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus); // The first NOP location 469 fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus); // The first NOP location
473 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP loc 470 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP loc
474 471
475 // Save the mapping from group number to stack frame variable positi on. 472 // Save the mapping from group number to stack frame variable positi on.
476 fRXPat->fGroupMap->addElement(varsLoc, *fStatus); 473 fRXPat->fGroupMap->addElement(varsLoc, *fStatus);
477 } 474 }
478 break; 475 break;
479 476
480 case doOpenNonCaptureParen: 477 case doOpenNonCaptureParen:
481 // Open non-caputuring (grouping only) Paren. 478 // Open non-caputuring (grouping only) Paren.
482 // Compile to a 479 // Compile to a
483 // - NOP, which later may be replaced by a save-state if the 480 // - NOP, which later may be replaced by a save-state if the
484 // parenthesized group gets a * quantifier, followed by 481 // parenthesized group gets a * quantifier, followed by
485 // - NOP, which may later be replaced by a save-state if there 482 // - NOP, which may later be replaced by a save-state if there
486 // is an '|' alternation within the parens. 483 // is an '|' alternation within the parens.
487 { 484 {
488 fixLiterals(); 485 fixLiterals();
489 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); 486 appendOp(URX_NOP, 0);
490 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); 487 appendOp(URX_NOP, 0);
491 488
492 // On the Parentheses stack, start a new frame and add the postions 489 // On the Parentheses stack, start a new frame and add the postions
493 // of the two NOPs. 490 // of the two NOPs.
494 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state 491 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state
495 fParenStack.push(plain, *fStatus); // Beg in a new frame. 492 fParenStack.push(plain, *fStatus); // Beg in a new frame.
496 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location 493 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location
497 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP loc 494 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP loc
498 } 495 }
499 break; 496 break;
500 497
501 498
502 case doOpenAtomicParen: 499 case doOpenAtomicParen:
503 // Open Atomic Paren. (?> 500 // Open Atomic Paren. (?>
504 // Compile to a 501 // Compile to a
505 // - NOP, which later may be replaced if the parenthesized group 502 // - NOP, which later may be replaced if the parenthesized group
506 // has a quantifier, followed by 503 // has a quantifier, followed by
507 // - STO_SP save state stack position, so it can be restored at th e ")" 504 // - STO_SP save state stack position, so it can be restored at th e ")"
508 // - NOP, which may later be replaced by a save-state if there 505 // - NOP, which may later be replaced by a save-state if there
509 // is an '|' alternation within the parens. 506 // is an '|' alternation within the parens.
510 { 507 {
511 fixLiterals(); 508 fixLiterals();
512 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); 509 appendOp(URX_NOP, 0);
513 int32_t varLoc = fRXPat->fDataSize; // Reserve a data locatio n for saving the 510 int32_t varLoc = allocateData(1); // Reserve a data location for saving the state stack ptr.
514 fRXPat->fDataSize += 1; // state stack ptr. 511 appendOp(URX_STO_SP, varLoc);
515 int32_t stoOp = URX_BUILD(URX_STO_SP, varLoc); 512 appendOp(URX_NOP, 0);
516 fRXPat->fCompiledPat->addElement(stoOp, *fStatus);
517 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus);
518 513
519 // On the Parentheses stack, start a new frame and add the postions 514 // On the Parentheses stack, start a new frame and add the postions
520 // of the two NOPs. Depending on what follows in the pattern, the 515 // of the two NOPs. Depending on what follows in the pattern, the
521 // NOPs may be changed to SAVE_STATE or JMP ops, with a target 516 // NOPs may be changed to SAVE_STATE or JMP ops, with a target
522 // address of the end of the parenthesized group. 517 // address of the end of the parenthesized group.
523 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state 518 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state
524 fParenStack.push(atomic, *fStatus); // Fra me type. 519 fParenStack.push(atomic, *fStatus); // Fra me type.
525 fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus); // The first NOP 520 fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus); // The first NOP
526 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP 521 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
527 } 522 }
(...skipping 22 matching lines...) Expand all
550 // 6. NOP reserved for use by quantifiers on the block . 545 // 6. NOP reserved for use by quantifiers on the block .
551 // Look-ahead can't have quantifiers, but paren stack 546 // Look-ahead can't have quantifiers, but paren stack
552 // compile time conventions require the slot anyhow. 547 // compile time conventions require the slot anyhow.
553 // 7. NOP may be replaced if there is are '|' ops in t he block. 548 // 7. NOP may be replaced if there is are '|' ops in t he block.
554 // 8. code for parenthesized stuff. 549 // 8. code for parenthesized stuff.
555 // 9. LA_END 550 // 9. LA_END
556 // 551 //
557 // Two data slots are reserved, for saving the stack ptr and the input position. 552 // Two data slots are reserved, for saving the stack ptr and the input position.
558 { 553 {
559 fixLiterals(); 554 fixLiterals();
560 int32_t dataLoc = fRXPat->fDataSize; 555 int32_t dataLoc = allocateData(2);
561 fRXPat->fDataSize += 2; 556 appendOp(URX_LA_START, dataLoc);
562 int32_t op = URX_BUILD(URX_LA_START, dataLoc); 557 appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2);
563 fRXPat->fCompiledPat->addElement(op, *fStatus); 558 appendOp(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
564 559 appendOp(URX_LA_END, dataLoc);
565 op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+ 2); 560 appendOp(URX_BACKTRACK, 0);
566 fRXPat->fCompiledPat->addElement(op, *fStatus); 561 appendOp(URX_NOP, 0);
567 562 appendOp(URX_NOP, 0);
568 op = URX_BUILD(URX_JMP, fRXPat->fCompiledPat->size()+ 3);
569 fRXPat->fCompiledPat->addElement(op, *fStatus);
570
571 op = URX_BUILD(URX_LA_END, dataLoc);
572 fRXPat->fCompiledPat->addElement(op, *fStatus);
573
574 op = URX_BUILD(URX_BACKTRACK, 0);
575 fRXPat->fCompiledPat->addElement(op, *fStatus);
576
577 op = URX_BUILD(URX_NOP, 0);
578 fRXPat->fCompiledPat->addElement(op, *fStatus);
579 fRXPat->fCompiledPat->addElement(op, *fStatus);
580 563
581 // On the Parentheses stack, start a new frame and add the postions 564 // On the Parentheses stack, start a new frame and add the postions
582 // of the NOPs. 565 // of the NOPs.
583 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state 566 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state
584 fParenStack.push(lookAhead, *fStatus); // Fra me type. 567 fParenStack.push(lookAhead, *fStatus); // Fra me type.
585 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location 568 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location
586 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location 569 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location
587 } 570 }
588 break; 571 break;
589 572
590 case doOpenLookAheadNeg: 573 case doOpenLookAheadNeg:
591 // Negated Lookahead. (?! stuff ) 574 // Negated Lookahead. (?! stuff )
592 // Compiles to 575 // Compiles to
593 // 1. START_LA dataloc 576 // 1. START_LA dataloc
594 // 2. SAVE_STATE 7 // Fail within look-ahead block restor es to this state, 577 // 2. SAVE_STATE 7 // Fail within look-ahead block restor es to this state,
595 // // which continues with the match. 578 // // which continues with the match.
596 // 3. NOP // Std. Open Paren sequence, for possi ble '|' 579 // 3. NOP // Std. Open Paren sequence, for possi ble '|'
597 // 4. code for parenthesized stuff. 580 // 4. code for parenthesized stuff.
598 // 5. END_LA // Cut back stack, remove saved state from step 2. 581 // 5. END_LA // Cut back stack, remove saved state from step 2.
599 // 6. BACKTRACK // code in block succeeded, so neg. lo okahead fails. 582 // 6. BACKTRACK // code in block succeeded, so neg. lo okahead fails.
600 // 7. END_LA // Restore match region, in case look- ahead was using 583 // 7. END_LA // Restore match region, in case look- ahead was using
601 // an alternate (transparent) reg ion. 584 // an alternate (transparent) reg ion.
602 { 585 {
603 fixLiterals(); 586 fixLiterals();
604 int32_t dataLoc = fRXPat->fDataSize; 587 int32_t dataLoc = allocateData(2);
605 fRXPat->fDataSize += 2; 588 appendOp(URX_LA_START, dataLoc);
606 int32_t op = URX_BUILD(URX_LA_START, dataLoc); 589 appendOp(URX_STATE_SAVE, 0); // dest address will be patched late r.
607 fRXPat->fCompiledPat->addElement(op, *fStatus); 590 appendOp(URX_NOP, 0);
608
609 op = URX_BUILD(URX_STATE_SAVE, 0); // dest address will be patche d later.
610 fRXPat->fCompiledPat->addElement(op, *fStatus);
611
612 op = URX_BUILD(URX_NOP, 0);
613 fRXPat->fCompiledPat->addElement(op, *fStatus);
614 591
615 // On the Parentheses stack, start a new frame and add the postions 592 // On the Parentheses stack, start a new frame and add the postions
616 // of the StateSave and NOP. 593 // of the StateSave and NOP.
617 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state 594 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state
618 fParenStack.push(negLookAhead, *fStatus); // Fram e type 595 fParenStack.push(negLookAhead, *fStatus); // Fram e type
619 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The STATE_SAVE location 596 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The STATE_SAVE location
620 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location 597 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP location
621 598
622 // Instructions #5 - #7 will be added when the ')' is encountered. 599 // Instructions #5 - #7 will be added when the ')' is encountered.
623 } 600 }
(...skipping 17 matching lines...) Expand all
641 // Allocate a block of matcher data, to contain (when runni ng a match) 618 // Allocate a block of matcher data, to contain (when runni ng a match)
642 // 0: Stack ptr on entry 619 // 0: Stack ptr on entry
643 // 1: Input Index on entry 620 // 1: Input Index on entry
644 // 2: Start index of match current match attempt. 621 // 2: Start index of match current match attempt.
645 // 3: Original Input String len. 622 // 3: Original Input String len.
646 623
647 // Generate match code for any pending literals. 624 // Generate match code for any pending literals.
648 fixLiterals(); 625 fixLiterals();
649 626
650 // Allocate data space 627 // Allocate data space
651 int32_t dataLoc = fRXPat->fDataSize; 628 int32_t dataLoc = allocateData(4);
652 fRXPat->fDataSize += 4;
653 629
654 // Emit URX_LB_START 630 // Emit URX_LB_START
655 int32_t op = URX_BUILD(URX_LB_START, dataLoc); 631 appendOp(URX_LB_START, dataLoc);
656 fRXPat->fCompiledPat->addElement(op, *fStatus);
657 632
658 // Emit URX_LB_CONT 633 // Emit URX_LB_CONT
659 op = URX_BUILD(URX_LB_CONT, dataLoc); 634 appendOp(URX_LB_CONT, dataLoc);
660 fRXPat->fCompiledPat->addElement(op, *fStatus); 635 appendOp(URX_RESERVED_OP, 0); // MinMatchLength. To be filled la ter.
661 fRXPat->fCompiledPat->addElement(0, *fStatus); // MinMatchLength . To be filled later. 636 appendOp(URX_RESERVED_OP, 0); // MaxMatchLength. To be filled la ter.
662 fRXPat->fCompiledPat->addElement(0, *fStatus); // MaxMatchLength . To be filled later.
663 637
664 // Emit the NOP 638 // Emit the NOPs
665 op = URX_BUILD(URX_NOP, 0); 639 appendOp(URX_NOP, 0);
666 fRXPat->fCompiledPat->addElement(op, *fStatus); 640 appendOp(URX_NOP, 0);
667 fRXPat->fCompiledPat->addElement(op, *fStatus);
668 641
669 // On the Parentheses stack, start a new frame and add the postions 642 // On the Parentheses stack, start a new frame and add the postions
670 // of the URX_LB_CONT and the NOP. 643 // of the URX_LB_CONT and the NOP.
671 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state 644 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state
672 fParenStack.push(lookBehind, *fStatus); // Fra me type 645 fParenStack.push(lookBehind, *fStatus); // Fra me type
673 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location 646 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location
674 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The 2nd NOP location 647 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The 2nd NOP location
675 648
676 // The final two instructions will be added when the ')' is encounte red. 649 // The final two instructions will be added when the ')' is encounte red.
677 } 650 }
(...skipping 19 matching lines...) Expand all
697 // Allocate a block of matcher data, to contain (when runni ng a match) 670 // Allocate a block of matcher data, to contain (when runni ng a match)
698 // 0: Stack ptr on entry 671 // 0: Stack ptr on entry
699 // 1: Input Index on entry 672 // 1: Input Index on entry
700 // 2: Start index of match current match attempt. 673 // 2: Start index of match current match attempt.
701 // 3: Original Input String len. 674 // 3: Original Input String len.
702 675
703 // Generate match code for any pending literals. 676 // Generate match code for any pending literals.
704 fixLiterals(); 677 fixLiterals();
705 678
706 // Allocate data space 679 // Allocate data space
707 int32_t dataLoc = fRXPat->fDataSize; 680 int32_t dataLoc = allocateData(4);
708 fRXPat->fDataSize += 4;
709 681
710 // Emit URX_LB_START 682 // Emit URX_LB_START
711 int32_t op = URX_BUILD(URX_LB_START, dataLoc); 683 appendOp(URX_LB_START, dataLoc);
712 fRXPat->fCompiledPat->addElement(op, *fStatus);
713 684
714 // Emit URX_LBN_CONT 685 // Emit URX_LBN_CONT
715 op = URX_BUILD(URX_LBN_CONT, dataLoc); 686 appendOp(URX_LBN_CONT, dataLoc);
716 fRXPat->fCompiledPat->addElement(op, *fStatus); 687 appendOp(URX_RESERVED_OP, 0); // MinMatchLength. To be filled la ter.
717 fRXPat->fCompiledPat->addElement(0, *fStatus); // MinMatchLength . To be filled later. 688 appendOp(URX_RESERVED_OP, 0); // MaxMatchLength. To be filled la ter.
718 fRXPat->fCompiledPat->addElement(0, *fStatus); // MaxMatchLength . To be filled later. 689 appendOp(URX_RESERVED_OP, 0); // Continue Loc. To be filled la ter.
719 fRXPat->fCompiledPat->addElement(0, *fStatus); // Continue Loc. To be filled later.
720 690
721 // Emit the NOP 691 // Emit the NOPs
722 op = URX_BUILD(URX_NOP, 0); 692 appendOp(URX_NOP, 0);
723 fRXPat->fCompiledPat->addElement(op, *fStatus); 693 appendOp(URX_NOP, 0);
724 fRXPat->fCompiledPat->addElement(op, *fStatus);
725 694
726 // On the Parentheses stack, start a new frame and add the postions 695 // On the Parentheses stack, start a new frame and add the postions
727 // of the URX_LB_CONT and the NOP. 696 // of the URX_LB_CONT and the NOP.
728 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state 697 fParenStack.push(fModeFlags, *fStatus); // Mat ch mode state
729 fParenStack.push(lookBehindN, *fStatus); // Fra me type 698 fParenStack.push(lookBehindN, *fStatus); // Fra me type
730 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location 699 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP location
731 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The 2nd NOP location 700 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The 2nd NOP location
732 701
733 // The final two instructions will be added when the ')' is encounte red. 702 // The final two instructions will be added when the ')' is encounte red.
734 } 703 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
784 { 753 {
785 int32_t topLoc = blockTopLoc(FALSE); // location of item #1 754 int32_t topLoc = blockTopLoc(FALSE); // location of item #1
786 int32_t frameLoc; 755 int32_t frameLoc;
787 756
788 // Check for simple constructs, which may get special optimized code . 757 // Check for simple constructs, which may get special optimized code .
789 if (topLoc == fRXPat->fCompiledPat->size() - 1) { 758 if (topLoc == fRXPat->fCompiledPat->size() - 1) {
790 int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(t opLoc); 759 int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(t opLoc);
791 760
792 if (URX_TYPE(repeatedOp) == URX_SETREF) { 761 if (URX_TYPE(repeatedOp) == URX_SETREF) {
793 // Emit optimized code for [char set]+ 762 // Emit optimized code for [char set]+
794 int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedO p)); 763 appendOp(URX_LOOP_SR_I, URX_VAL(repeatedOp));
795 fRXPat->fCompiledPat->addElement(loopOpI, *fStatus); 764 frameLoc = allocateStackData(1);
796 frameLoc = fRXPat->fFrameSize; 765 appendOp(URX_LOOP_C, frameLoc);
797 fRXPat->fFrameSize++;
798 int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
799 fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
800 break; 766 break;
801 } 767 }
802 768
803 if (URX_TYPE(repeatedOp) == URX_DOTANY || 769 if (URX_TYPE(repeatedOp) == URX_DOTANY ||
804 URX_TYPE(repeatedOp) == URX_DOTANY_ALL || 770 URX_TYPE(repeatedOp) == URX_DOTANY_ALL ||
805 URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) { 771 URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) {
806 // Emit Optimized code for .+ operations. 772 // Emit Optimized code for .+ operations.
807 int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0); 773 int32_t loopOpI = buildOp(URX_LOOP_DOT_I, 0);
808 if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) { 774 if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
809 // URX_LOOP_DOT_I operand is a flag indicating ". matche s any" mode. 775 // URX_LOOP_DOT_I operand is a flag indicating ". matche s any" mode.
810 loopOpI |= 1; 776 loopOpI |= 1;
811 } 777 }
812 if (fModeFlags & UREGEX_UNIX_LINES) { 778 if (fModeFlags & UREGEX_UNIX_LINES) {
813 loopOpI |= 2; 779 loopOpI |= 2;
814 } 780 }
815 fRXPat->fCompiledPat->addElement(loopOpI, *fStatus); 781 appendOp(loopOpI);
816 frameLoc = fRXPat->fFrameSize; 782 frameLoc = allocateStackData(1);
817 fRXPat->fFrameSize++; 783 appendOp(URX_LOOP_C, frameLoc);
818 int32_t loopOpC = URX_BUILD(URX_LOOP_C, frameLoc);
819 fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
820 break; 784 break;
821 } 785 }
822 786
823 } 787 }
824 788
825 // General case. 789 // General case.
826 790
827 // Check for minimum match length of zero, which requires 791 // Check for minimum match length of zero, which requires
828 // extra loop-breaking code. 792 // extra loop-breaking code.
829 if (minMatchLength(topLoc, fRXPat->fCompiledPat->size()-1) == 0) { 793 if (minMatchLength(topLoc, fRXPat->fCompiledPat->size()-1) == 0) {
830 // Zero length match is possible. 794 // Zero length match is possible.
831 // Emit the code sequence that can handle it. 795 // Emit the code sequence that can handle it.
832 insertOp(topLoc); 796 insertOp(topLoc);
833 frameLoc = fRXPat->fFrameSize; 797 frameLoc = allocateStackData(1);
834 fRXPat->fFrameSize++;
835 798
836 int32_t op = URX_BUILD(URX_STO_INP_LOC, frameLoc); 799 int32_t op = buildOp(URX_STO_INP_LOC, frameLoc);
837 fRXPat->fCompiledPat->setElementAt(op, topLoc); 800 fRXPat->fCompiledPat->setElementAt(op, topLoc);
838 801
839 op = URX_BUILD(URX_JMP_SAV_X, topLoc+1); 802 appendOp(URX_JMP_SAV_X, topLoc+1);
840 fRXPat->fCompiledPat->addElement(op, *fStatus);
841 } else { 803 } else {
842 // Simpler code when the repeated body must match something non- empty 804 // Simpler code when the repeated body must match something non- empty
843 int32_t jmpOp = URX_BUILD(URX_JMP_SAV, topLoc); 805 appendOp(URX_JMP_SAV, topLoc);
844 fRXPat->fCompiledPat->addElement(jmpOp, *fStatus);
845 } 806 }
846 } 807 }
847 break; 808 break;
848 809
849 case doNGPlus: 810 case doNGPlus:
850 // Non-greedy '+?' compiles to 811 // Non-greedy '+?' compiles to
851 // 1. stuff to be repeated (already built) 812 // 1. stuff to be repeated (already built)
852 // 2. state-save 1 813 // 2. state-save 1
853 // 3. ... 814 // 3. ...
854 { 815 {
855 int32_t topLoc = blockTopLoc(FALSE); 816 int32_t topLoc = blockTopLoc(FALSE);
856 int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, topLoc); 817 appendOp(URX_STATE_SAVE, topLoc);
857 fRXPat->fCompiledPat->addElement(saveStateOp, *fStatus);
858 } 818 }
859 break; 819 break;
860 820
861 821
862 case doOpt: 822 case doOpt:
863 // Normal (greedy) ? quantifier. 823 // Normal (greedy) ? quantifier.
864 // Compiles to 824 // Compiles to
865 // 1. state save 3 825 // 1. state save 3
866 // 2. body of optional block 826 // 2. body of optional block
867 // 3. ... 827 // 3. ...
868 // Insert the state save into the compiled pattern, and we're done. 828 // Insert the state save into the compiled pattern, and we're done.
869 { 829 {
870 int32_t saveStateLoc = blockTopLoc(TRUE); 830 int32_t saveStateLoc = blockTopLoc(TRUE);
871 int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiled Pat->size()); 831 int32_t saveStateOp = buildOp(URX_STATE_SAVE, fRXPat->fCompiledPa t->size());
872 fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc); 832 fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
873 } 833 }
874 break; 834 break;
875 835
876 case doNGOpt: 836 case doNGOpt:
877 // Non-greedy ?? quantifier 837 // Non-greedy ?? quantifier
878 // compiles to 838 // compiles to
879 // 1. jmp 4 839 // 1. jmp 4
880 // 2. body of optional block 840 // 2. body of optional block
881 // 3 jmp 5 841 // 3 jmp 5
882 // 4. state save 2 842 // 4. state save 2
883 // 5 ... 843 // 5 ...
884 // This code is less than ideal, with two jmps instead of one, because we can only 844 // This code is less than ideal, with two jmps instead of one, because we can only
885 // insert one instruction at the top of the block being iterated. 845 // insert one instruction at the top of the block being iterated.
886 { 846 {
887 int32_t jmp1_loc = blockTopLoc(TRUE); 847 int32_t jmp1_loc = blockTopLoc(TRUE);
888 int32_t jmp2_loc = fRXPat->fCompiledPat->size(); 848 int32_t jmp2_loc = fRXPat->fCompiledPat->size();
889 849
890 int32_t jmp1_op = URX_BUILD(URX_JMP, jmp2_loc+1); 850 int32_t jmp1_op = buildOp(URX_JMP, jmp2_loc+1);
891 fRXPat->fCompiledPat->setElementAt(jmp1_op, jmp1_loc); 851 fRXPat->fCompiledPat->setElementAt(jmp1_op, jmp1_loc);
892 852
893 int32_t jmp2_op = URX_BUILD(URX_JMP, jmp2_loc+2); 853 appendOp(URX_JMP, jmp2_loc+2);
894 fRXPat->fCompiledPat->addElement(jmp2_op, *fStatus);
895 854
896 int32_t save_op = URX_BUILD(URX_STATE_SAVE, jmp1_loc+1); 855 appendOp(URX_STATE_SAVE, jmp1_loc+1);
897 fRXPat->fCompiledPat->addElement(save_op, *fStatus);
898 } 856 }
899 break; 857 break;
900 858
901 859
902 case doStar: 860 case doStar:
903 // Normal (greedy) * quantifier. 861 // Normal (greedy) * quantifier.
904 // Compiles to 862 // Compiles to
905 // 1. STATE_SAVE 4 863 // 1. STATE_SAVE 4
906 // 2. body of stuff being iterated over 864 // 2. body of stuff being iterated over
907 // 3. JMP_SAV 2 865 // 3. JMP_SAV 2
(...skipping 19 matching lines...) Expand all
927 int32_t topLoc = blockTopLoc(FALSE); 885 int32_t topLoc = blockTopLoc(FALSE);
928 int32_t dataLoc = -1; 886 int32_t dataLoc = -1;
929 887
930 // Check for simple *, where the construct being repeated 888 // Check for simple *, where the construct being repeated
931 // compiled to single opcode, and might be optimizable. 889 // compiled to single opcode, and might be optimizable.
932 if (topLoc == fRXPat->fCompiledPat->size() - 1) { 890 if (topLoc == fRXPat->fCompiledPat->size() - 1) {
933 int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(t opLoc); 891 int32_t repeatedOp = (int32_t)fRXPat->fCompiledPat->elementAti(t opLoc);
934 892
935 if (URX_TYPE(repeatedOp) == URX_SETREF) { 893 if (URX_TYPE(repeatedOp) == URX_SETREF) {
936 // Emit optimized code for a [char set]* 894 // Emit optimized code for a [char set]*
937 int32_t loopOpI = URX_BUILD(URX_LOOP_SR_I, URX_VAL(repeatedO p)); 895 int32_t loopOpI = buildOp(URX_LOOP_SR_I, URX_VAL(repeatedOp) );
938 fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc); 896 fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
939 dataLoc = fRXPat->fFrameSize; 897 dataLoc = allocateStackData(1);
940 fRXPat->fFrameSize++; 898 appendOp(URX_LOOP_C, dataLoc);
941 int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
942 fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
943 break; 899 break;
944 } 900 }
945 901
946 if (URX_TYPE(repeatedOp) == URX_DOTANY || 902 if (URX_TYPE(repeatedOp) == URX_DOTANY ||
947 URX_TYPE(repeatedOp) == URX_DOTANY_ALL || 903 URX_TYPE(repeatedOp) == URX_DOTANY_ALL ||
948 URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) { 904 URX_TYPE(repeatedOp) == URX_DOTANY_UNIX) {
949 // Emit Optimized code for .* operations. 905 // Emit Optimized code for .* operations.
950 int32_t loopOpI = URX_BUILD(URX_LOOP_DOT_I, 0); 906 int32_t loopOpI = buildOp(URX_LOOP_DOT_I, 0);
951 if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) { 907 if (URX_TYPE(repeatedOp) == URX_DOTANY_ALL) {
952 // URX_LOOP_DOT_I operand is a flag indicating . matches any mode. 908 // URX_LOOP_DOT_I operand is a flag indicating . matches any mode.
953 loopOpI |= 1; 909 loopOpI |= 1;
954 } 910 }
955 if ((fModeFlags & UREGEX_UNIX_LINES) != 0) { 911 if ((fModeFlags & UREGEX_UNIX_LINES) != 0) {
956 loopOpI |= 2; 912 loopOpI |= 2;
957 } 913 }
958 fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc); 914 fRXPat->fCompiledPat->setElementAt(loopOpI, topLoc);
959 dataLoc = fRXPat->fFrameSize; 915 dataLoc = allocateStackData(1);
960 fRXPat->fFrameSize++; 916 appendOp(URX_LOOP_C, dataLoc);
961 int32_t loopOpC = URX_BUILD(URX_LOOP_C, dataLoc);
962 fRXPat->fCompiledPat->addElement(loopOpC, *fStatus);
963 break; 917 break;
964 } 918 }
965 } 919 }
966 920
967 // Emit general case code for this * 921 // Emit general case code for this *
968 // The optimizations did not apply. 922 // The optimizations did not apply.
969 923
970 int32_t saveStateLoc = blockTopLoc(TRUE); 924 int32_t saveStateLoc = blockTopLoc(TRUE);
971 int32_t jmpOp = URX_BUILD(URX_JMP_SAV, saveStateLoc+1); 925 int32_t jmpOp = buildOp(URX_JMP_SAV, saveStateLoc+1);
972 926
973 // Check for minimum match length of zero, which requires 927 // Check for minimum match length of zero, which requires
974 // extra loop-breaking code. 928 // extra loop-breaking code.
975 if (minMatchLength(saveStateLoc, fRXPat->fCompiledPat->size()-1) == 0) { 929 if (minMatchLength(saveStateLoc, fRXPat->fCompiledPat->size()-1) == 0) {
976 insertOp(saveStateLoc); 930 insertOp(saveStateLoc);
977 dataLoc = fRXPat->fFrameSize; 931 dataLoc = allocateStackData(1);
978 fRXPat->fFrameSize++;
979 932
980 int32_t op = URX_BUILD(URX_STO_INP_LOC, dataLoc); 933 int32_t op = buildOp(URX_STO_INP_LOC, dataLoc);
981 fRXPat->fCompiledPat->setElementAt(op, saveStateLoc+1); 934 fRXPat->fCompiledPat->setElementAt(op, saveStateLoc+1);
982 jmpOp = URX_BUILD(URX_JMP_SAV_X, saveStateLoc+2); 935 jmpOp = buildOp(URX_JMP_SAV_X, saveStateLoc+2);
983 } 936 }
984 937
985 // Locate the position in the compiled pattern where the match will continue 938 // Locate the position in the compiled pattern where the match will continue
986 // after completing the *. (4 or 5 in the comment above) 939 // after completing the *. (4 or 5 in the comment above)
987 int32_t continueLoc = fRXPat->fCompiledPat->size()+1; 940 int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
988 941
989 // Put together the save state op store it into the compiled code. 942 // Put together the save state op and store it into the compiled cod e.
990 int32_t saveStateOp = URX_BUILD(URX_STATE_SAVE, continueLoc); 943 int32_t saveStateOp = buildOp(URX_STATE_SAVE, continueLoc);
991 fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc); 944 fRXPat->fCompiledPat->setElementAt(saveStateOp, saveStateLoc);
992 945
993 // Append the URX_JMP_SAV or URX_JMPX operation to the compiled patt ern. 946 // Append the URX_JMP_SAV or URX_JMPX operation to the compiled patt ern.
994 fRXPat->fCompiledPat->addElement(jmpOp, *fStatus); 947 appendOp(jmpOp);
995 } 948 }
996 break; 949 break;
997 950
998 case doNGStar: 951 case doNGStar:
999 // Non-greedy *? quantifier 952 // Non-greedy *? quantifier
1000 // compiles to 953 // compiles to
1001 // 1. JMP 3 954 // 1. JMP 3
1002 // 2. body of stuff being iterated over 955 // 2. body of stuff being iterated over
1003 // 3. STATE_SAVE 2 956 // 3. STATE_SAVE 2
1004 // 4 ... 957 // 4 ...
1005 { 958 {
1006 int32_t jmpLoc = blockTopLoc(TRUE); // loc 1 . 959 int32_t jmpLoc = blockTopLoc(TRUE); // loc 1 .
1007 int32_t saveLoc = fRXPat->fCompiledPat->size(); // loc 3 . 960 int32_t saveLoc = fRXPat->fCompiledPat->size(); // loc 3 .
1008 int32_t jmpOp = URX_BUILD(URX_JMP, saveLoc); 961 int32_t jmpOp = buildOp(URX_JMP, saveLoc);
1009 int32_t stateSaveOp = URX_BUILD(URX_STATE_SAVE, jmpLoc+1);
1010 fRXPat->fCompiledPat->setElementAt(jmpOp, jmpLoc); 962 fRXPat->fCompiledPat->setElementAt(jmpOp, jmpLoc);
1011 fRXPat->fCompiledPat->addElement(stateSaveOp, *fStatus); 963 appendOp(URX_STATE_SAVE, jmpLoc+1);
1012 } 964 }
1013 break; 965 break;
1014 966
1015 967
1016 case doIntervalInit: 968 case doIntervalInit:
1017 // The '{' opening an interval quantifier was just scanned. 969 // The '{' opening an interval quantifier was just scanned.
1018 // Init the counter varaiables that will accumulate the values as the di gits 970 // Init the counter varaiables that will accumulate the values as the di gits
1019 // are scanned. 971 // are scanned.
1020 fIntervalLow = 0; 972 fIntervalLow = 0;
1021 fIntervalUpper = -1; 973 fIntervalUpper = -1;
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
1070 int32_t topLoc = blockTopLoc(FALSE); 1022 int32_t topLoc = blockTopLoc(FALSE);
1071 1023
1072 // Produce normal looping code. 1024 // Produce normal looping code.
1073 compileInterval(URX_CTR_INIT, URX_CTR_LOOP); 1025 compileInterval(URX_CTR_INIT, URX_CTR_LOOP);
1074 1026
1075 // Surround the just-emitted normal looping code with a STO_SP ... L D_SP 1027 // Surround the just-emitted normal looping code with a STO_SP ... L D_SP
1076 // just as if the loop was inclosed in atomic parentheses. 1028 // just as if the loop was inclosed in atomic parentheses.
1077 1029
1078 // First the STO_SP before the start of the loop 1030 // First the STO_SP before the start of the loop
1079 insertOp(topLoc); 1031 insertOp(topLoc);
1080 int32_t varLoc = fRXPat->fDataSize; // Reserve a data locatio n for saving the 1032
1081 fRXPat->fDataSize += 1; // state stack ptr. 1033 int32_t varLoc = allocateData(1); // Reserve a data location for saving the
1082 int32_t op = URX_BUILD(URX_STO_SP, varLoc); 1034 int32_t op = buildOp(URX_STO_SP, varLoc);
1083 fRXPat->fCompiledPat->setElementAt(op, topLoc); 1035 fRXPat->fCompiledPat->setElementAt(op, topLoc);
1084 1036
1085 int32_t loopOp = (int32_t)fRXPat->fCompiledPat->popi(); 1037 int32_t loopOp = (int32_t)fRXPat->fCompiledPat->popi();
1086 U_ASSERT(URX_TYPE(loopOp) == URX_CTR_LOOP && URX_VAL(loopOp) == topL oc); 1038 U_ASSERT(URX_TYPE(loopOp) == URX_CTR_LOOP && URX_VAL(loopOp) == topL oc);
1087 loopOp++; // point LoopOp after the just-inserted STO_SP 1039 loopOp++; // point LoopOp after the just-inserted STO_SP
1088 fRXPat->fCompiledPat->push(loopOp, *fStatus); 1040 fRXPat->fCompiledPat->push(loopOp, *fStatus);
1089 1041
1090 // Then the LD_SP after the end of the loop 1042 // Then the LD_SP after the end of the loop
1091 op = URX_BUILD(URX_LD_SP, varLoc); 1043 appendOp(URX_LD_SP, varLoc);
1092 fRXPat->fCompiledPat->addElement(op, *fStatus);
1093 } 1044 }
1094 1045
1095 break; 1046 break;
1096 1047
1097 case doNGInterval: 1048 case doNGInterval:
1098 // Finished scanning a non-greedy {lower,upper}? interval. Generate the code for it. 1049 // Finished scanning a non-greedy {lower,upper}? interval. Generate the code for it.
1099 compileInterval(URX_CTR_INIT_NG, URX_CTR_LOOP_NG); 1050 compileInterval(URX_CTR_INIT_NG, URX_CTR_LOOP_NG);
1100 break; 1051 break;
1101 1052
1102 case doIntervalError: 1053 case doIntervalError:
(...skipping 15 matching lines...) Expand all
1118 error(U_REGEX_BAD_ESCAPE_SEQUENCE); 1069 error(U_REGEX_BAD_ESCAPE_SEQUENCE);
1119 } 1070 }
1120 literalChar(fC.fChar); 1071 literalChar(fC.fChar);
1121 break; 1072 break;
1122 1073
1123 1074
1124 case doDotAny: 1075 case doDotAny:
1125 // scanned a ".", match any single character. 1076 // scanned a ".", match any single character.
1126 { 1077 {
1127 fixLiterals(FALSE); 1078 fixLiterals(FALSE);
1128 int32_t op;
1129 if (fModeFlags & UREGEX_DOTALL) { 1079 if (fModeFlags & UREGEX_DOTALL) {
1130 op = URX_BUILD(URX_DOTANY_ALL, 0); 1080 appendOp(URX_DOTANY_ALL, 0);
1131 } else if (fModeFlags & UREGEX_UNIX_LINES) { 1081 } else if (fModeFlags & UREGEX_UNIX_LINES) {
1132 op = URX_BUILD(URX_DOTANY_UNIX, 0); 1082 appendOp(URX_DOTANY_UNIX, 0);
1133 } else { 1083 } else {
1134 op = URX_BUILD(URX_DOTANY, 0); 1084 appendOp(URX_DOTANY, 0);
1135 } 1085 }
1136 fRXPat->fCompiledPat->addElement(op, *fStatus);
1137 } 1086 }
1138 break; 1087 break;
1139 1088
1140 case doCaret: 1089 case doCaret:
1141 { 1090 {
1142 fixLiterals(FALSE); 1091 fixLiterals(FALSE);
1143 int32_t op = 0;
1144 if ( (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & URE GEX_UNIX_LINES) == 0) { 1092 if ( (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & URE GEX_UNIX_LINES) == 0) {
1145 op = URX_CARET; 1093 appendOp(URX_CARET, 0);
1146 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & URE GEX_UNIX_LINES) == 0) { 1094 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & URE GEX_UNIX_LINES) == 0) {
1147 op = URX_CARET_M; 1095 appendOp(URX_CARET_M, 0);
1148 } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & URE GEX_UNIX_LINES) != 0) { 1096 } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & URE GEX_UNIX_LINES) != 0) {
1149 op = URX_CARET; // Only testing true start of input. 1097 appendOp(URX_CARET, 0); // Only testing true start of input.
1150 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & URE GEX_UNIX_LINES) != 0) { 1098 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & URE GEX_UNIX_LINES) != 0) {
1151 op = URX_CARET_M_UNIX; 1099 appendOp(URX_CARET_M_UNIX, 0);
1152 } 1100 }
1153 fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
1154 } 1101 }
1155 break; 1102 break;
1156 1103
1157 case doDollar: 1104 case doDollar:
1158 { 1105 {
1159 fixLiterals(FALSE); 1106 fixLiterals(FALSE);
1160 int32_t op = 0;
1161 if ( (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & URE GEX_UNIX_LINES) == 0) { 1107 if ( (fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & URE GEX_UNIX_LINES) == 0) {
1162 op = URX_DOLLAR; 1108 appendOp(URX_DOLLAR, 0);
1163 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & URE GEX_UNIX_LINES) == 0) { 1109 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & URE GEX_UNIX_LINES) == 0) {
1164 op = URX_DOLLAR_M; 1110 appendOp(URX_DOLLAR_M, 0);
1165 } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & URE GEX_UNIX_LINES) != 0) { 1111 } else if ((fModeFlags & UREGEX_MULTILINE) == 0 && (fModeFlags & URE GEX_UNIX_LINES) != 0) {
1166 op = URX_DOLLAR_D; 1112 appendOp(URX_DOLLAR_D, 0);
1167 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & URE GEX_UNIX_LINES) != 0) { 1113 } else if ((fModeFlags & UREGEX_MULTILINE) != 0 && (fModeFlags & URE GEX_UNIX_LINES) != 0) {
1168 op = URX_DOLLAR_MD; 1114 appendOp(URX_DOLLAR_MD, 0);
1169 } 1115 }
1170 fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus);
1171 } 1116 }
1172 break; 1117 break;
1173 1118
1174 case doBackslashA: 1119 case doBackslashA:
1175 fixLiterals(FALSE); 1120 fixLiterals(FALSE);
1176 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_CARET, 0), *fStatus); 1121 appendOp(URX_CARET, 0);
1177 break; 1122 break;
1178 1123
1179 case doBackslashB: 1124 case doBackslashB:
1180 { 1125 {
1181 #if UCONFIG_NO_BREAK_ITERATION==1 1126 #if UCONFIG_NO_BREAK_ITERATION==1
1182 if (fModeFlags & UREGEX_UWORD) { 1127 if (fModeFlags & UREGEX_UWORD) {
1183 error(U_UNSUPPORTED_ERROR); 1128 error(U_UNSUPPORTED_ERROR);
1184 } 1129 }
1185 #endif 1130 #endif
1186 fixLiterals(FALSE); 1131 fixLiterals(FALSE);
1187 int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BAC KSLASH_B; 1132 int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BAC KSLASH_B;
1188 fRXPat->fCompiledPat->addElement(URX_BUILD(op, 1), *fStatus); 1133 appendOp(op, 1);
1189 } 1134 }
1190 break; 1135 break;
1191 1136
1192 case doBackslashb: 1137 case doBackslashb:
1193 { 1138 {
1194 #if UCONFIG_NO_BREAK_ITERATION==1 1139 #if UCONFIG_NO_BREAK_ITERATION==1
1195 if (fModeFlags & UREGEX_UWORD) { 1140 if (fModeFlags & UREGEX_UWORD) {
1196 error(U_UNSUPPORTED_ERROR); 1141 error(U_UNSUPPORTED_ERROR);
1197 } 1142 }
1198 #endif 1143 #endif
1199 fixLiterals(FALSE); 1144 fixLiterals(FALSE);
1200 int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BAC KSLASH_B; 1145 int32_t op = (fModeFlags & UREGEX_UWORD)? URX_BACKSLASH_BU : URX_BAC KSLASH_B;
1201 fRXPat->fCompiledPat->addElement(URX_BUILD(op, 0), *fStatus); 1146 appendOp(op, 0);
1202 } 1147 }
1203 break; 1148 break;
1204 1149
1205 case doBackslashD: 1150 case doBackslashD:
1206 fixLiterals(FALSE); 1151 fixLiterals(FALSE);
1207 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 1), *fStatus ); 1152 appendOp(URX_BACKSLASH_D, 1);
1208 break; 1153 break;
1209 1154
1210 case doBackslashd: 1155 case doBackslashd:
1211 fixLiterals(FALSE); 1156 fixLiterals(FALSE);
1212 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_D, 0), *fStatus ); 1157 appendOp(URX_BACKSLASH_D, 0);
1213 break; 1158 break;
1214 1159
1215 case doBackslashG: 1160 case doBackslashG:
1216 fixLiterals(FALSE); 1161 fixLiterals(FALSE);
1217 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_G, 0), *fStatus ); 1162 appendOp(URX_BACKSLASH_G, 0);
1218 break; 1163 break;
1219 1164
1220 case doBackslashS: 1165 case doBackslashS:
1221 fixLiterals(FALSE); 1166 fixLiterals(FALSE);
1222 fRXPat->fCompiledPat->addElement( 1167 appendOp(URX_STAT_SETREF_N, URX_ISSPACE_SET);
1223 URX_BUILD(URX_STAT_SETREF_N, URX_ISSPACE_SET), *fStatus);
1224 break; 1168 break;
1225 1169
1226 case doBackslashs: 1170 case doBackslashs:
1227 fixLiterals(FALSE); 1171 fixLiterals(FALSE);
1228 fRXPat->fCompiledPat->addElement( 1172 appendOp(URX_STATIC_SETREF, URX_ISSPACE_SET);
1229 URX_BUILD(URX_STATIC_SETREF, URX_ISSPACE_SET), *fStatus);
1230 break; 1173 break;
1231 1174
1232 case doBackslashW: 1175 case doBackslashW:
1233 fixLiterals(FALSE); 1176 fixLiterals(FALSE);
1234 fRXPat->fCompiledPat->addElement( 1177 appendOp(URX_STAT_SETREF_N, URX_ISWORD_SET);
1235 URX_BUILD(URX_STAT_SETREF_N, URX_ISWORD_SET), *fStatus);
1236 break; 1178 break;
1237 1179
1238 case doBackslashw: 1180 case doBackslashw:
1239 fixLiterals(FALSE); 1181 fixLiterals(FALSE);
1240 fRXPat->fCompiledPat->addElement( 1182 appendOp(URX_STATIC_SETREF, URX_ISWORD_SET);
1241 URX_BUILD(URX_STATIC_SETREF, URX_ISWORD_SET), *fStatus);
1242 break; 1183 break;
1243 1184
1244 case doBackslashX: 1185 case doBackslashX:
1245 fixLiterals(FALSE); 1186 fixLiterals(FALSE);
1246 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_X, 0), *fStatus ); 1187 appendOp(URX_BACKSLASH_X, 0);
1247 break; 1188 break;
1248 1189
1249 1190
1250 case doBackslashZ: 1191 case doBackslashZ:
1251 fixLiterals(FALSE); 1192 fixLiterals(FALSE);
1252 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOLLAR, 0), *fStatus); 1193 appendOp(URX_DOLLAR, 0);
1253 break; 1194 break;
1254 1195
1255 case doBackslashz: 1196 case doBackslashz:
1256 fixLiterals(FALSE); 1197 fixLiterals(FALSE);
1257 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_Z, 0), *fStatus ); 1198 appendOp(URX_BACKSLASH_Z, 0);
1258 break; 1199 break;
1259 1200
1260 case doEscapeError: 1201 case doEscapeError:
1261 error(U_REGEX_BAD_ESCAPE_SEQUENCE); 1202 error(U_REGEX_BAD_ESCAPE_SEQUENCE);
1262 break; 1203 break;
1263 1204
1264 case doExit: 1205 case doExit:
1265 fixLiterals(FALSE); 1206 fixLiterals(FALSE);
1266 returnVal = FALSE; 1207 returnVal = FALSE;
1267 break; 1208 break;
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1307 } 1248 }
1308 1249
1309 // Scan of the back reference in the source regexp is complete. Now generate 1250 // Scan of the back reference in the source regexp is complete. Now generate
1310 // the compiled code for it. 1251 // the compiled code for it.
1311 // Because capture groups can be forward-referenced by back-referenc es, 1252 // Because capture groups can be forward-referenced by back-referenc es,
1312 // we fill the operand with the capture group number. At the end 1253 // we fill the operand with the capture group number. At the end
1313 // of compilation, it will be changed to the variable's location. 1254 // of compilation, it will be changed to the variable's location.
1314 U_ASSERT(groupNum > 0); // Shouldn't happen. '\0' begins an octal escape sequence, 1255 U_ASSERT(groupNum > 0); // Shouldn't happen. '\0' begins an octal escape sequence,
1315 // and shouldn't enter this code path at all. 1256 // and shouldn't enter this code path at all.
1316 fixLiterals(FALSE); 1257 fixLiterals(FALSE);
1317 int32_t op;
1318 if (fModeFlags & UREGEX_CASE_INSENSITIVE) { 1258 if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
1319 op = URX_BUILD(URX_BACKREF_I, groupNum); 1259 appendOp(URX_BACKREF_I, groupNum);
1320 } else { 1260 } else {
1321 op = URX_BUILD(URX_BACKREF, groupNum); 1261 appendOp(URX_BACKREF, groupNum);
1322 } 1262 }
1323 fRXPat->fCompiledPat->addElement(op, *fStatus);
1324 } 1263 }
1325 break; 1264 break;
1326 1265
1327 1266
1328 case doPossessivePlus: 1267 case doPossessivePlus:
1329 // Possessive ++ quantifier. 1268 // Possessive ++ quantifier.
1330 // Compiles to 1269 // Compiles to
1331 // 1. STO_SP 1270 // 1. STO_SP
1332 // 2. body of stuff being iterated over 1271 // 2. body of stuff being iterated over
1333 // 3. STATE_SAVE 5 1272 // 3. STATE_SAVE 5
1334 // 4. JMP 2 1273 // 4. JMP 2
1335 // 5. LD_SP 1274 // 5. LD_SP
1336 // 6. ... 1275 // 6. ...
1337 // 1276 //
1338 // Note: TODO: This is pretty inefficient. A mass of saved state is built up 1277 // Note: TODO: This is pretty inefficient. A mass of saved state is built up
1339 // then unconditionally discarded. Perhaps introduce a n ew opcode. Ticket 6056 1278 // then unconditionally discarded. Perhaps introduce a n ew opcode. Ticket 6056
1340 // 1279 //
1341 { 1280 {
1342 // Emit the STO_SP 1281 // Emit the STO_SP
1343 int32_t topLoc = blockTopLoc(TRUE); 1282 int32_t topLoc = blockTopLoc(TRUE);
1344 int32_t stoLoc = fRXPat->fDataSize; 1283 int32_t stoLoc = allocateData(1); // Reserve the data location fo r storing save stack ptr.
1345 fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr. 1284 int32_t op = buildOp(URX_STO_SP, stoLoc);
1346 int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
1347 fRXPat->fCompiledPat->setElementAt(op, topLoc); 1285 fRXPat->fCompiledPat->setElementAt(op, topLoc);
1348 1286
1349 // Emit the STATE_SAVE 1287 // Emit the STATE_SAVE
1350 op = URX_BUILD(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+2); 1288 appendOp(URX_STATE_SAVE, fRXPat->fCompiledPat->size()+2);
1351 fRXPat->fCompiledPat->addElement(op, *fStatus);
1352 1289
1353 // Emit the JMP 1290 // Emit the JMP
1354 op = URX_BUILD(URX_JMP, topLoc+1); 1291 appendOp(URX_JMP, topLoc+1);
1355 fRXPat->fCompiledPat->addElement(op, *fStatus);
1356 1292
1357 // Emit the LD_SP 1293 // Emit the LD_SP
1358 op = URX_BUILD(URX_LD_SP, stoLoc); 1294 appendOp(URX_LD_SP, stoLoc);
1359 fRXPat->fCompiledPat->addElement(op, *fStatus);
1360 } 1295 }
1361 break; 1296 break;
1362 1297
1363 case doPossessiveStar: 1298 case doPossessiveStar:
1364 // Possessive *+ quantifier. 1299 // Possessive *+ quantifier.
1365 // Compiles to 1300 // Compiles to
1366 // 1. STO_SP loc 1301 // 1. STO_SP loc
1367 // 2. STATE_SAVE 5 1302 // 2. STATE_SAVE 5
1368 // 3. body of stuff being iterated over 1303 // 3. body of stuff being iterated over
1369 // 4. JMP 2 1304 // 4. JMP 2
1370 // 5. LD_SP loc 1305 // 5. LD_SP loc
1371 // 6 ... 1306 // 6 ...
1372 // TODO: do something to cut back the state stack each time through the loop. 1307 // TODO: do something to cut back the state stack each time through the loop.
1373 { 1308 {
1374 // Reserve two slots at the top of the block. 1309 // Reserve two slots at the top of the block.
1375 int32_t topLoc = blockTopLoc(TRUE); 1310 int32_t topLoc = blockTopLoc(TRUE);
1376 insertOp(topLoc); 1311 insertOp(topLoc);
1377 1312
1378 // emit STO_SP loc 1313 // emit STO_SP loc
1379 int32_t stoLoc = fRXPat->fDataSize; 1314 int32_t stoLoc = allocateData(1); // Reserve the data location for storing save stack ptr.
1380 fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr. 1315 int32_t op = buildOp(URX_STO_SP, stoLoc);
1381 int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
1382 fRXPat->fCompiledPat->setElementAt(op, topLoc); 1316 fRXPat->fCompiledPat->setElementAt(op, topLoc);
1383 1317
1384 // Emit the SAVE_STATE 5 1318 // Emit the SAVE_STATE 5
1385 int32_t L7 = fRXPat->fCompiledPat->size()+1; 1319 int32_t L7 = fRXPat->fCompiledPat->size()+1;
1386 op = URX_BUILD(URX_STATE_SAVE, L7); 1320 op = buildOp(URX_STATE_SAVE, L7);
1387 fRXPat->fCompiledPat->setElementAt(op, topLoc+1); 1321 fRXPat->fCompiledPat->setElementAt(op, topLoc+1);
1388 1322
1389 // Append the JMP operation. 1323 // Append the JMP operation.
1390 op = URX_BUILD(URX_JMP, topLoc+1); 1324 appendOp(URX_JMP, topLoc+1);
1391 fRXPat->fCompiledPat->addElement(op, *fStatus);
1392 1325
1393 // Emit the LD_SP loc 1326 // Emit the LD_SP loc
1394 op = URX_BUILD(URX_LD_SP, stoLoc); 1327 appendOp(URX_LD_SP, stoLoc);
1395 fRXPat->fCompiledPat->addElement(op, *fStatus);
1396 } 1328 }
1397 break; 1329 break;
1398 1330
1399 case doPossessiveOpt: 1331 case doPossessiveOpt:
1400 // Possessive ?+ quantifier. 1332 // Possessive ?+ quantifier.
1401 // Compiles to 1333 // Compiles to
1402 // 1. STO_SP loc 1334 // 1. STO_SP loc
1403 // 2. SAVE_STATE 5 1335 // 2. SAVE_STATE 5
1404 // 3. body of optional block 1336 // 3. body of optional block
1405 // 4. LD_SP loc 1337 // 4. LD_SP loc
1406 // 5. ... 1338 // 5. ...
1407 // 1339 //
1408 { 1340 {
1409 // Reserve two slots at the top of the block. 1341 // Reserve two slots at the top of the block.
1410 int32_t topLoc = blockTopLoc(TRUE); 1342 int32_t topLoc = blockTopLoc(TRUE);
1411 insertOp(topLoc); 1343 insertOp(topLoc);
1412 1344
1413 // Emit the STO_SP 1345 // Emit the STO_SP
1414 int32_t stoLoc = fRXPat->fDataSize; 1346 int32_t stoLoc = allocateData(1); // Reserve the data location f or storing save stack ptr.
1415 fRXPat->fDataSize++; // Reserve the data location for storing save stack ptr. 1347 int32_t op = buildOp(URX_STO_SP, stoLoc);
1416 int32_t op = URX_BUILD(URX_STO_SP, stoLoc);
1417 fRXPat->fCompiledPat->setElementAt(op, topLoc); 1348 fRXPat->fCompiledPat->setElementAt(op, topLoc);
1418 1349
1419 // Emit the SAVE_STATE 1350 // Emit the SAVE_STATE
1420 int32_t continueLoc = fRXPat->fCompiledPat->size()+1; 1351 int32_t continueLoc = fRXPat->fCompiledPat->size()+1;
1421 op = URX_BUILD(URX_STATE_SAVE, continueLoc); 1352 op = buildOp(URX_STATE_SAVE, continueLoc);
1422 fRXPat->fCompiledPat->setElementAt(op, topLoc+1); 1353 fRXPat->fCompiledPat->setElementAt(op, topLoc+1);
1423 1354
1424 // Emit the LD_SP 1355 // Emit the LD_SP
1425 op = URX_BUILD(URX_LD_SP, stoLoc); 1356 appendOp(URX_LD_SP, stoLoc);
1426 fRXPat->fCompiledPat->addElement(op, *fStatus);
1427 } 1357 }
1428 break; 1358 break;
1429 1359
1430 1360
1431 case doBeginMatchMode: 1361 case doBeginMatchMode:
1432 fNewModeFlags = fModeFlags; 1362 fNewModeFlags = fModeFlags;
1433 fSetModeFlag = TRUE; 1363 fSetModeFlag = TRUE;
1434 break; 1364 break;
1435 1365
1436 case doMatchMode: // (?i) and similar 1366 case doMatchMode: // (?i) and similar
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1473 // We've got a (?i: or similar. Begin a parenthesized block, save old 1403 // We've got a (?i: or similar. Begin a parenthesized block, save old
1474 // mode flags so they can be restored at the close of the block. 1404 // mode flags so they can be restored at the close of the block.
1475 // 1405 //
1476 // Compile to a 1406 // Compile to a
1477 // - NOP, which later may be replaced by a save-state if the 1407 // - NOP, which later may be replaced by a save-state if the
1478 // parenthesized group gets a * quantifier, followed by 1408 // parenthesized group gets a * quantifier, followed by
1479 // - NOP, which may later be replaced by a save-state if there 1409 // - NOP, which may later be replaced by a save-state if there
1480 // is an '|' alternation within the parens. 1410 // is an '|' alternation within the parens.
1481 { 1411 {
1482 fixLiterals(FALSE); 1412 fixLiterals(FALSE);
1483 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); 1413 appendOp(URX_NOP, 0);
1484 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_NOP, 0), *fStatus); 1414 appendOp(URX_NOP, 0);
1485 1415
1486 // On the Parentheses stack, start a new frame and add the postions 1416 // On the Parentheses stack, start a new frame and add the postions
1487 // of the two NOPs (a normal non-capturing () frame, except for th e 1417 // of the two NOPs (a normal non-capturing () frame, except for th e
1488 // saving of the orignal mode flags.) 1418 // saving of the orignal mode flags.)
1489 fParenStack.push(fModeFlags, *fStatus); 1419 fParenStack.push(fModeFlags, *fStatus);
1490 fParenStack.push(flags, *fStatus); // Fra me Marker 1420 fParenStack.push(flags, *fStatus); // Fra me Marker
1491 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP 1421 fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP
1492 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP 1422 fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus); // The second NOP
1493 1423
1494 // Set the current mode flags to the new values. 1424 // Set the current mode flags to the new values.
(...skipping 316 matching lines...) Expand 10 before | Expand all | Expand 10 after
1811 // string in a pattern, emit the code to match the 1741 // string in a pattern, emit the code to match the
1812 // accumulated literal string. 1742 // accumulated literal string.
1813 // 1743 //
1814 // Optionally, split the last char of the string off in to 1744 // Optionally, split the last char of the string off in to
1815 // a single "ONE_CHAR" operation, so that quantifiers c an 1745 // a single "ONE_CHAR" operation, so that quantifiers c an
1816 // apply to that char alone. Example: abc* 1746 // apply to that char alone. Example: abc*
1817 // The * must apply to the 'c' only. 1747 // The * must apply to the 'c' only.
1818 // 1748 //
1819 //------------------------------------------------------------------------------ 1749 //------------------------------------------------------------------------------
1820 void RegexCompile::fixLiterals(UBool split) { 1750 void RegexCompile::fixLiterals(UBool split) {
1821 int32_t op = 0; // An op from/for the compiled patter n.
1822 1751
1823 // If no literal characters have been scanned but not yet had code generated 1752 // If no literal characters have been scanned but not yet had code generated
1824 // for them, nothing needs to be done. 1753 // for them, nothing needs to be done.
1825 if (fLiteralChars.length() == 0) { 1754 if (fLiteralChars.length() == 0) {
1826 return; 1755 return;
1827 } 1756 }
1828 1757
1829 int32_t indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.lengt h(), -1); 1758 int32_t indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.lengt h(), -1);
1830 UChar32 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint); 1759 UChar32 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint);
1831 1760
(...skipping 18 matching lines...) Expand all
1850 if (fModeFlags & UREGEX_CASE_INSENSITIVE) { 1779 if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
1851 fLiteralChars.foldCase(); 1780 fLiteralChars.foldCase();
1852 indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.length(), -1); 1781 indexOfLastCodePoint = fLiteralChars.moveIndex32(fLiteralChars.length(), -1);
1853 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint); 1782 lastCodePoint = fLiteralChars.char32At(indexOfLastCodePoint);
1854 } 1783 }
1855 1784
1856 if (indexOfLastCodePoint == 0) { 1785 if (indexOfLastCodePoint == 0) {
1857 // Single character, emit a URX_ONECHAR op to match it. 1786 // Single character, emit a URX_ONECHAR op to match it.
1858 if ((fModeFlags & UREGEX_CASE_INSENSITIVE) && 1787 if ((fModeFlags & UREGEX_CASE_INSENSITIVE) &&
1859 u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) { 1788 u_hasBinaryProperty(lastCodePoint, UCHAR_CASE_SENSITIVE)) {
1860 op = URX_BUILD(URX_ONECHAR_I, lastCodePoint); 1789 appendOp(URX_ONECHAR_I, lastCodePoint);
1861 } else { 1790 } else {
1862 op = URX_BUILD(URX_ONECHAR, lastCodePoint); 1791 appendOp(URX_ONECHAR, lastCodePoint);
1863 } 1792 }
1864 fRXPat->fCompiledPat->addElement(op, *fStatus);
1865 } else { 1793 } else {
1866 // Two or more chars, emit a URX_STRING to match them. 1794 // Two or more chars, emit a URX_STRING to match them.
1795 if (fLiteralChars.length() > 0x00ffffff || fRXPat->fLiteralText.length() > 0x00ffffff) {
1796 error(U_REGEX_PATTERN_TOO_BIG);
1797 }
1867 if (fModeFlags & UREGEX_CASE_INSENSITIVE) { 1798 if (fModeFlags & UREGEX_CASE_INSENSITIVE) {
1868 op = URX_BUILD(URX_STRING_I, fRXPat->fLiteralText.length()); 1799 appendOp(URX_STRING_I, fRXPat->fLiteralText.length());
1869 } else { 1800 } else {
1870 // TODO here: add optimization to split case sensitive strings of l ength two 1801 // TODO here: add optimization to split case sensitive strings of l ength two
1871 // into two single char ops, for efficiency. 1802 // into two single char ops, for efficiency.
1872 op = URX_BUILD(URX_STRING, fRXPat->fLiteralText.length()); 1803 appendOp(URX_STRING, fRXPat->fLiteralText.length());
1873 } 1804 }
1874 fRXPat->fCompiledPat->addElement(op, *fStatus); 1805 appendOp(URX_STRING_LEN, fLiteralChars.length());
1875 op = URX_BUILD(URX_STRING_LEN, fLiteralChars.length());
1876 fRXPat->fCompiledPat->addElement(op, *fStatus);
1877 1806
1878 // Add this string into the accumulated strings of the compiled pattern. 1807 // Add this string into the accumulated strings of the compiled pattern.
1879 fRXPat->fLiteralText.append(fLiteralChars); 1808 fRXPat->fLiteralText.append(fLiteralChars);
1880 } 1809 }
1881 1810
1882 fLiteralChars.remove(); 1811 fLiteralChars.remove();
1883 } 1812 }
1884 1813
1885 1814
1886 1815 int32_t RegexCompile::buildOp(int32_t type, int32_t val) {
1887 1816 if (U_FAILURE(*fStatus)) {
1817 return 0;
1818 }
1819 if (type < 0 || type > 255) {
1820 U_ASSERT(FALSE);
1821 error(U_REGEX_INTERNAL_ERROR);
1822 type = URX_RESERVED_OP;
1823 }
1824 if (val > 0x00ffffff) {
1825 U_ASSERT(FALSE);
1826 error(U_REGEX_INTERNAL_ERROR);
1827 val = 0;
1828 }
1829 if (val < 0) {
1830 if (!(type == URX_RESERVED_OP_N || type == URX_RESERVED_OP)) {
1831 U_ASSERT(FALSE);
1832 error(U_REGEX_INTERNAL_ERROR);
1833 return -1;
1834 }
1835 if (URX_TYPE(val) != 0xff) {
1836 U_ASSERT(FALSE);
1837 error(U_REGEX_INTERNAL_ERROR);
1838 return -1;
1839 }
1840 type = URX_RESERVED_OP_N;
1841 }
1842 return (type << 24) | val;
1843 }
1888 1844
1889 1845
1890 //------------------------------------------------------------------------------ 1846 //------------------------------------------------------------------------------
1847 //
1848 // appendOp() Append a new instruction onto the compiled pattern
1849 // Includes error checking, limiting the size of the
1850 // pattern to lengths that can be represented in the
1851 // 24 bit operand field of an instruction.
1852 //
1853 //------------------------------------------------------------------------------
1854 void RegexCompile::appendOp(int32_t op) {
1855 if (U_FAILURE(*fStatus)) {
1856 return;
1857 }
1858 fRXPat->fCompiledPat->addElement(op, *fStatus);
1859 if ((fRXPat->fCompiledPat->size() > 0x00fffff0) && U_SUCCESS(*fStatus)) {
1860 error(U_REGEX_PATTERN_TOO_BIG);
1861 }
1862 }
1863
1864 void RegexCompile::appendOp(int32_t type, int32_t val) {
1865 appendOp(buildOp(type, val));
1866 }
1867
1868
1869 //------------------------------------------------------------------------------
1891 // 1870 //
1892 // insertOp() Insert a slot for a new opcode into the already 1871 // insertOp() Insert a slot for a new opcode into the already
1893 // compiled pattern code. 1872 // compiled pattern code.
1894 // 1873 //
1895 // Fill the slot with a NOP. Our caller will replace i t 1874 // Fill the slot with a NOP. Our caller will replace i t
1896 // with what they really wanted. 1875 // with what they really wanted.
1897 // 1876 //
1898 //------------------------------------------------------------------------------ 1877 //------------------------------------------------------------------------------
1899 void RegexCompile::insertOp(int32_t where) { 1878 void RegexCompile::insertOp(int32_t where) {
1900 UVector64 *code = fRXPat->fCompiledPat; 1879 UVector64 *code = fRXPat->fCompiledPat;
1901 U_ASSERT(where>0 && where < code->size()); 1880 U_ASSERT(where>0 && where < code->size());
1902 1881
1903 int32_t nop = URX_BUILD(URX_NOP, 0); 1882 int32_t nop = buildOp(URX_NOP, 0);
1904 code->insertElementAt(nop, where, *fStatus); 1883 code->insertElementAt(nop, where, *fStatus);
1905 1884
1906 // Walk through the pattern, looking for any ops with targets that 1885 // Walk through the pattern, looking for any ops with targets that
1907 // were moved down by the insert. Fix them. 1886 // were moved down by the insert. Fix them.
1908 int32_t loc; 1887 int32_t loc;
1909 for (loc=0; loc<code->size(); loc++) { 1888 for (loc=0; loc<code->size(); loc++) {
1910 int32_t op = (int32_t)code->elementAti(loc); 1889 int32_t op = (int32_t)code->elementAti(loc);
1911 int32_t opType = URX_TYPE(op); 1890 int32_t opType = URX_TYPE(op);
1912 int32_t opValue = URX_VAL(op); 1891 int32_t opValue = URX_VAL(op);
1913 if ((opType == URX_JMP || 1892 if ((opType == URX_JMP ||
1914 opType == URX_JMPX || 1893 opType == URX_JMPX ||
1915 opType == URX_STATE_SAVE || 1894 opType == URX_STATE_SAVE ||
1916 opType == URX_CTR_LOOP || 1895 opType == URX_CTR_LOOP ||
1917 opType == URX_CTR_LOOP_NG || 1896 opType == URX_CTR_LOOP_NG ||
1918 opType == URX_JMP_SAV || 1897 opType == URX_JMP_SAV ||
1919 opType == URX_JMP_SAV_X || 1898 opType == URX_JMP_SAV_X ||
1920 opType == URX_RELOC_OPRND) && opValue > where) { 1899 opType == URX_RELOC_OPRND) && opValue > where) {
1921 // Target location for this opcode is after the insertion point and 1900 // Target location for this opcode is after the insertion point and
1922 // needs to be incremented to adjust for the insertion. 1901 // needs to be incremented to adjust for the insertion.
1923 opValue++; 1902 opValue++;
1924 op = URX_BUILD(opType, opValue); 1903 op = buildOp(opType, opValue);
1925 code->setElementAt(op, loc); 1904 code->setElementAt(op, loc);
1926 } 1905 }
1927 } 1906 }
1928 1907
1929 // Now fix up the parentheses stack. All positive values in it are location s in 1908 // Now fix up the parentheses stack. All positive values in it are location s in
1930 // the compiled pattern. (Negative values are frame boundaries, and don't need fixing.) 1909 // the compiled pattern. (Negative values are frame boundaries, and don't need fixing.)
1931 for (loc=0; loc<fParenStack.size(); loc++) { 1910 for (loc=0; loc<fParenStack.size(); loc++) {
1932 int32_t x = fParenStack.elementAti(loc); 1911 int32_t x = fParenStack.elementAti(loc);
1933 U_ASSERT(x < code->size()); 1912 U_ASSERT(x < code->size());
1934 if (x>where) { 1913 if (x>where) {
1935 x++; 1914 x++;
1936 fParenStack.setElementAt(x, loc); 1915 fParenStack.setElementAt(x, loc);
1937 } 1916 }
1938 } 1917 }
1939 1918
1940 if (fMatchCloseParen > where) { 1919 if (fMatchCloseParen > where) {
1941 fMatchCloseParen++; 1920 fMatchCloseParen++;
1942 } 1921 }
1943 if (fMatchOpenParen > where) { 1922 if (fMatchOpenParen > where) {
1944 fMatchOpenParen++; 1923 fMatchOpenParen++;
1945 } 1924 }
1946 } 1925 }
1947 1926
1948 1927
1949
1950 //------------------------------------------------------------------------------ 1928 //------------------------------------------------------------------------------
1951 // 1929 //
1930 // allocateData() Allocate storage in the matcher's static data area.
1931 // Return the index for the newly allocated data.
1932 // The storage won't actually exist until we are running a match
1933 // operation, but the storage indexes are inserted into various
1934 // opcodes while compiling the pattern.
1935 //
1936 //------------------------------------------------------------------------------
1937 int32_t RegexCompile::allocateData(int32_t size) {
1938 if (U_FAILURE(*fStatus)) {
1939 return 0;
1940 }
1941 if (size <= 0 || size > 0x100 || fRXPat->fDataSize < 0) {
1942 error(U_REGEX_INTERNAL_ERROR);
1943 return 0;
1944 }
1945 int32_t dataIndex = fRXPat->fDataSize;
1946 fRXPat->fDataSize += size;
1947 if (fRXPat->fDataSize >= 0x00fffff0) {
1948 error(U_REGEX_INTERNAL_ERROR);
1949 }
1950 return dataIndex;
1951 }
1952
1953
1954 //------------------------------------------------------------------------------
1955 //
1956 // allocateStackData() Allocate space in the back-tracking stack frame.
1957 // Return the index for the newly allocated data.
1958 // The frame indexes are inserted into various
1959 // opcodes while compiling the pattern, meaning that fra me
1960 // size must be restricted to the size that will fit
1961 // as an operand (24 bits).
1962 //
1963 //------------------------------------------------------------------------------
1964 int32_t RegexCompile::allocateStackData(int32_t size) {
1965 if (U_FAILURE(*fStatus)) {
1966 return 0;
1967 }
1968 if (size <= 0 || size > 0x100 || fRXPat->fFrameSize < 0) {
1969 error(U_REGEX_INTERNAL_ERROR);
1970 return 0;
1971 }
1972 int32_t dataIndex = fRXPat->fFrameSize;
1973 fRXPat->fFrameSize += size;
1974 if (fRXPat->fFrameSize >= 0x00fffff0) {
1975 error(U_REGEX_PATTERN_TOO_BIG);
1976 }
1977 return dataIndex;
1978 }
1979
1980
1981 //------------------------------------------------------------------------------
1982 //
1952 // blockTopLoc() Find or create a location in the compiled pattern 1983 // blockTopLoc() Find or create a location in the compiled pattern
1953 // at the start of the operation or block that has 1984 // at the start of the operation or block that has
1954 // just been compiled. Needed when a quantifier (* or 1985 // just been compiled. Needed when a quantifier (* or
1955 // whatever) appears, and we need to add an operation 1986 // whatever) appears, and we need to add an operation
1956 // at the start of the thing being quantified. 1987 // at the start of the thing being quantified.
1957 // 1988 //
1958 // (Parenthesized Blocks) have a slot with a NOP that 1989 // (Parenthesized Blocks) have a slot with a NOP that
1959 // is reserved for this purpose. .* or similar don't 1990 // is reserved for this purpose. .* or similar don't
1960 // and a slot needs to be added. 1991 // and a slot needs to be added.
1961 // 1992 //
(...skipping 19 matching lines...) Expand all
1981 // No slot for STATE_SAVE was pre-reserved in the compiled code. 2012 // No slot for STATE_SAVE was pre-reserved in the compiled code.
1982 // We need to make space now. 2013 // We need to make space now.
1983 theLoc = fRXPat->fCompiledPat->size()-1; 2014 theLoc = fRXPat->fCompiledPat->size()-1;
1984 int32_t opAtTheLoc = (int32_t)fRXPat->fCompiledPat->elementAti(theLoc); 2015 int32_t opAtTheLoc = (int32_t)fRXPat->fCompiledPat->elementAti(theLoc);
1985 if (URX_TYPE(opAtTheLoc) == URX_STRING_LEN) { 2016 if (URX_TYPE(opAtTheLoc) == URX_STRING_LEN) {
1986 // Strings take two opcode, we want the position of the first one. 2017 // Strings take two opcode, we want the position of the first one.
1987 // We can have a string at this point if a single character case-fol ded to two. 2018 // We can have a string at this point if a single character case-fol ded to two.
1988 theLoc--; 2019 theLoc--;
1989 } 2020 }
1990 if (reserveLoc) { 2021 if (reserveLoc) {
1991 int32_t nop = URX_BUILD(URX_NOP, 0); 2022 int32_t nop = buildOp(URX_NOP, 0);
1992 fRXPat->fCompiledPat->insertElementAt(nop, theLoc, *fStatus); 2023 fRXPat->fCompiledPat->insertElementAt(nop, theLoc, *fStatus);
1993 } 2024 }
1994 } 2025 }
1995 return theLoc; 2026 return theLoc;
1996 } 2027 }
1997 2028
1998 2029
1999 2030
2000 //------------------------------------------------------------------------------ 2031 //------------------------------------------------------------------------------
2001 // 2032 //
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
2056 case capturing: 2087 case capturing:
2057 // Capturing Parentheses. 2088 // Capturing Parentheses.
2058 // Insert a End Capture op into the pattern. 2089 // Insert a End Capture op into the pattern.
2059 // The frame offset of the variables for this cg is obtained from the 2090 // The frame offset of the variables for this cg is obtained from the
2060 // start capture op and put it into the end-capture op. 2091 // start capture op and put it into the end-capture op.
2061 { 2092 {
2062 int32_t captureOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMat chOpenParen+1); 2093 int32_t captureOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMat chOpenParen+1);
2063 U_ASSERT(URX_TYPE(captureOp) == URX_START_CAPTURE); 2094 U_ASSERT(URX_TYPE(captureOp) == URX_START_CAPTURE);
2064 2095
2065 int32_t frameVarLocation = URX_VAL(captureOp); 2096 int32_t frameVarLocation = URX_VAL(captureOp);
2066 int32_t endCaptureOp = URX_BUILD(URX_END_CAPTURE, frameVarLocation ); 2097 appendOp(URX_END_CAPTURE, frameVarLocation);
2067 fRXPat->fCompiledPat->addElement(endCaptureOp, *fStatus);
2068 } 2098 }
2069 break; 2099 break;
2070 case atomic: 2100 case atomic:
2071 // Atomic Parenthesis. 2101 // Atomic Parenthesis.
2072 // Insert a LD_SP operation to restore the state stack to the position 2102 // Insert a LD_SP operation to restore the state stack to the position
2073 // it was when the atomic parens were entered. 2103 // it was when the atomic parens were entered.
2074 { 2104 {
2075 int32_t stoOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOp enParen+1); 2105 int32_t stoOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchOp enParen+1);
2076 U_ASSERT(URX_TYPE(stoOp) == URX_STO_SP); 2106 U_ASSERT(URX_TYPE(stoOp) == URX_STO_SP);
2077 int32_t stoLoc = URX_VAL(stoOp); 2107 int32_t stoLoc = URX_VAL(stoOp);
2078 int32_t ldOp = URX_BUILD(URX_LD_SP, stoLoc); 2108 appendOp(URX_LD_SP, stoLoc);
2079 fRXPat->fCompiledPat->addElement(ldOp, *fStatus);
2080 } 2109 }
2081 break; 2110 break;
2082 2111
2083 case lookAhead: 2112 case lookAhead:
2084 { 2113 {
2085 int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen-5); 2114 int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen-5);
2086 U_ASSERT(URX_TYPE(startOp) == URX_LA_START); 2115 U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
2087 int32_t dataLoc = URX_VAL(startOp); 2116 int32_t dataLoc = URX_VAL(startOp);
2088 int32_t op = URX_BUILD(URX_LA_END, dataLoc); 2117 appendOp(URX_LA_END, dataLoc);
2089 fRXPat->fCompiledPat->addElement(op, *fStatus);
2090 } 2118 }
2091 break; 2119 break;
2092 2120
2093 case negLookAhead: 2121 case negLookAhead:
2094 { 2122 {
2095 // See comment at doOpenLookAheadNeg 2123 // See comment at doOpenLookAheadNeg
2096 int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen-1); 2124 int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen-1);
2097 U_ASSERT(URX_TYPE(startOp) == URX_LA_START); 2125 U_ASSERT(URX_TYPE(startOp) == URX_LA_START);
2098 int32_t dataLoc = URX_VAL(startOp); 2126 int32_t dataLoc = URX_VAL(startOp);
2099 int32_t op = URX_BUILD(URX_LA_END, dataLoc); 2127 appendOp(URX_LA_END, dataLoc);
2100 fRXPat->fCompiledPat->addElement(op, *fStatus); 2128 appendOp(URX_BACKTRACK, 0);
2101 op = URX_BUILD(URX_BACKTRACK, 0); 2129 appendOp(URX_LA_END, dataLoc);
2102 fRXPat->fCompiledPat->addElement(op, *fStatus);
2103 op = URX_BUILD(URX_LA_END, dataLoc);
2104 fRXPat->fCompiledPat->addElement(op, *fStatus);
2105 2130
2106 // Patch the URX_SAVE near the top of the block. 2131 // Patch the URX_SAVE near the top of the block.
2107 // The destination of the SAVE is the final LA_END that was just add ed. 2132 // The destination of the SAVE is the final LA_END that was just add ed.
2108 int32_t saveOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen); 2133 int32_t saveOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen);
2109 U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE); 2134 U_ASSERT(URX_TYPE(saveOp) == URX_STATE_SAVE);
2110 int32_t dest = fRXPat->fCompiledPat->size()-1; 2135 int32_t dest = fRXPat->fCompiledPat->size()-1;
2111 saveOp = URX_BUILD(URX_STATE_SAVE, dest); 2136 saveOp = buildOp(URX_STATE_SAVE, dest);
2112 fRXPat->fCompiledPat->setElementAt(saveOp, fMatchOpenParen); 2137 fRXPat->fCompiledPat->setElementAt(saveOp, fMatchOpenParen);
2113 } 2138 }
2114 break; 2139 break;
2115 2140
2116 case lookBehind: 2141 case lookBehind:
2117 { 2142 {
2118 // See comment at doOpenLookBehind. 2143 // See comment at doOpenLookBehind.
2119 2144
2120 // Append the URX_LB_END and URX_LA_END to the compiled pattern. 2145 // Append the URX_LB_END and URX_LA_END to the compiled pattern.
2121 int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen-4); 2146 int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen-4);
2122 U_ASSERT(URX_TYPE(startOp) == URX_LB_START); 2147 U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
2123 int32_t dataLoc = URX_VAL(startOp); 2148 int32_t dataLoc = URX_VAL(startOp);
2124 int32_t op = URX_BUILD(URX_LB_END, dataLoc); 2149 appendOp(URX_LB_END, dataLoc);
2125 fRXPat->fCompiledPat->addElement(op, *fStatus); 2150 appendOp(URX_LA_END, dataLoc);
2126 op = URX_BUILD(URX_LA_END, dataLoc);
2127 fRXPat->fCompiledPat->addElement(op, *fStatus);
2128 2151
2129 // Determine the min and max bounds for the length of the 2152 // Determine the min and max bounds for the length of the
2130 // string that the pattern can match. 2153 // string that the pattern can match.
2131 // An unbounded upper limit is an error. 2154 // An unbounded upper limit is an error.
2132 int32_t patEnd = fRXPat->fCompiledPat->size() - 1; 2155 int32_t patEnd = fRXPat->fCompiledPat->size() - 1;
2133 int32_t minML = minMatchLength(fMatchOpenParen, patEnd); 2156 int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
2134 int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); 2157 int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
2158 if (URX_TYPE(maxML) != 0) {
2159 error(U_REGEX_LOOK_BEHIND_LIMIT);
2160 break;
2161 }
2135 if (maxML == INT32_MAX) { 2162 if (maxML == INT32_MAX) {
2136 error(U_REGEX_LOOK_BEHIND_LIMIT); 2163 error(U_REGEX_LOOK_BEHIND_LIMIT);
2137 break; 2164 break;
2138 } 2165 }
2139 U_ASSERT(minML <= maxML); 2166 U_ASSERT(minML <= maxML);
2140 2167
2141 // Insert the min and max match len bounds into the URX_LB_CONT op t hat 2168 // Insert the min and max match len bounds into the URX_LB_CONT op t hat
2142 // appears at the top of the look-behind block, at location fMatchO penParen+1 2169 // appears at the top of the look-behind block, at location fMatchO penParen+1
2143 fRXPat->fCompiledPat->setElementAt(minML, fMatchOpenParen-2); 2170 fRXPat->fCompiledPat->setElementAt(minML, fMatchOpenParen-2);
2144 fRXPat->fCompiledPat->setElementAt(maxML, fMatchOpenParen-1); 2171 fRXPat->fCompiledPat->setElementAt(maxML, fMatchOpenParen-1);
2145 2172
2146 } 2173 }
2147 break; 2174 break;
2148 2175
2149 2176
2150 2177
2151 case lookBehindN: 2178 case lookBehindN:
2152 { 2179 {
2153 // See comment at doOpenLookBehindNeg. 2180 // See comment at doOpenLookBehindNeg.
2154 2181
2155 // Append the URX_LBN_END to the compiled pattern. 2182 // Append the URX_LBN_END to the compiled pattern.
2156 int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen-5); 2183 int32_t startOp = (int32_t)fRXPat->fCompiledPat->elementAti(fMatchO penParen-5);
2157 U_ASSERT(URX_TYPE(startOp) == URX_LB_START); 2184 U_ASSERT(URX_TYPE(startOp) == URX_LB_START);
2158 int32_t dataLoc = URX_VAL(startOp); 2185 int32_t dataLoc = URX_VAL(startOp);
2159 int32_t op = URX_BUILD(URX_LBN_END, dataLoc); 2186 appendOp(URX_LBN_END, dataLoc);
2160 fRXPat->fCompiledPat->addElement(op, *fStatus);
2161 2187
2162 // Determine the min and max bounds for the length of the 2188 // Determine the min and max bounds for the length of the
2163 // string that the pattern can match. 2189 // string that the pattern can match.
2164 // An unbounded upper limit is an error. 2190 // An unbounded upper limit is an error.
2165 int32_t patEnd = fRXPat->fCompiledPat->size() - 1; 2191 int32_t patEnd = fRXPat->fCompiledPat->size() - 1;
2166 int32_t minML = minMatchLength(fMatchOpenParen, patEnd); 2192 int32_t minML = minMatchLength(fMatchOpenParen, patEnd);
2167 int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd); 2193 int32_t maxML = maxMatchLength(fMatchOpenParen, patEnd);
2194 if (URX_TYPE(maxML) != 0) {
2195 error(U_REGEX_LOOK_BEHIND_LIMIT);
2196 break;
2197 }
2168 if (maxML == INT32_MAX) { 2198 if (maxML == INT32_MAX) {
2169 error(U_REGEX_LOOK_BEHIND_LIMIT); 2199 error(U_REGEX_LOOK_BEHIND_LIMIT);
2170 break; 2200 break;
2171 } 2201 }
2172 U_ASSERT(minML <= maxML); 2202 U_ASSERT(minML <= maxML);
2173 2203
2174 // Insert the min and max match len bounds into the URX_LB_CONT op t hat 2204 // Insert the min and max match len bounds into the URX_LB_CONT op t hat
2175 // appears at the top of the look-behind block, at location fMatchO penParen+1 2205 // appears at the top of the look-behind block, at location fMatchO penParen+1
2176 fRXPat->fCompiledPat->setElementAt(minML, fMatchOpenParen-3); 2206 fRXPat->fCompiledPat->setElementAt(minML, fMatchOpenParen-3);
2177 fRXPat->fCompiledPat->setElementAt(maxML, fMatchOpenParen-2); 2207 fRXPat->fCompiledPat->setElementAt(maxML, fMatchOpenParen-2);
2178 2208
2179 // Insert the pattern location to continue at after a successful mat ch 2209 // Insert the pattern location to continue at after a successful mat ch
2180 // as the last operand of the URX_LBN_CONT 2210 // as the last operand of the URX_LBN_CONT
2181 op = URX_BUILD(URX_RELOC_OPRND, fRXPat->fCompiledPat->size()); 2211 int32_t op = buildOp(URX_RELOC_OPRND, fRXPat->fCompiledPat->size());
2182 fRXPat->fCompiledPat->setElementAt(op, fMatchOpenParen-1); 2212 fRXPat->fCompiledPat->setElementAt(op, fMatchOpenParen-1);
2183 } 2213 }
2184 break; 2214 break;
2185 2215
2186 2216
2187 2217
2188 default: 2218 default:
2189 U_ASSERT(FALSE); 2219 U_ASSERT(FALSE);
2190 } 2220 }
2191 2221
(...skipping 20 matching lines...) Expand all
2212 // There shoudn't be any, but just in case. 2242 // There shoudn't be any, but just in case.
2213 // (Case Closure can add them; if we had a simple case closure avaialble that 2243 // (Case Closure can add them; if we had a simple case closure avaialble that
2214 // ignored strings, that would be better.) 2244 // ignored strings, that would be better.)
2215 theSet->removeAllStrings(); 2245 theSet->removeAllStrings();
2216 int32_t setSize = theSet->size(); 2246 int32_t setSize = theSet->size();
2217 2247
2218 switch (setSize) { 2248 switch (setSize) {
2219 case 0: 2249 case 0:
2220 { 2250 {
2221 // Set of no elements. Always fails to match. 2251 // Set of no elements. Always fails to match.
2222 fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKTRACK, 0), *fStat us); 2252 appendOp(URX_BACKTRACK, 0);
2223 delete theSet; 2253 delete theSet;
2224 } 2254 }
2225 break; 2255 break;
2226 2256
2227 case 1: 2257 case 1:
2228 { 2258 {
2229 // The set contains only a single code point. Put it into 2259 // The set contains only a single code point. Put it into
2230 // the compiled pattern as a single char operation rather 2260 // the compiled pattern as a single char operation rather
2231 // than a set, and discard the set itself. 2261 // than a set, and discard the set itself.
2232 literalChar(theSet->charAt(0)); 2262 literalChar(theSet->charAt(0));
2233 delete theSet; 2263 delete theSet;
2234 } 2264 }
2235 break; 2265 break;
2236 2266
2237 default: 2267 default:
2238 { 2268 {
2239 // The set contains two or more chars. (the normal case) 2269 // The set contains two or more chars. (the normal case)
2240 // Put it into the compiled pattern as a set. 2270 // Put it into the compiled pattern as a set.
2241 int32_t setNumber = fRXPat->fSets->size(); 2271 int32_t setNumber = fRXPat->fSets->size();
2242 fRXPat->fSets->addElement(theSet, *fStatus); 2272 fRXPat->fSets->addElement(theSet, *fStatus);
2243 int32_t setOp = URX_BUILD(URX_SETREF, setNumber); 2273 appendOp(URX_SETREF, setNumber);
2244 fRXPat->fCompiledPat->addElement(setOp, *fStatus);
2245 } 2274 }
2246 } 2275 }
2247 } 2276 }
2248 2277
2249 2278
2250 //------------------------------------------------------------------------------ 2279 //------------------------------------------------------------------------------
2251 // 2280 //
2252 // compileInterval Generate the code for a {min, max} style interval quanti fier. 2281 // compileInterval Generate the code for a {min, max} style interval quanti fier.
2253 // Except for the specific opcodes used, the code is the sa me 2282 // Except for the specific opcodes used, the code is the sa me
2254 // for all three types (greedy, non-greedy, possessive) of 2283 // for all three types (greedy, non-greedy, possessive) of
(...skipping 18 matching lines...) Expand all
2273 int32_t topOfBlock = blockTopLoc(TRUE); 2302 int32_t topOfBlock = blockTopLoc(TRUE);
2274 insertOp(topOfBlock); 2303 insertOp(topOfBlock);
2275 insertOp(topOfBlock); 2304 insertOp(topOfBlock);
2276 insertOp(topOfBlock); 2305 insertOp(topOfBlock);
2277 2306
2278 // The operands for the CTR_INIT opcode include the index in the matcher dat a 2307 // The operands for the CTR_INIT opcode include the index in the matcher dat a
2279 // of the counter. Allocate it now. There are two data items 2308 // of the counter. Allocate it now. There are two data items
2280 // counterLoc --> Loop counter 2309 // counterLoc --> Loop counter
2281 // +1 --> Input index (for breaking non-progressing loops) 2310 // +1 --> Input index (for breaking non-progressing loops)
2282 // (Only present if unbounded upper limit on loop) 2311 // (Only present if unbounded upper limit on loop)
2283 int32_t counterLoc = fRXPat->fFrameSize; 2312 int32_t dataSize = fIntervalUpper < 0 ? 2 : 1;
2284 fRXPat->fFrameSize++; 2313 int32_t counterLoc = allocateStackData(dataSize);
2285 if (fIntervalUpper < 0) {
2286 fRXPat->fFrameSize++;
2287 }
2288 2314
2289 int32_t op = URX_BUILD(InitOp, counterLoc); 2315 int32_t op = buildOp(InitOp, counterLoc);
2290 fRXPat->fCompiledPat->setElementAt(op, topOfBlock); 2316 fRXPat->fCompiledPat->setElementAt(op, topOfBlock);
2291 2317
2292 // The second operand of CTR_INIT is the location following the end of the l oop. 2318 // The second operand of CTR_INIT is the location following the end of the l oop.
2293 // Must put in as a URX_RELOC_OPRND so that the value will be adjusted if the 2319 // Must put in as a URX_RELOC_OPRND so that the value will be adjusted if the
2294 // compilation of something later on causes the code to grow and the targe t 2320 // compilation of something later on causes the code to grow and the targe t
2295 // position to move. 2321 // position to move.
2296 int32_t loopEnd = fRXPat->fCompiledPat->size(); 2322 int32_t loopEnd = fRXPat->fCompiledPat->size();
2297 op = URX_BUILD(URX_RELOC_OPRND, loopEnd); 2323 op = buildOp(URX_RELOC_OPRND, loopEnd);
2298 fRXPat->fCompiledPat->setElementAt(op, topOfBlock+1); 2324 fRXPat->fCompiledPat->setElementAt(op, topOfBlock+1);
2299 2325
2300 // Followed by the min and max counts. 2326 // Followed by the min and max counts.
2301 fRXPat->fCompiledPat->setElementAt(fIntervalLow, topOfBlock+2); 2327 fRXPat->fCompiledPat->setElementAt(fIntervalLow, topOfBlock+2);
2302 fRXPat->fCompiledPat->setElementAt(fIntervalUpper, topOfBlock+3); 2328 fRXPat->fCompiledPat->setElementAt(fIntervalUpper, topOfBlock+3);
2303 2329
2304 // Apend the CTR_LOOP op. The operand is the location of the CTR_INIT op. 2330 // Apend the CTR_LOOP op. The operand is the location of the CTR_INIT op.
2305 // Goes at end of the block being looped over, so just append to the code so far. 2331 // Goes at end of the block being looped over, so just append to the code so far.
2306 op = URX_BUILD(LoopOp, topOfBlock); 2332 appendOp(LoopOp, topOfBlock);
2307 fRXPat->fCompiledPat->addElement(op, *fStatus);
2308 2333
2309 if ((fIntervalLow & 0xff000000) != 0 || 2334 if ((fIntervalLow & 0xff000000) != 0 ||
2310 (fIntervalUpper > 0 && (fIntervalUpper & 0xff000000) != 0)) { 2335 (fIntervalUpper > 0 && (fIntervalUpper & 0xff000000) != 0)) {
2311 error(U_REGEX_NUMBER_TOO_BIG); 2336 error(U_REGEX_NUMBER_TOO_BIG);
2312 } 2337 }
2313 2338
2314 if (fIntervalLow > fIntervalUpper && fIntervalUpper != -1) { 2339 if (fIntervalLow > fIntervalUpper && fIntervalUpper != -1) {
2315 error(U_REGEX_MAX_LT_MIN); 2340 error(U_REGEX_MAX_LT_MIN);
2316 } 2341 }
2317 } 2342 }
2318 2343
2319 2344
2320 2345
2321 UBool RegexCompile::compileInlineInterval() { 2346 UBool RegexCompile::compileInlineInterval() {
2322 if (fIntervalUpper > 10 || fIntervalUpper < fIntervalLow) { 2347 if (fIntervalUpper > 10 || fIntervalUpper < fIntervalLow) {
2323 // Too big to inline. Fail, which will cause looping code to be generat ed. 2348 // Too big to inline. Fail, which will cause looping code to be generat ed.
2324 // (Upper < Lower picks up unbounded upper and errors, both.) 2349 // (Upper < Lower picks up unbounded upper and errors, both.)
2325 return FALSE; 2350 return FALSE;
2326 } 2351 }
2327 2352
2328 int32_t topOfBlock = blockTopLoc(FALSE); 2353 int32_t topOfBlock = blockTopLoc(FALSE);
2329 if (fIntervalUpper == 0) { 2354 if (fIntervalUpper == 0) {
2330 // Pathological case. Attempt no matches, as if the block doesn't exist . 2355 // Pathological case. Attempt no matches, as if the block doesn't exist .
2356 // Discard the generated code for the block.
2357 // If the block included parens, discard the info pertaining to them as well.
2331 fRXPat->fCompiledPat->setSize(topOfBlock); 2358 fRXPat->fCompiledPat->setSize(topOfBlock);
2359 if (fMatchOpenParen >= topOfBlock) {
2360 fMatchOpenParen = -1;
2361 }
2362 if (fMatchCloseParen >= topOfBlock) {
2363 fMatchCloseParen = -1;
2364 }
2332 return TRUE; 2365 return TRUE;
2333 } 2366 }
2334 2367
2335 if (topOfBlock != fRXPat->fCompiledPat->size()-1 && fIntervalUpper != 1) { 2368 if (topOfBlock != fRXPat->fCompiledPat->size()-1 && fIntervalUpper != 1) {
2336 // The thing being repeated is not a single op, but some 2369 // The thing being repeated is not a single op, but some
2337 // more complex block. Do it as a loop, not inlines. 2370 // more complex block. Do it as a loop, not inlines.
2338 // Note that things "repeated" a max of once are handled as inline, be cause 2371 // Note that things "repeated" a max of once are handled as inline, be cause
2339 // the one copy of the code already generated is just fine. 2372 // the one copy of the code already generated is just fine.
2340 return FALSE; 2373 return FALSE;
2341 } 2374 }
2342 2375
2343 // Pick up the opcode that is to be repeated 2376 // Pick up the opcode that is to be repeated
2344 // 2377 //
2345 int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(topOfBlock); 2378 int32_t op = (int32_t)fRXPat->fCompiledPat->elementAti(topOfBlock);
2346 2379
2347 // Compute the pattern location where the inline sequence 2380 // Compute the pattern location where the inline sequence
2348 // will end, and set up the state save op that will be needed. 2381 // will end, and set up the state save op that will be needed.
2349 // 2382 //
2350 int32_t endOfSequenceLoc = fRXPat->fCompiledPat->size()-1 2383 int32_t endOfSequenceLoc = fRXPat->fCompiledPat->size()-1
2351 + fIntervalUpper + (fIntervalUpper-fIntervalLow) ; 2384 + fIntervalUpper + (fIntervalUpper-fIntervalLow) ;
2352 int32_t saveOp = URX_BUILD(URX_STATE_SAVE, endOfSequenceLoc); 2385 int32_t saveOp = buildOp(URX_STATE_SAVE, endOfSequenceLoc);
2353 if (fIntervalLow == 0) { 2386 if (fIntervalLow == 0) {
2354 insertOp(topOfBlock); 2387 insertOp(topOfBlock);
2355 fRXPat->fCompiledPat->setElementAt(saveOp, topOfBlock); 2388 fRXPat->fCompiledPat->setElementAt(saveOp, topOfBlock);
2356 } 2389 }
2357 2390
2358 2391
2359 2392
2360 // Loop, emitting the op for the thing being repeated each time. 2393 // Loop, emitting the op for the thing being repeated each time.
2361 // Loop starts at 1 because one instance of the op already exists in the pattern, 2394 // Loop starts at 1 because one instance of the op already exists in the pattern,
2362 // it was put there when it was originally encountered. 2395 // it was put there when it was originally encountered.
2363 int32_t i; 2396 int32_t i;
2364 for (i=1; i<fIntervalUpper; i++ ) { 2397 for (i=1; i<fIntervalUpper; i++ ) {
2365 if (i == fIntervalLow) { 2398 if (i >= fIntervalLow) {
2366 fRXPat->fCompiledPat->addElement(saveOp, *fStatus); 2399 appendOp(saveOp);
2367 } 2400 }
2368 if (i > fIntervalLow) { 2401 appendOp(op);
2369 fRXPat->fCompiledPat->addElement(saveOp, *fStatus);
2370 }
2371 fRXPat->fCompiledPat->addElement(op, *fStatus);
2372 } 2402 }
2373 return TRUE; 2403 return TRUE;
2374 } 2404 }
2375 2405
2376 2406
2377 2407
2378 //------------------------------------------------------------------------------ 2408 //------------------------------------------------------------------------------
2379 // 2409 //
2380 // caseInsensitiveStart given a single code point from a pattern string, dete rmine the 2410 // caseInsensitiveStart given a single code point from a pattern string, dete rmine the
2381 // set of characters that could potentially begin a case -insensitive 2411 // set of characters that could potentially begin a case -insensitive
(...skipping 1198 matching lines...) Expand 10 before | Expand all | Expand 10 after
3580 case URX_CTR_LOOP_NG: 3610 case URX_CTR_LOOP_NG:
3581 case URX_RELOC_OPRND: 3611 case URX_RELOC_OPRND:
3582 case URX_JMPX: 3612 case URX_JMPX:
3583 case URX_JMP_SAV: 3613 case URX_JMP_SAV:
3584 case URX_JMP_SAV_X: 3614 case URX_JMP_SAV_X:
3585 // These are instructions with operands that refer to code locations . 3615 // These are instructions with operands that refer to code locations .
3586 { 3616 {
3587 int32_t operandAddress = URX_VAL(op); 3617 int32_t operandAddress = URX_VAL(op);
3588 U_ASSERT(operandAddress>=0 && operandAddress<deltas.size()); 3618 U_ASSERT(operandAddress>=0 && operandAddress<deltas.size());
3589 int32_t fixedOperandAddress = operandAddress - deltas.elementAti (operandAddress); 3619 int32_t fixedOperandAddress = operandAddress - deltas.elementAti (operandAddress);
3590 op = URX_BUILD(opType, fixedOperandAddress); 3620 op = buildOp(opType, fixedOperandAddress);
3591 fRXPat->fCompiledPat->setElementAt(op, dst); 3621 fRXPat->fCompiledPat->setElementAt(op, dst);
3592 dst++; 3622 dst++;
3593 break; 3623 break;
3594 } 3624 }
3595 3625
3596 case URX_BACKREF: 3626 case URX_BACKREF:
3597 case URX_BACKREF_I: 3627 case URX_BACKREF_I:
3598 { 3628 {
3599 int32_t where = URX_VAL(op); 3629 int32_t where = URX_VAL(op);
3600 if (where > fRXPat->fGroupMap->size()) { 3630 if (where > fRXPat->fGroupMap->size()) {
3601 error(U_REGEX_INVALID_BACK_REF); 3631 error(U_REGEX_INVALID_BACK_REF);
3602 break; 3632 break;
3603 } 3633 }
3604 where = fRXPat->fGroupMap->elementAti(where-1); 3634 where = fRXPat->fGroupMap->elementAti(where-1);
3605 op = URX_BUILD(opType, where); 3635 op = buildOp(opType, where);
3606 fRXPat->fCompiledPat->setElementAt(op, dst); 3636 fRXPat->fCompiledPat->setElementAt(op, dst);
3607 dst++; 3637 dst++;
3608 3638
3609 fRXPat->fNeedsAltInput = TRUE; 3639 fRXPat->fNeedsAltInput = TRUE;
3610 break; 3640 break;
3611 } 3641 }
3612 case URX_RESERVED_OP: 3642 case URX_RESERVED_OP:
3613 case URX_RESERVED_OP_N: 3643 case URX_RESERVED_OP_N:
3614 case URX_BACKTRACK: 3644 case URX_BACKTRACK:
3615 case URX_END: 3645 case URX_END:
(...skipping 331 matching lines...) Expand 10 before | Expand all | Expand 10 after
3947 fEOLComments = TRUE; 3977 fEOLComments = TRUE;
3948 3978
3949 // putc(c.fChar, stdout); 3979 // putc(c.fChar, stdout);
3950 } 3980 }
3951 3981
3952 3982
3953 3983
3954 //------------------------------------------------------------------------------ 3984 //------------------------------------------------------------------------------
3955 // 3985 //
3956 // scanNamedChar 3986 // scanNamedChar
3957 // Get a UChar32 from a \N{UNICODE CHARACTER NAME} in the pattern. 3987 // Get a UChar32 from a \N{UNICODE CHARACTER NAME} in the pattern.
3958 // 3988 //
3959 // The scan position will be at the 'N'. On return 3989 // The scan position will be at the 'N'. On return
3960 // the scan position should be just after the '}' 3990 // the scan position should be just after the '}'
3961 // 3991 //
3962 // Return the UChar32 3992 // Return the UChar32
3963 // 3993 //
3964 //------------------------------------------------------------------------------ 3994 //------------------------------------------------------------------------------
3965 UChar32 RegexCompile::scanNamedChar() { 3995 UChar32 RegexCompile::scanNamedChar() {
3966 if (U_FAILURE(*fStatus)) { 3996 if (U_FAILURE(*fStatus)) {
3967 return 0; 3997 return 0;
(...skipping 451 matching lines...) Expand 10 before | Expand all | Expand 10 after
4419 4449
4420 void RegexCompile::setPushOp(int32_t op) { 4450 void RegexCompile::setPushOp(int32_t op) {
4421 setEval(op); 4451 setEval(op);
4422 fSetOpStack.push(op, *fStatus); 4452 fSetOpStack.push(op, *fStatus);
4423 fSetStack.push(new UnicodeSet(), *fStatus); 4453 fSetStack.push(new UnicodeSet(), *fStatus);
4424 } 4454 }
4425 4455
4426 U_NAMESPACE_END 4456 U_NAMESPACE_END
4427 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 4457 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
4428 4458
OLDNEW
« no previous file with comments | « source/i18n/regexcmp.h ('k') | source/i18n/regeximp.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698