Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(29)

Side by Side Diff: source/i18n/repattrn.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/i18n/rematch.cpp ('k') | source/i18n/rulebasedcollator.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // 1 //
2 // file: repattrn.cpp 2 // file: repattrn.cpp
3 // 3 //
4 /* 4 /*
5 *************************************************************************** 5 ***************************************************************************
6 * Copyright (C) 2002-2012 International Business Machines Corporation * 6 * Copyright (C) 2002-2013 International Business Machines Corporation *
7 * and others. All rights reserved. * 7 * and others. All rights reserved. *
8 *************************************************************************** 8 ***************************************************************************
9 */ 9 */
10 10
11 #include "unicode/utypes.h" 11 #include "unicode/utypes.h"
12 12
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 14
15 #include "unicode/regex.h" 15 #include "unicode/regex.h"
16 #include "unicode/uclean.h" 16 #include "unicode/uclean.h"
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after
268 //--------------------------------------------------------------------- 268 //---------------------------------------------------------------------
269 RegexPattern * U_EXPORT2 269 RegexPattern * U_EXPORT2
270 RegexPattern::compile(const UnicodeString &regex, 270 RegexPattern::compile(const UnicodeString &regex,
271 uint32_t flags, 271 uint32_t flags,
272 UParseError &pe, 272 UParseError &pe,
273 UErrorCode &status) 273 UErrorCode &status)
274 { 274 {
275 if (U_FAILURE(status)) { 275 if (U_FAILURE(status)) {
276 return NULL; 276 return NULL;
277 } 277 }
278 278
279 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 279 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
280 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 280 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
281 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 281 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
282 282
283 if ((flags & ~allFlags) != 0) { 283 if ((flags & ~allFlags) != 0) {
284 status = U_REGEX_INVALID_FLAG; 284 status = U_REGEX_INVALID_FLAG;
285 return NULL; 285 return NULL;
286 } 286 }
287 287
288 if ((flags & UREGEX_CANON_EQ) != 0) { 288 if ((flags & UREGEX_CANON_EQ) != 0) {
289 status = U_REGEX_UNIMPLEMENTED; 289 status = U_REGEX_UNIMPLEMENTED;
290 return NULL; 290 return NULL;
291 } 291 }
292 292
293 RegexPattern *This = new RegexPattern; 293 RegexPattern *This = new RegexPattern;
294 if (This == NULL) { 294 if (This == NULL) {
295 status = U_MEMORY_ALLOCATION_ERROR; 295 status = U_MEMORY_ALLOCATION_ERROR;
296 return NULL; 296 return NULL;
297 } 297 }
298 if (U_FAILURE(This->fDeferredStatus)) { 298 if (U_FAILURE(This->fDeferredStatus)) {
299 status = This->fDeferredStatus; 299 status = This->fDeferredStatus;
300 delete This; 300 delete This;
301 return NULL; 301 return NULL;
302 } 302 }
303 This->fFlags = flags; 303 This->fFlags = flags;
304 304
305 RegexCompile compiler(This, status); 305 RegexCompile compiler(This, status);
306 compiler.compile(regex, pe, status); 306 compiler.compile(regex, pe, status);
307 307
308 if (U_FAILURE(status)) { 308 if (U_FAILURE(status)) {
309 delete This; 309 delete This;
310 This = NULL; 310 This = NULL;
311 } 311 }
312 312
313 return This; 313 return This;
314 } 314 }
315 315
316 316
317 // 317 //
318 // compile, UText mode 318 // compile, UText mode
319 // 319 //
320 RegexPattern * U_EXPORT2 320 RegexPattern * U_EXPORT2
321 RegexPattern::compile(UText *regex, 321 RegexPattern::compile(UText *regex,
322 uint32_t flags, 322 uint32_t flags,
(...skipping 25 matching lines...) Expand all
348 } 348 }
349 if (U_FAILURE(This->fDeferredStatus)) { 349 if (U_FAILURE(This->fDeferredStatus)) {
350 status = This->fDeferredStatus; 350 status = This->fDeferredStatus;
351 delete This; 351 delete This;
352 return NULL; 352 return NULL;
353 } 353 }
354 This->fFlags = flags; 354 This->fFlags = flags;
355 355
356 RegexCompile compiler(This, status); 356 RegexCompile compiler(This, status);
357 compiler.compile(regex, pe, status); 357 compiler.compile(regex, pe, status);
358 358
359 if (U_FAILURE(status)) { 359 if (U_FAILURE(status)) {
360 delete This; 360 delete This;
361 This = NULL; 361 This = NULL;
362 } 362 }
363 363
364 return This; 364 return This;
365 } 365 }
366 366
367 // 367 //
368 // compile with default flags. 368 // compile with default flags.
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after
531 UnicodeString RegexPattern::pattern() const { 531 UnicodeString RegexPattern::pattern() const {
532 if (fPatternString != NULL) { 532 if (fPatternString != NULL) {
533 return *fPatternString; 533 return *fPatternString;
534 } else if (fPattern == NULL) { 534 } else if (fPattern == NULL) {
535 return UnicodeString(); 535 return UnicodeString();
536 } else { 536 } else {
537 UErrorCode status = U_ZERO_ERROR; 537 UErrorCode status = U_ZERO_ERROR;
538 int64_t nativeLen = utext_nativeLength(fPattern); 538 int64_t nativeLen = utext_nativeLength(fPattern);
539 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error 539 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
540 UnicodeString result; 540 UnicodeString result;
541 541
542 status = U_ZERO_ERROR; 542 status = U_ZERO_ERROR;
543 UChar *resultChars = result.getBuffer(len16); 543 UChar *resultChars = result.getBuffer(len16);
544 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 544 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
545 result.releaseBuffer(len16); 545 result.releaseBuffer(len16);
546 546
547 return result; 547 return result;
548 } 548 }
549 } 549 }
550 550
551 551
552 552
553 553
554 //--------------------------------------------------------------------- 554 //---------------------------------------------------------------------
555 // 555 //
556 // patternText 556 // patternText
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
615 } 615 }
616 616
617 617
618 618
619 //--------------------------------------------------------------------- 619 //---------------------------------------------------------------------
620 // 620 //
621 // dump Output the compiled form of the pattern. 621 // dump Output the compiled form of the pattern.
622 // Debugging function only. 622 // Debugging function only.
623 // 623 //
624 //--------------------------------------------------------------------- 624 //---------------------------------------------------------------------
625 void RegexPattern::dumpOp(int32_t index) const {
626 (void)index; // Suppress warnings in non-debug build.
625 #if defined(REGEX_DEBUG) 627 #if defined(REGEX_DEBUG)
626 void RegexPattern::dumpOp(int32_t index) const {
627 static const char * const opNames[] = {URX_OPCODE_NAMES}; 628 static const char * const opNames[] = {URX_OPCODE_NAMES};
628 int32_t op = fCompiledPat->elementAti(index); 629 int32_t op = fCompiledPat->elementAti(index);
629 int32_t val = URX_VAL(op); 630 int32_t val = URX_VAL(op);
630 int32_t type = URX_TYPE(op); 631 int32_t type = URX_TYPE(op);
631 int32_t pinnedType = type; 632 int32_t pinnedType = type;
632 if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) { 633 if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
633 pinnedType = 0; 634 pinnedType = 0;
634 } 635 }
635 636
636 REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType])); 637 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
637 switch (type) { 638 switch (type) {
638 case URX_NOP: 639 case URX_NOP:
639 case URX_DOTANY: 640 case URX_DOTANY:
640 case URX_DOTANY_ALL: 641 case URX_DOTANY_ALL:
641 case URX_FAIL: 642 case URX_FAIL:
642 case URX_CARET: 643 case URX_CARET:
643 case URX_DOLLAR: 644 case URX_DOLLAR:
644 case URX_BACKSLASH_G: 645 case URX_BACKSLASH_G:
645 case URX_BACKSLASH_X: 646 case URX_BACKSLASH_X:
646 case URX_END: 647 case URX_END:
(...skipping 28 matching lines...) Expand all
675 case URX_LA_END: 676 case URX_LA_END:
676 case URX_BACKREF_I: 677 case URX_BACKREF_I:
677 case URX_LB_START: 678 case URX_LB_START:
678 case URX_LB_CONT: 679 case URX_LB_CONT:
679 case URX_LB_END: 680 case URX_LB_END:
680 case URX_LBN_CONT: 681 case URX_LBN_CONT:
681 case URX_LBN_END: 682 case URX_LBN_END:
682 case URX_LOOP_C: 683 case URX_LOOP_C:
683 case URX_LOOP_DOT_I: 684 case URX_LOOP_DOT_I:
684 // types with an integer operand field. 685 // types with an integer operand field.
685 REGEX_DUMP_DEBUG_PRINTF(("%d", val)); 686 printf("%d", val);
686 break; 687 break;
687 688
688 case URX_ONECHAR: 689 case URX_ONECHAR:
689 case URX_ONECHAR_I: 690 case URX_ONECHAR_I:
690 REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?')); 691 printf("%c", val<256?val:'?');
691 break; 692 break;
692 693
693 case URX_STRING: 694 case URX_STRING:
694 case URX_STRING_I: 695 case URX_STRING_I:
695 { 696 {
696 int32_t lengthOp = fCompiledPat->elementAti(index+1); 697 int32_t lengthOp = fCompiledPat->elementAti(index+1);
697 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 698 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
698 int32_t length = URX_VAL(lengthOp); 699 int32_t length = URX_VAL(lengthOp);
699 int32_t i; 700 int32_t i;
700 for (i=val; i<val+length; i++) { 701 for (i=val; i<val+length; i++) {
701 UChar c = fLiteralText[i]; 702 UChar c = fLiteralText[i];
702 if (c < 32 || c >= 256) {c = '.';} 703 if (c < 32 || c >= 256) {c = '.';}
703 REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 704 printf("%c", c);
704 } 705 }
705 } 706 }
706 break; 707 break;
707 708
708 case URX_SETREF: 709 case URX_SETREF:
709 case URX_LOOP_SR_I: 710 case URX_LOOP_SR_I:
710 { 711 {
711 UnicodeString s; 712 UnicodeString s;
712 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 713 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
713 set->toPattern(s, TRUE); 714 set->toPattern(s, TRUE);
714 for (int32_t i=0; i<s.length(); i++) { 715 for (int32_t i=0; i<s.length(); i++) {
715 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i))); 716 printf("%c", s.charAt(i));
716 } 717 }
717 } 718 }
718 break; 719 break;
719 720
720 case URX_STATIC_SETREF: 721 case URX_STATIC_SETREF:
721 case URX_STAT_SETREF_N: 722 case URX_STAT_SETREF_N:
722 { 723 {
723 UnicodeString s; 724 UnicodeString s;
724 if (val & URX_NEG_SET) { 725 if (val & URX_NEG_SET) {
725 REGEX_DUMP_DEBUG_PRINTF(("NOT ")); 726 printf("NOT ");
726 val &= ~URX_NEG_SET; 727 val &= ~URX_NEG_SET;
727 } 728 }
728 UnicodeSet *set = fStaticSets[val]; 729 UnicodeSet *set = fStaticSets[val];
729 set->toPattern(s, TRUE); 730 set->toPattern(s, TRUE);
730 for (int32_t i=0; i<s.length(); i++) { 731 for (int32_t i=0; i<s.length(); i++) {
731 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i))); 732 printf("%c", s.charAt(i));
732 } 733 }
733 } 734 }
734 break; 735 break;
735 736
736 737
737 default: 738 default:
738 REGEX_DUMP_DEBUG_PRINTF(("??????")); 739 printf("??????");
739 break; 740 break;
740 } 741 }
741 REGEX_DUMP_DEBUG_PRINTF(("\n")); 742 printf("\n");
743 #endif
742 } 744 }
743 #endif
744 745
745 746
747 void RegexPattern::dumpPattern() const {
746 #if defined(REGEX_DEBUG) 748 #if defined(REGEX_DEBUG)
747 U_CAPI void U_EXPORT2
748 RegexPatternDump(const RegexPattern *This) {
749 int index; 749 int index;
750 int i; 750 int i;
751 751
752 REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: ")); 752 printf("Original Pattern: ");
753 UChar32 c = utext_next32From(This->fPattern, 0); 753 UChar32 c = utext_next32From(fPattern, 0);
754 while (c != U_SENTINEL) { 754 while (c != U_SENTINEL) {
755 if (c<32 || c>256) { 755 if (c<32 || c>256) {
756 c = '.'; 756 c = '.';
757 } 757 }
758 REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 758 printf("%c", c);
759 759
760 c = UTEXT_NEXT32(This->fPattern); 760 c = UTEXT_NEXT32(fPattern);
761 } 761 }
762 REGEX_DUMP_DEBUG_PRINTF(("\n")); 762 printf("\n");
763 REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen)); 763 printf(" Min Match Length: %d\n", fMinMatchLen);
764 REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType))); 764 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
765 if (This->fStartType == START_STRING) { 765 if (fStartType == START_STRING) {
766 REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \"")); 766 printf(" Initial match string: \"");
767 for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) { 767 for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
768 REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates. 768 printf("%c", fLiteralText[i]); // TODO: non-printables, surrogates.
769 } 769 }
770 REGEX_DUMP_DEBUG_PRINTF(("\"\n")); 770 printf("\"\n");
771 771
772 } else if (This->fStartType == START_SET) { 772 } else if (fStartType == START_SET) {
773 int32_t numSetChars = This->fInitialChars->size(); 773 int32_t numSetChars = fInitialChars->size();
774 if (numSetChars > 20) { 774 if (numSetChars > 20) {
775 numSetChars = 20; 775 numSetChars = 20;
776 } 776 }
777 REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : ")); 777 printf(" Match First Chars : ");
778 for (i=0; i<numSetChars; i++) { 778 for (i=0; i<numSetChars; i++) {
779 UChar32 c = This->fInitialChars->charAt(i); 779 UChar32 c = fInitialChars->charAt(i);
780 if (0x20<c && c <0x7e) { 780 if (0x20<c && c <0x7e) {
781 REGEX_DUMP_DEBUG_PRINTF(("%c ", c)); 781 printf("%c ", c);
782 } else { 782 } else {
783 REGEX_DUMP_DEBUG_PRINTF(("%#x ", c)); 783 printf("%#x ", c);
784 } 784 }
785 } 785 }
786 if (numSetChars < This->fInitialChars->size()) { 786 if (numSetChars < fInitialChars->size()) {
787 REGEX_DUMP_DEBUG_PRINTF((" ...")); 787 printf(" ...");
788 } 788 }
789 REGEX_DUMP_DEBUG_PRINTF(("\n")); 789 printf("\n");
790 790
791 } else if (This->fStartType == START_CHAR) { 791 } else if (fStartType == START_CHAR) {
792 REGEX_DUMP_DEBUG_PRINTF((" First char of Match : ")); 792 printf(" First char of Match : ");
793 if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) { 793 if (0x20 < fInitialChar && fInitialChar<0x7e) {
794 REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar)); 794 printf("%c\n", fInitialChar);
795 } else { 795 } else {
796 REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar)); 796 printf("%#x\n", fInitialChar);
797 } 797 }
798 } 798 }
799 799
800 REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \ 800 printf("\nIndex Binary Type Operand\n" \
801 "-------------------------------------------\n")); 801 "-------------------------------------------\n");
802 for (index = 0; index<This->fCompiledPat->size(); index++) { 802 for (index = 0; index<fCompiledPat->size(); index++) {
803 This->dumpOp(index); 803 dumpOp(index);
804 } 804 }
805 REGEX_DUMP_DEBUG_PRINTF(("\n\n")); 805 printf("\n\n");
806 #endif
806 } 807 }
807 #endif
808 808
809 809
810 810
811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
812 812
813 U_NAMESPACE_END 813 U_NAMESPACE_END
814 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 814 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
OLDNEW
« no previous file with comments | « source/i18n/rematch.cpp ('k') | source/i18n/rulebasedcollator.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698