OLD | NEW |
1 | 1 |
2 // | 2 // |
3 // file: rbbiscan.cpp | 3 // file: rbbiscan.cpp |
4 // | 4 // |
5 // Copyright (C) 2002-2012, International Business Machines Corporation and oth
ers. | 5 // Copyright (C) 2002-2014, International Business Machines Corporation and oth
ers. |
6 // All Rights Reserved. | 6 // All Rights Reserved. |
7 // | 7 // |
8 // This file contains the Rule Based Break Iterator Rule Builder functions for | 8 // This file contains the Rule Based Break Iterator Rule Builder functions for |
9 // scanning the rules and assembling a parse tree. This is the first phase | 9 // scanning the rules and assembling a parse tree. This is the first phase |
10 // of compiling the rules. | 10 // of compiling the rules. |
11 // | 11 // |
12 // The overall compilation of the rules is managed by class RBBIRuleBuilder, which will | 12 // The overall compilation of the rules is managed by class RBBIRuleBuilder, which will |
13 // create and use an instance of this class as part of the process. | 13 // create and use an instance of this class as part of the process. |
14 // | 14 // |
15 | 15 |
(...skipping 12 matching lines...) Expand all Loading... |
28 | 28 |
29 #include "rbbirpt.h" // Contains state table for the rbbi rules parser. | 29 #include "rbbirpt.h" // Contains state table for the rbbi rules parser. |
30 // generated by a Perl script. | 30 // generated by a Perl script. |
31 #include "rbbirb.h" | 31 #include "rbbirb.h" |
32 #include "rbbinode.h" | 32 #include "rbbinode.h" |
33 #include "rbbiscan.h" | 33 #include "rbbiscan.h" |
34 #include "rbbitblb.h" | 34 #include "rbbitblb.h" |
35 | 35 |
36 #include "uassert.h" | 36 #include "uassert.h" |
37 | 37 |
38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
39 | |
40 //------------------------------------------------------------------------------ | 38 //------------------------------------------------------------------------------ |
41 // | 39 // |
42 // Unicode Set init strings for each of the character classes needed for parsing
a rule file. | 40 // Unicode Set init strings for each of the character classes needed for parsing
a rule file. |
43 // (Initialized with hex values for portability to EBCDIC based ma
chines. | 41 // (Initialized with hex values for portability to EBCDIC based ma
chines. |
44 // Really ugly, but there's no good way to avoid it.) | 42 // Really ugly, but there's no good way to avoid it.) |
45 // | 43 // |
46 // The sets are referred to by name in the rbbirpt.txt, which is th
e | 44 // The sets are referred to by name in the rbbirpt.txt, which is th
e |
47 // source form of the state transition table for the RBBI rule pars
er. | 45 // source form of the state transition table for the RBBI rule pars
er. |
48 // | 46 // |
49 //------------------------------------------------------------------------------ | 47 //------------------------------------------------------------------------------ |
(...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
574 // Only report it if no previous error has been recorded. | 572 // Only report it if no previous error has been recorded. |
575 // | 573 // |
576 //------------------------------------------------------------------------------ | 574 //------------------------------------------------------------------------------ |
577 void RBBIRuleScanner::error(UErrorCode e) { | 575 void RBBIRuleScanner::error(UErrorCode e) { |
578 if (U_SUCCESS(*fRB->fStatus)) { | 576 if (U_SUCCESS(*fRB->fStatus)) { |
579 *fRB->fStatus = e; | 577 *fRB->fStatus = e; |
580 if (fRB->fParseError) { | 578 if (fRB->fParseError) { |
581 fRB->fParseError->line = fLineNum; | 579 fRB->fParseError->line = fLineNum; |
582 fRB->fParseError->offset = fCharNum; | 580 fRB->fParseError->offset = fCharNum; |
583 fRB->fParseError->preContext[0] = 0; | 581 fRB->fParseError->preContext[0] = 0; |
584 fRB->fParseError->preContext[0] = 0; | 582 fRB->fParseError->postContext[0] = 0; |
585 } | 583 } |
586 } | 584 } |
587 } | 585 } |
588 | 586 |
589 | 587 |
590 | 588 |
591 | 589 |
592 //------------------------------------------------------------------------------ | 590 //------------------------------------------------------------------------------ |
593 // | 591 // |
594 // fixOpStack The parse stack holds partially assembled chunks of the parse t
ree. | 592 // fixOpStack The parse stack holds partially assembled chunks of the parse t
ree. |
(...skipping 391 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
986 break; | 984 break; |
987 } | 985 } |
988 if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) { | 986 if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) { |
989 // Table row specified eof and we hit eof on the input. | 987 // Table row specified eof and we hit eof on the input. |
990 break; | 988 break; |
991 } | 989 } |
992 | 990 |
993 if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && //
Table specs a char class && | 991 if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && //
Table specs a char class && |
994 fC.fEscaped == FALSE && //
char is not escaped && | 992 fC.fEscaped == FALSE && //
char is not escaped && |
995 fC.fChar != (UChar32)-1) { //
char is not EOF | 993 fC.fChar != (UChar32)-1) { //
char is not EOF |
996 U_ASSERT((tableEl->fCharClass-128) < LENGTHOF(fRuleSets)); | 994 U_ASSERT((tableEl->fCharClass-128) < UPRV_LENGTHOF(fRuleSets)); |
997 if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { | 995 if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { |
998 // Table row specified a character class, or set of characte
rs, | 996 // Table row specified a character class, or set of characte
rs, |
999 // and the current char matches it. | 997 // and the current char matches it. |
1000 break; | 998 break; |
1001 } | 999 } |
1002 } | 1000 } |
1003 | 1001 |
1004 // No match on this row, advance to the next row for this state, | 1002 // No match on this row, advance to the next row for this state, |
1005 tableEl++; | 1003 tableEl++; |
1006 } | 1004 } |
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1204 // - Eliminates multiple instances of the same set. | 1202 // - Eliminates multiple instances of the same set. |
1205 // - Creates a new uset node if necessary (if this isn't a duplicate
.) | 1203 // - Creates a new uset node if necessary (if this isn't a duplicate
.) |
1206 findSetFor(n->fText, n, uset); | 1204 findSetFor(n->fText, n, uset); |
1207 } | 1205 } |
1208 | 1206 |
1209 } | 1207 } |
1210 | 1208 |
1211 U_NAMESPACE_END | 1209 U_NAMESPACE_END |
1212 | 1210 |
1213 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1211 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |