| OLD | NEW |
| 1 | 1 |
| 2 // | 2 // |
| 3 // file: rbbiscan.cpp | 3 // file: rbbiscan.cpp |
| 4 // | 4 // |
| 5 // Copyright (C) 2002-2012, International Business Machines Corporation and others. | 5 // Copyright (C) 2002-2014, International Business Machines Corporation and others. |
| 6 // All Rights Reserved. | 6 // All Rights Reserved. |
| 7 // | 7 // |
| 8 // This file contains the Rule Based Break Iterator Rule Builder functions for | 8 // This file contains the Rule Based Break Iterator Rule Builder functions for |
| 9 // scanning the rules and assembling a parse tree. This is the first phase | 9 // scanning the rules and assembling a parse tree. This is the first phase |
| 10 // of compiling the rules. | 10 // of compiling the rules. |
| 11 // | 11 // |
| 12 // The overall of the rules is managed by class RBBIRuleBuilder, which will | 12 // The overall of the rules is managed by class RBBIRuleBuilder, which will |
| 13 // create and use an instance of this class as part of the process. | 13 // create and use an instance of this class as part of the process. |
| 14 // | 14 // |
| 15 | 15 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 28 | 28 |
| 29 #include "rbbirpt.h" // Contains state table for the rbbi rules parser. | 29 #include "rbbirpt.h" // Contains state table for the rbbi rules parser. |
| 30 // generated by a Perl script. | 30 // generated by a Perl script. |
| 31 #include "rbbirb.h" | 31 #include "rbbirb.h" |
| 32 #include "rbbinode.h" | 32 #include "rbbinode.h" |
| 33 #include "rbbiscan.h" | 33 #include "rbbiscan.h" |
| 34 #include "rbbitblb.h" | 34 #include "rbbitblb.h" |
| 35 | 35 |
| 36 #include "uassert.h" | 36 #include "uassert.h" |
| 37 | 37 |
| 38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
| 39 | |
| 40 //------------------------------------------------------------------------------ | 38 //------------------------------------------------------------------------------ |
| 41 // | 39 // |
| 42 // Unicode Set init strings for each of the character classes needed for parsing a rule file. | 40 // Unicode Set init strings for each of the character classes needed for parsing a rule file. |
| 43 // (Initialized with hex values for portability to EBCDIC based machines. | 41 // (Initialized with hex values for portability to EBCDIC based machines. |
| 44 // Really ugly, but there's no good way to avoid it.) | 42 // Really ugly, but there's no good way to avoid it.) |
| 45 // | 43 // |
| 46 // The sets are referred to by name in the rbbirpt.txt, which is the | 44 // The sets are referred to by name in the rbbirpt.txt, which is the |
| 47 // source form of the state transition table for the RBBI rule parser. | 45 // source form of the state transition table for the RBBI rule parser. |
| 48 // | 46 // |
| 49 //------------------------------------------------------------------------------ | 47 //------------------------------------------------------------------------------ |
| (...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 574 // Only report it if no previous error has been recorded. | 572 // Only report it if no previous error has been recorded. |
| 575 // | 573 // |
| 576 //------------------------------------------------------------------------------ | 574 //------------------------------------------------------------------------------ |
| 577 void RBBIRuleScanner::error(UErrorCode e) { | 575 void RBBIRuleScanner::error(UErrorCode e) { |
| 578 if (U_SUCCESS(*fRB->fStatus)) { | 576 if (U_SUCCESS(*fRB->fStatus)) { |
| 579 *fRB->fStatus = e; | 577 *fRB->fStatus = e; |
| 580 if (fRB->fParseError) { | 578 if (fRB->fParseError) { |
| 581 fRB->fParseError->line = fLineNum; | 579 fRB->fParseError->line = fLineNum; |
| 582 fRB->fParseError->offset = fCharNum; | 580 fRB->fParseError->offset = fCharNum; |
| 583 fRB->fParseError->preContext[0] = 0; | 581 fRB->fParseError->preContext[0] = 0; |
| 584 fRB->fParseError->preContext[0] = 0; | 582 fRB->fParseError->postContext[0] = 0; |
| 585 } | 583 } |
| 586 } | 584 } |
| 587 } | 585 } |
| 588 | 586 |
| 589 | 587 |
| 590 | 588 |
| 591 | 589 |
| 592 //------------------------------------------------------------------------------ | 590 //------------------------------------------------------------------------------ |
| 593 // | 591 // |
| 594 // fixOpStack The parse stack holds partially assembled chunks of the parse tree. | 592 // fixOpStack The parse stack holds partially assembled chunks of the parse tree. |
| (...skipping 391 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 986 break; | 984 break; |
| 987 } | 985 } |
| 988 if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) { | 986 if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) { |
| 989 // Table row specified eof and we hit eof on the input. | 987 // Table row specified eof and we hit eof on the input. |
| 990 break; | 988 break; |
| 991 } | 989 } |
| 992 | 990 |
| 993 if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && //
Table specs a char class && | 991 if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && //
Table specs a char class && |
| 994 fC.fEscaped == FALSE && //
char is not escaped && | 992 fC.fEscaped == FALSE && //
char is not escaped && |
| 995 fC.fChar != (UChar32)-1) { //
char is not EOF | 993 fC.fChar != (UChar32)-1) { //
char is not EOF |
| 996 U_ASSERT((tableEl->fCharClass-128) < LENGTHOF(fRuleSets)); | 994 U_ASSERT((tableEl->fCharClass-128) < UPRV_LENGTHOF(fRuleSets)); |
| 997 if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { | 995 if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { |
| 998 // Table row specified a character class, or set of characters, | 996 // Table row specified a character class, or set of characters, |
| 999 // and the current char matches it. | 997 // and the current char matches it. |
| 1000 break; | 998 break; |
| 1001 } | 999 } |
| 1002 } | 1000 } |
| 1003 | 1001 |
| 1004 // No match on this row, advance to the next row for this state, | 1002 // No match on this row, advance to the next row for this state, |
| 1005 tableEl++; | 1003 tableEl++; |
| 1006 } | 1004 } |
| (...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1204 // - Eliminates mulitiple instances of the same set. | 1202 // - Eliminates mulitiple instances of the same set. |
| 1205 // - Creates a new uset node if necessary (if this isn't a duplicate.) | 1203 // - Creates a new uset node if necessary (if this isn't a duplicate.) |
| 1206 findSetFor(n->fText, n, uset); | 1204 findSetFor(n->fText, n, uset); |
| 1207 } | 1205 } |
| 1208 | 1206 |
| 1209 } | 1207 } |
| 1210 | 1208 |
| 1211 U_NAMESPACE_END | 1209 U_NAMESPACE_END |
| 1212 | 1210 |
| 1213 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 1211 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
| OLD | NEW |