icu46/source/common/rbbiscan.h - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/rbbiscan.h

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 //

	2 // rbbiscan.h

	3 //

	4 // Copyright (C) 2002-2008, International Business Machines Corporation and others.

	5 // All Rights Reserved.

	6 //

	7 // This file contains declarations for class RBBIRuleScanner

	8 //

	9

	10

	11 #ifndef RBBISCAN_H

	12 #define RBBISCAN_H

	13

	14 #include "unicode/utypes.h"

	15 #include "unicode/uobject.h"

	16 #include "unicode/rbbi.h"

	17 #include "unicode/uniset.h"

	18 #include "unicode/parseerr.h"

	19 #include "uhash.h"

	20 #include "uvector.h"

	21 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that

	22 // looks up references to $variables within a set.

	23 #include "rbbinode.h"

	24 //#include "rbbitblb.h"

	25

	26

	27

	28 U_NAMESPACE_BEGIN

	29

	30 class RBBIRuleBuilder;

	31 class RBBISymbolTable;

	32

	33

	34 //--------------------------------------------------------------------------------

	35 //

	36 // class RBBIRuleScanner does the lowest level, character-at-a-time

	37 // scanning of break iterator rules.

	38 //

	39 // The output of the scanner is parse trees for

	40 // the rule expressions and a list of all Unicode Sets

	41 // encountered.

	42 //

	43 //--------------------------------------------------------------------------------

	44

	45 class RBBIRuleScanner : public UMemory {

	46 public:

	47

	48 enum {

	49 kStackSize = 100 // The size of the state stack for

	50 }; // rules parsing. Corresponds roughly

	51 // to the depth of parentheses nesting

	52 // that is allowed in the rules.

	53

	54 struct RBBIRuleChar {

	55 UChar32 fChar;

	56 UBool fEscaped;

	57 };

	58

	59 RBBIRuleScanner(RBBIRuleBuilder *rb);

	60

	61

	62 virtual ~RBBIRuleScanner();

	63

	64 void nextChar(RBBIRuleChar &c); // Get the next char from the input stream.

	65 // Return false if at end.

	66

	67 UBool push(const RBBIRuleChar &c); // Push (unget) one character.

	68 // Only a single character may be pushed.

	69

	70 void parse(); // Parse the rules, generating two parse

	71 // trees, one each for the forward and

	72 // reverse rules,

	73 // and a list of UnicodeSets encountered.

	74

	75 /**

	76 * Return a rules string without unnecessary

	77 * characters.

	78 */

	79 static UnicodeString stripRules(const UnicodeString &rules);

	80 private:

	81

	82 UBool doParseActions(int32_t a);

	83 void error(UErrorCode e); // error reporting convenience function.

	84 void fixOpStack(RBBINode::OpPrecedence p);

	85 // a character.

	86 void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL);

	87

	88 UChar32 nextCharLL();

	89 #ifdef RBBI_DEBUG

	90 void printNodeStack(const char *title);

	91 #endif

	92 RBBINode *pushNewNode(RBBINode::NodeType t);

	93 void scanSet();

	94

	95

	96 RBBIRuleBuilder *fRB; // The rule builder that we are part of.

	97

	98 int32_t fScanIndex; // Index of current character being processed

	99 // in the rule input string.

	100 int32_t fNextIndex; // Index of the next character, which

	101 // is the first character not yet scanned.

	102 UBool fQuoteMode; // Scan is in a 'quoted region'

	103 int32_t fLineNum; // Line number in input file.

	104 int32_t fCharNum; // Char position within the line.

	105 UChar32 fLastChar; // Previous char, needed to count CR-LF

	106 // as a single line, not two.

	107

	108 RBBIRuleChar fC; // Current char for parse state machine

	109 // processing.

	110 UnicodeString fVarName; // $variableName, valid when we've just

	111 // scanned one.

	112

	113 RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule

	114 // parsing. index by p[state][char-class]

	115

	116 uint16_t fStack[kStackSize]; // State stack, holds state pushes

	117 int32_t fStackPtr; // and pops as specified in the state

	118 // transition rules.

	119

	120 RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created

	121 // during the parse of a rule

	122 int32_t fNodeStackPtr;

	123

	124

	125 UBool fReverseRule; // True if the rule currently being scanned

	126 // is a reverse direction rule (if it

	127 // starts with a '!')

	128

	129 UBool fLookAheadRule; // True if the rule includes a '/'

	130 // somewhere within it.

	131

	132 RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of

	133 // $variable symbols.

	134

	135 UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to

	136 // the sets created while parsing rules.

	137 // The key is the string used for creating

	138 // the set.

	139

	140 UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during

	141 // the scanning of RBBI rules. The

	142 // indices for these are assigned by the

	143 // perl script that builds the state tables.

	144 // See rbbirpt.h.

	145

	146 int32_t fRuleNum; // Counts each rule as it is scanned.

	147

	148 int32_t fOptionStart; // Input index of start of a !!option

	149 // keyword, while being scanned.

	150

	151 UnicodeSet *gRuleSet_rule_char;

	152 UnicodeSet *gRuleSet_white_space;

	153 UnicodeSet *gRuleSet_name_char;

	154 UnicodeSet *gRuleSet_name_start_char;

	155

	156 RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class

	157 RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class

	158 };

	159

	160 U_NAMESPACE_END

	161

	162 #endif

OLD	NEW

« no previous file with comments | « icu46/source/common/rbbirpt.txt ('k') | icu46/source/common/rbbiscan.cpp » ('j') | no next file with comments »