Index: icu46/source/common/rbbiscan.h |
=================================================================== |
--- icu46/source/common/rbbiscan.h (revision 0) |
+++ icu46/source/common/rbbiscan.h (revision 0) |
@@ -0,0 +1,162 @@ |
+// |
+// rbbiscan.h |
+// |
+// Copyright (C) 2002-2008, International Business Machines Corporation and others. |
+// All Rights Reserved. |
+// |
+// This file contains declarations for class RBBIRuleScanner |
+// |
+ |
+ |
+#ifndef RBBISCAN_H |
+#define RBBISCAN_H |
+ |
+#include "unicode/utypes.h" |
+#include "unicode/uobject.h" |
+#include "unicode/rbbi.h" |
+#include "unicode/uniset.h" |
+#include "unicode/parseerr.h" |
+#include "uhash.h" |
+#include "uvector.h" |
+#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that |
+ // looks up references to $variables within a set. |
+#include "rbbinode.h" |
+//#include "rbbitblb.h" |
+ |
+ |
+ |
+U_NAMESPACE_BEGIN |
+ |
+class RBBIRuleBuilder; |
+class RBBISymbolTable; |
+ |
+ |
+//-------------------------------------------------------------------------------- |
+// |
+// class RBBIRuleScanner does the lowest level, character-at-a-time |
+// scanning of break iterator rules. |
+// |
+// The output of the scanner is parse trees for |
+// the rule expressions and a list of all Unicode Sets |
+// encountered. |
+// |
+//-------------------------------------------------------------------------------- |
+ |
+class RBBIRuleScanner : public UMemory { |
+public: |
+ |
+ enum { |
+ kStackSize = 100 // The size of the state stack for |
+ }; // rules parsing. Corresponds roughly |
+ // to the depth of parentheses nesting |
+ // that is allowed in the rules. |
+ |
+ struct RBBIRuleChar { |
+ UChar32 fChar; |
+ UBool fEscaped; |
+ }; |
+ |
+ RBBIRuleScanner(RBBIRuleBuilder *rb); |
+ |
+ |
+ virtual ~RBBIRuleScanner(); |
+ |
+ void nextChar(RBBIRuleChar &c); // Get the next char from the input stream. |
+ // Return false if at end. |
+ |
+ UBool push(const RBBIRuleChar &c); // Push (unget) one character. |
+ // Only a single character may be pushed. |
+ |
+ void parse(); // Parse the rules, generating two parse |
+ // trees, one each for the forward and |
+ // reverse rules, |
+ // and a list of UnicodeSets encountered. |
+ |
+ /** |
+ * Return a rules string without unnecessary |
+ * characters. |
+ */ |
+ static UnicodeString stripRules(const UnicodeString &rules); |
+private: |
+ |
+ UBool doParseActions(int32_t a); |
+ void error(UErrorCode e); // error reporting convenience function. |
+ void fixOpStack(RBBINode::OpPrecedence p); |
+ // a character. |
+ void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL); |
+ |
+ UChar32 nextCharLL(); |
+#ifdef RBBI_DEBUG |
+ void printNodeStack(const char *title); |
+#endif |
+ RBBINode *pushNewNode(RBBINode::NodeType t); |
+ void scanSet(); |
+ |
+ |
+ RBBIRuleBuilder *fRB; // The rule builder that we are part of. |
+ |
+ int32_t fScanIndex; // Index of current character being processed |
+ // in the rule input string. |
+ int32_t fNextIndex; // Index of the next character, which |
+ // is the first character not yet scanned. |
+ UBool fQuoteMode; // Scan is in a 'quoted region' |
+ int32_t fLineNum; // Line number in input file. |
+ int32_t fCharNum; // Char position within the line. |
+ UChar32 fLastChar; // Previous char, needed to count CR-LF |
+ // as a single line, not two. |
+ |
+ RBBIRuleChar fC; // Current char for parse state machine |
+ // processing. |
+ UnicodeString fVarName; // $variableName, valid when we've just |
+ // scanned one. |
+ |
+ RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule |
+ // parsing. index by p[state][char-class] |
+ |
+ uint16_t fStack[kStackSize]; // State stack, holds state pushes |
+ int32_t fStackPtr; // and pops as specified in the state |
+ // transition rules. |
+ |
+ RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created |
+ // during the parse of a rule |
+ int32_t fNodeStackPtr; |
+ |
+ |
+ UBool fReverseRule; // True if the rule currently being scanned |
+ // is a reverse direction rule (if it |
+ // starts with a '!') |
+ |
+ UBool fLookAheadRule; // True if the rule includes a '/' |
+ // somewhere within it. |
+ |
+ RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of |
+ // $variable symbols. |
+ |
+ UHashtable *fSetTable; // UnicocodeSet hash table, holds indexes to |
+ // the sets created while parsing rules. |
+ // The key is the string used for creating |
+ // the set. |
+ |
+ UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during |
+ // the scanning of RBBI rules. The |
+ // indicies for these are assigned by the |
+ // perl script that builds the state tables. |
+ // See rbbirpt.h. |
+ |
+ int32_t fRuleNum; // Counts each rule as it is scanned. |
+ |
+ int32_t fOptionStart; // Input index of start of a !!option |
+ // keyword, while being scanned. |
+ |
+ UnicodeSet *gRuleSet_rule_char; |
+ UnicodeSet *gRuleSet_white_space; |
+ UnicodeSet *gRuleSet_name_char; |
+ UnicodeSet *gRuleSet_name_start_char; |
+ |
+ RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class |
+ RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class |
+}; |
+ |
+U_NAMESPACE_END |
+ |
+#endif |
Property changes on: icu46/source/common/rbbiscan.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |