OLD | NEW |
(Empty) | |
| 1 // |
| 2 // rbbisetb.h |
| 3 /* |
| 4 ********************************************************************** |
| 5 * Copyright (c) 2001-2005, International Business Machines |
| 6 * Corporation and others. All Rights Reserved. |
| 7 ********************************************************************** |
| 8 */ |
| 9 |
| 10 #ifndef RBBISETB_H |
| 11 #define RBBISETB_H |
| 12 |
| 13 #include "unicode/utypes.h" |
| 14 #include "unicode/uobject.h" |
| 15 #include "rbbirb.h" |
| 16 #include "uvector.h" |
| 17 |
| 18 struct UNewTrie; |
| 19 |
| 20 U_NAMESPACE_BEGIN |
| 21 |
| 22 // |
| 23 // RBBISetBuilder Derives the character categories used by the runtime RBBI e
ngine |
| 24 // from the Unicode Sets appearing in the source RBBI rules,
and |
| 25 // creates the TRIE table used to map from Unicode to the |
| 26 // character categories. |
| 27 // |
| 28 |
| 29 |
| 30 // |
| 31 // RangeDescriptor |
| 32 // |
| 33 // Each of the non-overlapping character ranges gets one of these descriptor
s. |
| 34 // All of them are strung together in a linked list, which is kept in order |
| 35 // (by character) |
| 36 // |
| 37 class RangeDescriptor : public UMemory { |
| 38 public: |
| 39 UChar32 fStartChar; // Start of range, Unicode 32-bit value. |
| 40 UChar32 fEndChar; // End of range, Unicode 32-bit value. |
| 41 int32_t fNum; // runtime-mapped input value for this r
ange. |
| 42 UVector *fIncludesSets; // vector of the original |
| 43 // Unicode sets that include this rang
e. |
| 44 // (Contains ptrs to uset nodes) |
| 45 RangeDescriptor *fNext; // Next RangeDescriptor in the linked li
st. |
| 46 |
| 47 RangeDescriptor(UErrorCode &status); |
| 48 RangeDescriptor(const RangeDescriptor &other, UErrorCode &status); |
| 49 ~RangeDescriptor(); |
| 50 void split(UChar32 where, UErrorCode &status); // Split this range in two a
t "where", with |
| 51 // where appearing in the second (high
er) part. |
| 52 void setDictionaryFlag(); // Check whether this range appears as p
art of |
| 53 // the Unicode set named "dictionary" |
| 54 |
| 55 private: |
| 56 RangeDescriptor(const RangeDescriptor &other); // forbid copying of this cla
ss |
| 57 RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying
of this class |
| 58 }; |
| 59 |
| 60 |
| 61 // |
| 62 // RBBISetBuilder Handles processing of Unicode Sets from RBBI rules. |
| 63 // |
| 64 // Starting with the rules parse tree from the scanner, |
| 65 // |
| 66 // - Enumerate the set of UnicodeSets that are referenced |
| 67 // by the RBBI rules. |
| 68 // - compute a derived set of non-overlapping UnicodeSets |
| 69 // that will correspond to columns in the state table for |
| 70 // the RBBI execution engine. |
| 71 // - construct the trie table that maps input characters |
| 72 // to set numbers in the non-overlapping set of sets. |
| 73 // |
| 74 |
| 75 |
| 76 class RBBISetBuilder : public UMemory { |
| 77 public: |
| 78 RBBISetBuilder(RBBIRuleBuilder *rb); |
| 79 ~RBBISetBuilder(); |
| 80 |
| 81 void build(); |
| 82 void addValToSets(UVector *sets, uint32_t val); |
| 83 void addValToSet (RBBINode *usetNode, uint32_t val); |
| 84 int32_t getNumCharCategories() const; // Char categories are the same as the i
nput symbol set to the |
| 85 // runtime state machine, which
are the same as |
| 86 // columns in the DFA state tabl
e |
| 87 int32_t getTrieSize() /*const*/; // Size in bytes of the serialized
Trie. |
| 88 void serializeTrie(uint8_t *where); // Write out the serialized Trie. |
| 89 UChar32 getFirstChar(int32_t val) const; |
| 90 UBool sawBOF() const; // Indicate whether any references
to the {bof} pseudo |
| 91 // character were encountered. |
| 92 #ifdef RBBI_DEBUG |
| 93 void printSets(); |
| 94 void printRanges(); |
| 95 void printRangeGroups(); |
| 96 #else |
| 97 #define printSets() |
| 98 #define printRanges() |
| 99 #define printRangeGroups() |
| 100 #endif |
| 101 |
| 102 private: |
| 103 void numberSets(); |
| 104 |
| 105 RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns
us. |
| 106 UErrorCode *fStatus; |
| 107 |
| 108 RangeDescriptor *fRangeList; // Head of the linked list of RangeD
escriptors |
| 109 |
| 110 UNewTrie *fTrie; // The mapping TRIE that is the end
result of processing |
| 111 uint32_t fTrieSize; // the Unicode Sets. |
| 112 |
| 113 // Groups correspond to character categories - |
| 114 // groups of ranges that are in the same original UnicodeSets. |
| 115 // fGroupCount is the index of the last used group. |
| 116 // fGroupCount+1 is also the number of columns in the RBBI state table
being compiled. |
| 117 // State table column 0 is not used. Column 1 is for end-of-input. |
| 118 // Column 2 is for group 0. Funny counting. |
| 119 int32_t fGroupCount; |
| 120 |
| 121 UBool fSawBOF; |
| 122 |
| 123 RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class |
| 124 RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of
this class |
| 125 }; |
| 126 |
| 127 |
| 128 |
| 129 U_NAMESPACE_END |
| 130 #endif |
OLD | NEW |