Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: icu46/source/common/rbbisetb.h

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « icu46/source/common/rbbiscan.cpp ('k') | icu46/source/common/rbbisetb.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 //
2 // rbbisetb.h
3 /*
4 **********************************************************************
5 * Copyright (c) 2001-2005, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9
10 #ifndef RBBISETB_H
11 #define RBBISETB_H
12
13 #include "unicode/utypes.h"
14 #include "unicode/uobject.h"
15 #include "rbbirb.h"
16 #include "uvector.h"
17
18 struct UNewTrie;
19
20 U_NAMESPACE_BEGIN
21
22 //
23 // RBBISetBuilder Derives the character categories used by the runtime RBBI engine
24 // from the Unicode Sets appearing in the source RBBI rules, and
25 // creates the TRIE table used to map from Unicode to the
26 // character categories.
27 //
28
29
30 //
31 // RangeDescriptor
32 //
33 // Each of the non-overlapping character ranges gets one of these descriptors.
34 // All of them are strung together in a linked list, which is kept in order
35 // (by character)
36 //
37 class RangeDescriptor : public UMemory {
38 public:
39 UChar32 fStartChar; // Start of range, unicode 32 bit value.
40 UChar32 fEndChar; // End of range, unicode 32 bit value.
41 int32_t fNum; // runtime-mapped input value for this range.
42 UVector *fIncludesSets; // vector of the original
43 // Unicode sets that include this range.
44 // (Contains ptrs to uset nodes)
45 RangeDescriptor *fNext; // Next RangeDescriptor in the linked list.
46
47 RangeDescriptor(UErrorCode &status); // Errors are reported through status.
48 RangeDescriptor(const RangeDescriptor &other, UErrorCode &status); // Copying constructor that can report failure through status.
49 ~RangeDescriptor();
50 void split(UChar32 where, UErrorCode &status); // Split this range in two at "where", with
51 // where appearing in the second (higher) part.
52 void setDictionaryFlag(); // Check whether this range appears as part of
53 // the Unicode set named "dictionary"
54
55 private:
56 RangeDescriptor(const RangeDescriptor &other); // forbid plain copying of this class; use the (other, status) constructor
57 RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class
58 };
59
60
61 //
62 // RBBISetBuilder Handles processing of Unicode Sets from RBBI rules.
63 //
64 // Starting with the rules parse tree from the scanner,
65 //
66 // - Enumerate the set of UnicodeSets that are referenced
67 // by the RBBI rules.
68 // - compute a derived set of non-overlapping UnicodeSets
69 // that will correspond to columns in the state table for
70 // the RBBI execution engine.
71 // - construct the trie table that maps input characters
72 // to set numbers in the non-overlapping set of sets.
73 //
74
75
76 class RBBISetBuilder : public UMemory {
77 public:
78 RBBISetBuilder(RBBIRuleBuilder *rb);
79 ~RBBISetBuilder();
80
81 void build();
82 void addValToSets(UVector *sets, uint32_t val);
83 void addValToSet (RBBINode *usetNode, uint32_t val);
84 int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
85 // runtime state machine, which are the same as
86 // columns in the DFA state table
87 int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie.
88 void serializeTrie(uint8_t *where); // write out the serialized Trie.
89 // NOTE(review): presumably "where" must have room for getTrieSize() bytes - confirm.
89 UChar32 getFirstChar(int32_t val) const;
90 UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
91 // character were encountered.
92 #ifdef RBBI_DEBUG
93 void printSets();
94 void printRanges();
95 void printRangeGroups();
96 #else // Without RBBI_DEBUG the print calls below expand to nothing.
97 #define printSets()
98 #define printRanges()
99 #define printRangeGroups()
100 #endif
101
102 private:
103 void numberSets();
104
105 RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us.
106 UErrorCode *fStatus; // NOTE(review): presumably the error status shared with fRB - confirm.
107
108 RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
109
110 UNewTrie *fTrie; // The mapping TRIE that is the end result of processing
111 uint32_t fTrieSize; // the Unicode Sets.
112
113 // Groups correspond to character categories -
114 // groups of ranges that are in the same original UnicodeSets.
115 // fGroupCount is the index of the last used group.
116 // fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
117 // State table column 0 is not used. Column 1 is for end-of-input.
118 // column 2 is for group 0. Funny counting.
119 int32_t fGroupCount;
120
121 UBool fSawBOF; // NOTE(review): presumably the flag returned by sawBOF() - confirm.
122
123 RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class
124 RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class
125 };
126
127
128
129 U_NAMESPACE_END
130 #endif
OLDNEW
« no previous file with comments | « icu46/source/common/rbbiscan.cpp ('k') | icu46/source/common/rbbisetb.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698