OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 1999-2005,2008 International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: rbbidata.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * RBBI data formats Includes |
| 14 * |
| 15 * Structs that describe the format of the Binary RBBI
data, |
| 16 * as it is stored in ICU's data file. |
| 17 * |
| 18 * RBBIDataWrapper - Instances of this class sit between the |
| 19 * raw data structs and the RuleBasedBreakIterator obje
cts |
| 20 * that are created by applications. The wrapper class |
| 21 * provides reference counting for the underlying data, |
| 22 * and direct pointers to data that would not otherwise |
| 23 * be accessible without ugly pointer arithmetic. The |
| 24 * wrapper does not attempt to provide any higher level |
| 25 * abstractions for the data itself. |
| 26 * |
| 27 * There will be only one instance of RBBIDataWrapper fo
r any |
| 28 * set of RBBI run time data being shared by instances |
| 29 * (clones) of RuleBasedBreakIterator. |
| 30 */ |
| 31 |
| 32 #ifndef __RBBIDATA_H__ |
| 33 #define __RBBIDATA_H__ |
| 34 |
| 35 #include "unicode/utypes.h" |
| 36 #include "unicode/udata.h" |
| 37 #include "udataswp.h" |
| 38 |
| 39 /** |
| 40 * Swap RBBI data. See udataswp.h. |
| 41 * @internal |
| 42 */ |
| 43 U_CAPI int32_t U_EXPORT2 |
| 44 ubrk_swap(const UDataSwapper *ds, |
| 45 const void *inData, int32_t length, void *outData, |
| 46 UErrorCode *pErrorCode); |
| 47 |
| 48 #ifdef XP_CPLUSPLUS |
| 49 |
| 50 #include "unicode/uobject.h" |
| 51 #include "unicode/unistr.h" |
| 52 #include "utrie.h" |
| 53 |
| 54 U_NAMESPACE_BEGIN |
| 55 |
| 56 /* |
| 57 * The following structs map exactly onto the raw data from the ICU common data file. |
| 58 */ |
| 59 struct RBBIDataHeader { |
| 60 uint32_t fMagic; /* == 0xb1a0
*/ |
| 61 uint8_t fFormatVersion[4]; /* Data Format. Same as the value in st
ruct UDataInfo */ |
| 62 /* if there is one associated with this
data. */ |
| 63 /* (version originates in rbbi, is co
pied to UDataInfo) */ |
| 64 /* For ICU 3.2 and earlier, this field
was */ |
| 65 /* uint32_t fVersion
*/ |
| 66 /* with a value of 1.
*/ |
| 67 uint32_t fLength; /* Total length in bytes of this RBBI Da
ta, */ |
| 68 /* including all sections, not just
the header. */ |
| 69 uint32_t fCatCount; /* Number of character categories.
*/ |
| 70 |
| 71 /* */ |
| 72 /* Offsets and sizes of each of the subsections within the RBBI data. */ |
| 73 /* All offsets are bytes from the start of the RBBIDataHeader. */ |
| 74 /* All sizes are in bytes. */ |
| 75 /* */ |
| 76 uint32_t fFTable; /* forward state transition table. */ |
| 77 uint32_t fFTableLen; |
| 78 uint32_t fRTable; /* Offset to the reverse state transition
table. */ |
| 79 uint32_t fRTableLen; |
| 80 uint32_t fSFTable; /* safe point forward transition table */ |
| 81 uint32_t fSFTableLen; |
| 82 uint32_t fSRTable; /* safe point reverse transition table */ |
| 83 uint32_t fSRTableLen; |
| 84 uint32_t fTrie; /* Offset to Trie data for character cate
gories */ |
| 85 uint32_t fTrieLen; |
| 86 uint32_t fRuleSource; /* Offset to the source for the break
*/ |
| 87 uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ |
| 88 uint32_t fStatusTable; /* Offset to the table of rule status valu
es */ |
| 89 uint32_t fStatusTableLen; |
| 90 |
| 91 uint32_t fReserved[6]; /* Reserved for expansion */ |
| 92 |
| 93 }; |
| 94 |
| 95 |
| 96 |
| 97 struct RBBIStateTableRow { |
| 98 int16_t fAccepting; /* Non-zero if this row is for an accepting
state. */ |
| 99 /* Value 0: not an accepting state.
*/ |
| 100 /* -1: Unconditional Accepting state.
*/ |
| 101 /* positive: Look-ahead match has comple
ted. */ |
| 102 /* Actual boundary position happen
ed earlier */ |
| 103 /* Value here == fLookAhead in ear
lier */ |
| 104 /* state, at actual boundary po
s. */ |
| 105 int16_t fLookAhead; /* Non-zero if this row is for a state that
*/ |
| 106 /* corresponds to a '/' in the rule sourc
e. */ |
| 107 /* Value is the same as the fAccepting
*/ |
| 108 /* value for the rule (which will appea
r */ |
| 109 /* in a different state).
*/ |
| 110 int16_t fTagIdx; /* Non-zero if this row covers a {tagged} p
osition */ |
| 111 /* from a rule. Value is the index in t
he */ |
| 112 /* StatusTable of the set of matching
*/ |
| 113 /* tags (rule status values)
*/ |
| 114 int16_t fReserved; |
| 115 uint16_t fNextState[2]; /* Next State, indexed by char category.
*/ |
| 116 /* Declared with 2 elements; actual array size */ |
| 117 /* is fCatCount from the RBBIDataHeader. */ |
| 118 /* CAUTION: see RBBITableBuilder::getTab
leSize() */ |
| 119 /* before changing anything her
e. */ |
| 120 }; |
| 121 |
| 122 |
| 123 struct RBBIStateTable { |
| 124 uint32_t fNumStates; /* Number of states.
*/ |
| 125 uint32_t fRowLen; /* Length of a state table row, in bytes.
*/ |
| 126 uint32_t fFlags; /* Option Flags for this state table
*/ |
| 127 uint32_t fReserved; /* reserved
*/ |
| 128 char fTableData[4]; /* First RBBIStateTableRow begins here.
*/ |
| 129 /* (making it char[] simplifies ugly addr
ess */ |
| 130 /* arithmetic for indexing variable leng
th rows.) */ |
| 131 }; |
| 132 |
| 133 typedef enum { |
| 134 RBBI_LOOKAHEAD_HARD_BREAK = 1, |
| 135 RBBI_BOF_REQUIRED = 2 |
| 136 } RBBIStateTableFlags; |
| 137 |
| 138 |
| 139 /* */ |
| 140 /* The reference counting wrapper class */ |
| 141 /* */ |
| 142 class RBBIDataWrapper : public UMemory { |
| 143 public: |
| 144 enum EDontAdopt { |
| 145 kDontAdopt |
| 146 }; |
| 147 RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); |
| 148 RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErro
rCode &status); |
| 149 RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); |
| 150 ~RBBIDataWrapper(); |
| 151 |
| 152 void init(const RBBIDataHeader *data, UErrorCode &status); |
| 153 RBBIDataWrapper *addReference(); |
| 154 void removeReference(); |
| 155 UBool operator ==(const RBBIDataWrapper &other) const; |
| 156 int32_t hashCode(); |
| 157 const UnicodeString &getRuleSourceString() const; |
| 158 #ifdef RBBI_DEBUG |
| 159 void printData(); |
| 160 void printTable(const char *heading, const RBBIStateTable *
table); |
| 161 #else |
| 162 #define printData() |
| 163 #define printTable(heading, table) |
| 164 #endif |
| 165 |
| 166 /* */ |
| 167 /* Pointers to items within the data */ |
| 168 /* */ |
| 169 const RBBIDataHeader *fHeader; |
| 170 const RBBIStateTable *fForwardTable; |
| 171 const RBBIStateTable *fReverseTable; |
| 172 const RBBIStateTable *fSafeFwdTable; |
| 173 const RBBIStateTable *fSafeRevTable; |
| 174 const UChar *fRuleSource; |
| 175 const int32_t *fRuleStatusTable; |
| 176 |
| 177 /* number of int32_t values in the rule status table. Used to sanity check
indexing */ |
| 178 int32_t fStatusMaxIdx; |
| 179 |
| 180 UTrie fTrie; |
| 181 |
| 182 private: |
| 183 int32_t fRefCount; |
| 184 UDataMemory *fUDataMem; |
| 185 UnicodeString fRuleString; |
| 186 UBool fDontFreeData; |
| 187 |
| 188 RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this cl
ass */ |
| 189 RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying
of this class */ |
| 190 }; |
| 191 |
| 192 |
| 193 |
| 194 U_NAMESPACE_END |
| 195 |
| 196 #endif /* C++ */ |
| 197 |
| 198 #endif |
OLD | NEW |