OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2000-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: ucol_elm.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created 02/22/2001 |
| 14 * created by: Vladimir Weinstein |
| 15 * |
| 16 * This program reads the Fractional UCA table and generates |
| 17 * internal format for UCA table as well as inverse UCA table. |
| 18 * It then writes binary files containing the data: ucadata.dat |
| 19 * & invuca.dat |
| 20 */ |
| 21 #ifndef UCOL_UCAELEMS_H |
| 22 #define UCOL_UCAELEMS_H |
| 23 |
| 24 #include "unicode/utypes.h" |
| 25 #include "unicode/uniset.h" |
| 26 #include "ucol_tok.h" |
| 27 |
| 28 #if !UCONFIG_NO_COLLATION |
| 29 |
| 30 #include "ucol_imp.h" |
| 31 |
| 32 #ifdef UCOL_DEBUG |
| 33 #include "cmemory.h" |
| 34 #include <stdio.h> |
| 35 #endif |
| 36 |
| 37 U_CDECL_BEGIN |
| 38 |
| 39 /* This is the maximum trie capacity for the mapping trie. |
| 40 Due to current limitations in genuca and the design of UTrie, |
| 41 this number can't be more than 256K (the value below, 0x40000, is exactly 256K). |
| 42 As of Unicode 5, it currently could safely go to 128K without |
| 43 a problem. Normally, fewer than 32K are tailored. |
| 44 */ |
| 45 #define UCOL_ELM_TRIE_CAPACITY 0x40000 |
| 46 |
| 47 /* This is the maximum capacity for the temporary combining class |
| 48 * table (0x10000 == 64K). The table will be compacted after scanning all the |
| 49 * Unicode codepoints. |
| 50 */ |
| 51 #define UCOL_MAX_CM_TAB 0x10000 |
| 52 |
| 53 |
| 54 typedef struct { /* Expansion buffer: an array of 32-bit collation elements (CEs) plus two counters. */ |
| 55 uint32_t *CEs; /* CE storage; allocation and ownership are not visible in this header */ |
| 56 int32_t position; /* presumably the next free index in CEs - TODO confirm in the implementation */ |
| 57 int32_t size; /* presumably the allocated capacity of CEs - TODO confirm */ |
| 58 } ExpansionTable; |
| 59 |
| 60 typedef struct { /* Describes one element handed to uprv_uca_addAnElement(): its prefix, its characters and its collation elements. */ |
| 61 UChar prefixChars[128]; /* inline buffer; presumably backs 'prefix' - TODO confirm */ |
| 62 UChar *prefix; |
| 63 uint32_t prefixSize; /* presumably the length of 'prefix' in UChars - TODO confirm */ |
| 64 UChar uchars[128]; /* inline buffer; presumably backs 'cPoints' - TODO confirm */ |
| 65 UChar *cPoints; |
| 66 uint32_t cSize; /* Number of characters in sequence - for contraction */ |
| 67 uint32_t noOfCEs; /* Number of collation elements */ |
| 68 uint32_t CEs[128]; /* These are collation elements - there could be more than one - in case of expansion */ |
| 69 uint32_t mapCE; /* This is the value the element maps to in the original table */ |
| 70 uint32_t sizePrim[128]; /* sizePrim/sizeSec/sizeTer: presumably per-CE primary, secondary and tertiary weight sizes - TODO confirm */ |
| 71 uint32_t sizeSec[128]; |
| 72 uint32_t sizeTer[128]; |
| 73 UBool caseBit; |
| 74 UBool isThai; |
| 75 } UCAElements; |
| 76 |
| 77 typedef struct { /* Table of expansion-ending CEs with a per-entry isV flag and three maximum-size bytes. */ |
| 78 uint32_t *endExpansionCE; /* array of CEs; presumably each is the last CE of an expansion - TODO confirm */ |
| 79 UBool *isV; /* boolean array; presumably parallel to endExpansionCE - TODO confirm */ |
| 80 int32_t position; /* presumably the number of entries in use - TODO confirm */ |
| 81 int32_t size; /* presumably the allocated capacity - TODO confirm */ |
| 82 uint8_t maxLSize; /* NOTE(review): L/V/T presumably refer to Hangul Jamo leading/vowel/trailing - confirm */ |
| 83 uint8_t maxVSize; |
| 84 uint8_t maxTSize; |
| 85 } MaxJamoExpansionTable; |
| 86 |
| 87 typedef struct { /* Table pairing expansion-ending CEs with a one-byte size per entry. */ |
| 88 uint32_t *endExpansionCE; /* array of CEs; presumably each is the last CE of an expansion - TODO confirm */ |
| 89 uint8_t *expansionCESize; /* byte array; presumably parallel to endExpansionCE - TODO confirm */ |
| 90 int32_t position; /* presumably the number of entries in use - TODO confirm */ |
| 91 int32_t size; /* presumably the allocated capacity - TODO confirm */ |
| 92 } MaxExpansionTable; |
| 93 |
| 94 typedef struct { /* Combining-class lookup: all combining marks in one array, with an index entry per combining class. */ |
| 95 uint16_t index[256]; /* index into cPoints for each combining class 0-255 */ |
| 96 UChar *cPoints; /* code point array of all combining marks */ |
| 97 uint32_t size; /* total number of combining marks */ |
| 98 } CombinClassTable; |
| 99 |
| 100 typedef struct { /* Working table returned by uprv_uca_initTempTable() and taken by the other uprv_uca_* functions declared in this header. */ |
| 101 /*CompactEIntArray *mapping; */ |
| 102 UNewTrie *mapping; /* the mapping trie (see UCOL_ELM_TRIE_CAPACITY) */ |
| 103 ExpansionTable *expansions; |
| 104 struct CntTable *contractions; |
| 105 UCATableHeader *image; /* presumably the 'image' argument given to uprv_uca_initTempTable() - TODO confirm */ |
| 106 UColOptionSet *options; /* presumably the 'opts' argument given to uprv_uca_initTempTable() - TODO confirm */ |
| 107 MaxExpansionTable *maxExpansions; |
| 108 MaxJamoExpansionTable *maxJamoExpansions; |
| 109 uint8_t *unsafeCP; |
| 110 uint8_t *contrEndCP; |
| 111 const UCollator *UCA; /* read-only collator; presumably the 'UCA' argument given to uprv_uca_initTempTable() - TODO confirm */ |
| 112 UHashtable *prefixLookup; |
| 113 CombinClassTable *cmLookup; /* combining class lookup for tailoring. */ |
| 114 } tempUCATable; |
| 115 |
| 116 typedef struct { /* A UChar paired with its combining class. */ |
| 117 UChar cp; |
| 118 uint16_t cClass; // combining class |
| 119 }CompData; |
| 120 |
| 121 typedef struct { /* Scratch state holding precomp/decomp/comp buffers with their lengths; NOTE(review): presumably used while tailoring combining marks - confirm. */ |
| 122 CompData *precomp; |
| 123 int32_t precompLen; /* presumably the element count of precomp - TODO confirm */ |
| 124 UChar *decomp; |
| 125 int32_t decompLen; /* presumably the length of decomp in UChars - TODO confirm */ |
| 126 UChar *comp; |
| 127 int32_t compLen; /* presumably the length of comp in UChars - TODO confirm */ |
| 128 uint16_t curClass; |
| 129 uint16_t tailoringCM; |
| 130 int32_t cmPos; |
| 131 }tempTailorContext; |
| 132 |
| 133 U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UC
olOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags suppleme
ntaryInitTag, UErrorCode *status); /* Returns a tempUCATable pointer; errors are reported through *status. Presumably returns NULL on failure - TODO confirm. */ |
| 134 U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t); /* Takes a tempUCATable and returns nothing; presumably releases t and what it owns - TODO confirm. */ |
| 135 U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *el
ement, UErrorCode *status); /* Takes a table and one UCAElements; errors are reported through *status. The meaning of the uint32_t result is not visible in this header. */ |
| 136 U_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UError
Code *status); /* Returns a UCATableHeader pointer derived from t; errors are reported through *status. Ownership of the result is not visible in this header. */ |
| 137 |
| 138 U_CAPI int32_t U_EXPORT2 |
| 139 uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src, |
| 140 U_NAMESPACE_QUALIFIER UnicodeSet *closed, UErrorCode *
status); /* Errors are reported through *status; the meaning of the int32_t result is not visible here. NOTE(review): 'closed' is a C++ UnicodeSet, so this declaration presumably requires a C++ translation unit - confirm. */ |
| 141 |
| 142 U_CDECL_END |
| 143 |
| 144 #endif /* #if !UCONFIG_NO_COLLATION */ |
| 145 |
| 146 #endif |
OLD | NEW |