OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * |
| 4 * Copyright (C) 2008-2009, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ****************************************************************************** |
| 8 * file name: uspoof_conf.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2009Jan05 |
| 14 * created by: Andy Heninger |
| 15 * |
| 16 * Internal classes for compiling confusable data into its binary (runtime) for
m. |
| 17 */ |
| 18 |
| 19 #ifndef __USPOOF_BUILDCONF_H__ |
| 20 #define __USPOOF_BUILDCONF_H__ |
| 21 |
| 22 #if !UCONFIG_NO_NORMALIZATION |
| 23 |
| 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 25 |
| 26 #include "uspoof_impl.h" |
| 27 |
| 28 U_NAMESPACE_BEGIN |
| 29 |
| 30 // SPUString: a mapping-result string together with its string-table index (see fStrTableIndex). |
| 31 // Holds a string that is the result of one of the mappings defined |
| 32 // by the confusable mapping data (confusables.txt from Unicode.org
) |
| 33 // Instances of SPUString exist during the compilation process only
. |
| 34 |
| 35 struct SPUString : public UMemory { |
| 36 UnicodeString *fStr; // The actual string. NOTE(review): presumably owned by this object and released by ~SPUString() - confirm in the .cpp. |
| 37 int32_t fStrTableIndex; // Index into the final runtime data for t
his string. |
| 38 // (or, for length 1, the single string c
har itself, |
| 39 // there being no string table entry for
it.) |
| 40 SPUString(UnicodeString *s); |
| 41 ~SPUString(); |
| 42 }; |
| 43 |
| 44 |
| 45 // SPUStringPool: A utility class for holding the strings that are the result of |
| 46 // the spoof mappings. These strings will ultimately end up in th
e |
| 47 // run-time String Table. |
| 48 // This is sort of like a sorted set of strings, except that ICU'
s anemic |
| 49 // built-in collections don't support those, so it is implemented
with a |
| 50 // combination of a uhash (fHash) and a UVector (fVec). |
| 51 |
| 52 |
| 53 class SPUStringPool : public UMemory { |
| 54 public: |
| 55 SPUStringPool(UErrorCode &status); |
| 56 ~SPUStringPool(); |
| 57 |
| 58 // Add a string. Return the string from the table. |
| 59 // If the input parameter string is already in the table, delete the |
| 60 // input parameter and return the existing string. NOTE(review): when src is new, the pool presumably keeps ownership of it - confirm in the .cpp. |
| 61 SPUString *addString(UnicodeString *src, UErrorCode &status); |
| 62 |
| 63 |
| 64 // Get the n-th string in the collection. NOTE(review): handling of an out-of-range i is not visible in this header. |
| 65 SPUString *getByIndex(int32_t i); |
| 66 |
| 67 // Sort the contents; affects the ordering of getByIndex(). |
| 68 void sort(UErrorCode &status); |
| 69 |
| 70 int32_t size(); |
| 71 |
| 72 private: |
| 73 UVector *fVec; // Elements are SPUString * |
| 74 UHashtable *fHash; // Key: UnicodeString Value: SPUString |
| 75 }; |
| 76 |
| 77 |
| 78 // class ConfusabledataBuilder |
| 79 // An instance of this class exists while the confusable data is being built
from source. |
| 80 // It encapsulates the intermediate data structures that are used for buildi
ng. |
| 81 // It exports one static function, buildConfusableData(), to do a confusable data build; the constructor, destructor and all other members are private. |
| 82 |
| 83 class ConfusabledataBuilder : public UMemory { |
| 84 private: |
| 85 SpoofImpl *fSpoofImpl; |
| 86 UChar *fInput; |
| 87 UHashtable *fSLTable; |
| 88 UHashtable *fSATable; |
| 89 UHashtable *fMLTable; |
| 90 UHashtable *fMATable; |
| 91 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the fo
ur mapping tables. |
| 92 |
| 93 // The binary data is first assembled into the following four collections, t
hen |
| 94 // copied to its final raw-memory destination. |
| 95 UVector *fKeyVec; |
| 96 UVector *fValueVec; |
| 97 UnicodeString *fStringTable; |
| 98 UVector *fStringLengthsTable; |
| 99 |
| 100 SPUStringPool *stringPool; |
| 101 URegularExpression *fParseLine; |
| 102 URegularExpression *fParseHexNum; |
| 103 int32_t fLineNum; |
| 104 |
| 105 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); |
| 106 ~ConfusabledataBuilder(); |
| 107 void build(const char * confusables, int32_t confusablesLen, UErrorCode &sta
tus); |
| 108 |
| 109 // Add an entry to the key and value tables being built |
| 110 // input: data from SLTable, MATable, etc. |
| 111 // output: entry added to fKeyVec and fValueVec |
| 112 void addKeyEntry(UChar32 keyChar, // The key character |
| 113 UHashtable *table, // The table, one of SATable, MATa
ble, etc. |
| 114 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, et
c. |
| 115 UErrorCode &status); |
| 116 |
| 117 // From an index into fKeyVec & fValueVec |
| 118 // get a UnicodeString with the corresponding mapping. |
| 119 UnicodeString getMapping(int32_t key); |
| 120 |
| 121 // Populate the final binary output data array with the compiled data. |
| 122 void outputData(UErrorCode &status); |
| 123 |
| 124 public: |
| 125 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, |
| 126 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode
&status); |
| 127 }; |
| 128 U_NAMESPACE_END |
| 129 |
| 130 #endif |
| 131 #endif // !UCONFIG_NO_NORMALIZATION |
| 132 #endif // __USPOOF_BUILDCONF_H__ |
OLD | NEW |