| Index: icu46/source/i18n/uspoof_conf.cpp
|
| ===================================================================
|
| --- icu46/source/i18n/uspoof_conf.cpp (revision 0)
|
| +++ icu46/source/i18n/uspoof_conf.cpp (revision 0)
|
| @@ -0,0 +1,600 @@
|
| +/*
|
| +******************************************************************************
|
| +*
|
| +* Copyright (C) 2008-2010, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +*
|
| +******************************************************************************
|
| +* file name: uspoof_conf.cpp
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2009Jan05 (refactoring earlier files)
|
| +* created by: Andy Heninger
|
| +*
|
| +* Internal classes for compiling confusable data into its binary (runtime) form.
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +#include "unicode/uspoof.h"
|
| +#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
| +#if !UCONFIG_NO_NORMALIZATION
|
| +
|
| +#include "unicode/unorm.h"
|
| +#include "unicode/uregex.h"
|
| +#include "unicode/ustring.h"
|
| +#include "cmemory.h"
|
| +#include "uspoof_impl.h"
|
| +#include "uhash.h"
|
| +#include "uvector.h"
|
| +#include "uassert.h"
|
| +#include "uarrsort.h"
|
| +#include "uspoof_conf.h"
|
| +
|
| +U_NAMESPACE_USE
|
| +
|
| +
|
| +//---------------------------------------------------------------------
|
| +//
|
| +// buildConfusableData Compile the source confusable data, as defined by
|
| +// the Unicode data file confusables.txt, into the binary
|
| +// structures used by the confusable detector.
|
| +//
|
| +// The binary structures are described in uspoof_impl.h
|
| +//
|
| +// 1. parse the data, building 4 hash tables, one each for the SL, SA, ML and MA
|
| +// tables. Each maps from a UChar32 to a String.
|
| +//
|
| +// 2. Sort all of the strings encountered by length, since they will need to
|
| +// be stored in that order in the final string table.
|
| +//
|
| +// 3. Build a list of keys (UChar32s) from the four mapping tables. Sort the
|
| +// list because that will be the ordering of our runtime table.
|
| +//
|
| +// 4. Generate the run time string table. This is generated before the key & value
|
| +// tables because we need the string indexes when building those tables.
|
| +//
|
| +// 5. Build the run-time key and value tables. These are parallel tables, and are built
|
| +// at the same time
|
| +//
|
| +
|
| +// Wrap s for use in the string pool. The destructor deletes s; the string table index starts at 0.
|
| +SPUString::SPUString(UnicodeString *s)
|
| +    : fStr(s), fStrTableIndex(0) {
|
| +}
|
| +
|
| +
|
| +SPUString::~SPUString() {
|
| +    delete this->fStr;   // releases the UnicodeString that was passed to the constructor
|
| +}
|
| +
|
| +
|
| +// Create an empty pool. If the vector cannot be allocated, status is set to U_MEMORY_ALLOCATION_ERROR.
|
| +SPUStringPool::SPUStringPool(UErrorCode &status) : fVec(NULL), fHash(NULL) {
|
| +    fVec = new UVector(status);
|
| +    if (fVec == NULL && U_SUCCESS(status)) { status = U_MEMORY_ALLOCATION_ERROR; }
|
| +    // The hash table is keyed by UnicodeString; no value comparator is supplied.
|
| +    fHash = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
|
| +}
|
| +
|
| +
|
| +SPUStringPool::~SPUStringPool() {
|
| +    // Delete every pooled SPUString. fVec is still NULL if its allocation in the constructor failed.
|
| +    const int32_t count = (fVec != NULL) ? fVec->size() : 0;
|
| +    for (int32_t i = count - 1; i >= 0; i--) {
|
| +        delete static_cast<SPUString *>(fVec->elementAt(i));
|
| +    }
|
| +    delete fVec;
|
| +    uhash_close(fHash);   // the table's keys and values were the strings and wrappers deleted above
|
| +}
|
| +
|
| +
|
| +int32_t SPUStringPool::size() {
|
| +    return this->fVec->size();   // one vector element per string added by addString()
|
| +}
|
| +
|
| +// Return the pooled string stored at position index of the backing vector.
|
| +SPUString *SPUStringPool::getByIndex(int32_t index) {
|
| +    return static_cast<SPUString *>(fVec->elementAt(index));
|
| +}
|
| +
|
| +
|
| +// Comparison function for ordering the strings in the string pool.
|
| +// Shorter strings sort before longer ones; strings of the same length are
|
| +// ordered by the result of UnicodeString::compare().
|
| +// Conforms to the type signature for a USortComparator in uvector.h
|
| +
|
| +static int8_t U_CALLCONV SPUStringCompare(UHashTok left, UHashTok right) {
|
| +    // Each token carries a pointer to one SPUString.
|
| +    const SPUString *lhs = static_cast<const SPUString *>(left.pointer);
|
| +    const SPUString *rhs = static_cast<const SPUString *>(right.pointer);
|
| +    const int32_t lhsLength = lhs->fStr->length();
|
| +    const int32_t rhsLength = rhs->fStr->length();
|
| +
|
| +    // Primary criterion: the length.
|
| +    if (lhsLength != rhsLength) {
|
| +        return (lhsLength < rhsLength) ? -1 : 1;
|
| +    }
|
| +
|
| +    // Equal lengths: fall back to the string comparison.
|
| +    return lhs->fStr->compare(*(rhs->fStr));
|
| +}
|
| +
|
| +void SPUStringPool::sort(UErrorCode &status) {
|
| +    this->fVec->sort(&SPUStringCompare, status);   // reorders the pooled strings with SPUStringCompare
|
| +}
|
| +
|
| +
|
| +// Intern src (the pool takes ownership of it); returns the pooled SPUString, or NULL with status set on failure.
|
| +SPUString *SPUStringPool::addString(UnicodeString *src, UErrorCode &status) {
|
| +    if (U_FAILURE(status)) { delete src; return NULL; }   // fHash may be NULL after a failed construction
|
| +    SPUString *pooled = static_cast<SPUString *>(uhash_get(fHash, src));
|
| +    if (pooled != NULL) { delete src; return pooled; }     // an equal string is already pooled
|
| +    pooled = new SPUString(src);
|
| +    if (pooled == NULL) { status = U_MEMORY_ALLOCATION_ERROR; delete src; return NULL; }
|
| +    uhash_put(fHash, src, pooled, &status);   // key: the string itself, value: its wrapper
|
| +    fVec->addElement(pooled, status);         // the vector is what size(), getByIndex() and sort() use
|
| +    return pooled;
|
| +}
|
| +
|
| +
|
| +
|
| +// Create an empty builder that will compile confusable data into spImpl. The containers allocated
|
| +// here are released by the destructor. On failure status is set to an error code.
|
| +ConfusabledataBuilder::ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status) :
|
| +    fSpoofImpl(spImpl),
|
| +    fInput(NULL),
|
| +    fSLTable(NULL), fSATable(NULL), fMLTable(NULL), fMATable(NULL),
|
| +    fKeySet(NULL),
|
| +    fKeyVec(NULL), fValueVec(NULL),
|
| +    fStringTable(NULL), fStringLengthsTable(NULL),
|
| +    stringPool(NULL),
|
| +    fParseLine(NULL), fParseHexNum(NULL),
|
| +    fLineNum(0)
|
| +{
|
| +    if (U_FAILURE(status)) { return; }
|
| +    // One hash table per mapping table type (SL, SA, ML, MA), keyed by code point.
|
| +    fSLTable = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
|
| +    fSATable = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
|
| +    fMLTable = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
|
| +    fMATable = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
|
| +
|
| +    fKeySet = new UnicodeSet();
|
| +    fKeyVec = new UVector(status);
|
| +    fValueVec = new UVector(status);
|
| +    stringPool = new SPUStringPool(status);
|
| +    // Report a failed allocation rather than leaving a NULL member for build() to dereference.
|
| +    if (U_SUCCESS(status) && (fKeySet == NULL || fKeyVec == NULL || fValueVec == NULL || stringPool == NULL)) {
|
| +        status = U_MEMORY_ALLOCATION_ERROR;
|
| +    }
|
| +}
|
| +
|
| +
|
| +// Release all build-time storage. Nothing reached through fSpoofImpl is touched here.
|
| +ConfusabledataBuilder::~ConfusabledataBuilder() {
|
| +    uprv_free(fInput);               // the UTF-16 copy of the source text
|
| +    uregex_close(fParseLine);
|
| +    uregex_close(fParseHexNum);
|
| +    UHashtable *mappingTables[] = {fSLTable, fSATable, fMLTable, fMATable};   // closed in this order
|
| +    for (int32_t t = 0; t < 4; t++) { uhash_close(mappingTables[t]); }
|
| +    // The intermediate key, string and value collections, then the string pool itself.
|
| +    delete fKeySet;
|
| +    delete fKeyVec;
|
| +    delete fStringTable;
|
| +    delete fStringLengthsTable;
|
| +    delete fValueVec;
|
| +    delete stringPool;
|
| +}
|
| +
|
| +
|
| +void ConfusabledataBuilder::buildConfusableData(SpoofImpl * spImpl, const char * confusables,
|
| +    int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status) {
|
| +    // Compile the confusables source text for spImpl with a temporary builder. On failure, *errorType
|
| +    // and pe->line are filled in; either pointer may be NULL, in which case it is skipped.
|
| +    if (U_FAILURE(status)) { return; }
|
| +    ConfusabledataBuilder builder(spImpl, status);
|
| +    builder.build(confusables, confusablesLen, status);
|
| +    if (U_FAILURE(status)) {
|
| +        if (errorType != NULL) { *errorType = USPOOF_SINGLE_SCRIPT_CONFUSABLE; }
|
| +        // pe is checked on its own: it must not be dereferenced just because errorType was supplied.
|
| +        if (pe != NULL) { pe->line = builder.fLineNum; }
|
| +    }
|
| +}
|
| +
|
| +
|
| +// build   Compile the confusables source text (confusablesLen bytes of UTF-8, confusables.txt syntax)
|
| +//         into the runtime tables of fSpoofImpl. A syntactically invalid line sets status to
|
| +//         U_PARSE_ERROR, with the number of that line left in fLineNum.
|
| +void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen,
|
| +               UErrorCode &status) {
|
| +    // Convert the user input data from UTF-8 to UChar (UTF-16)
|
| +    int32_t inputLen = 0;
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +    u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status);
|
| +    if (status != U_BUFFER_OVERFLOW_ERROR) {
|
| +        return;
|
| +    }
|
| +    status = U_ZERO_ERROR;
|
| +    fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
|
| +    if (fInput == NULL) {
|
| +        status = U_MEMORY_ALLOCATION_ERROR;
|
| +        return;   // fInput is dereferenced further down; do not carry on without it.
|
| +    }
|
| +    u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);
|
| +
|
| +    // Regular Expression to parse a line from Confusables.txt. The expression will match
|
| +    // any line. What was matched is determined by examining which capture groups have a match.
|
| +    // Capture Group 1: the source char
|
| +    // Capture Group 2: the replacement chars
|
| +    // Capture Group 3-6 the table type, SL, SA, ML, or MA
|
| +    // Capture Group 7: A blank or comment only line.
|
| +    // Capture Group 8: A syntactically invalid line. Anything that didn't match before.
|
| +    // Example Line from the confusables.txt source file:
|
| +    // "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... "
|
| +    fParseLine = uregex_openC(
|
| +        "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" // Match the source char
|
| +        "[ \\t]*([0-9A-Fa-f]+" // Match the replacement char(s)
|
| +        "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" // (continued)
|
| +        "\\s*(?:(SL)|(SA)|(ML)|(MA))" // Match the table type
|
| +        "[ \\t]*(?:#.*?)?$" // Match any trailing #comment
|
| +        "|^([ \\t]*(?:#.*?)?)$" // OR match empty lines or lines with only a #comment
|
| +        "|^(.*?)$", // OR match any line, which catches illegal lines.
|
| +        0, NULL, &status);
|
| +
|
| +    // Regular expression for parsing a hex number out of a space-separated list of them.
|
| +    // Capture group 1 gets the number, with spaces removed. Digits may be upper or lower case, as in fParseLine.
|
| +    fParseHexNum = uregex_openC("\\s*([0-9A-Fa-f]+)", 0, NULL, &status);
|
| +    if (U_FAILURE(status)) { return; }   // the UTF-8 conversion or a regex compilation failed
|
| +    // Zap any Byte Order Mark at the start of input. Changing it to a space is benign
|
| +    // given the syntax of the input.
|
| +    if (*fInput == 0xfeff) {
|
| +        *fInput = 0x20;
|
| +    }
|
| +
|
| +    // Parse the input, one line per iteration of this loop.
|
| +    uregex_setText(fParseLine, fInput, inputLen, &status);
|
| +    while (uregex_findNext(fParseLine, &status)) {
|
| +        fLineNum++;
|
| +        if (uregex_start(fParseLine, 7, &status) >= 0) {
|
| +            // this was a blank or comment line.
|
| +            continue;
|
| +        }
|
| +        if (uregex_start(fParseLine, 8, &status) >= 0) {
|
| +            // input file syntax error.
|
| +            status = U_PARSE_ERROR;
|
| +            return;
|
| +        }
|
| +
|
| +        // We have a good input line. Extract the key character and mapping string, and
|
| +        // put them into the appropriate mapping table.
|
| +        UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status),
|
| +                          uregex_end(fParseLine, 1, &status), status);
|
| +
|
| +        int32_t mapStringStart = uregex_start(fParseLine, 2, &status);
|
| +        int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart;
|
| +        uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status);
|
| +
|
| +        UnicodeString *mapString = new UnicodeString();
|
| +        if (mapString == NULL) {
|
| +            status = U_MEMORY_ALLOCATION_ERROR;
|
| +            return;
|
| +        }
|
| +        while (uregex_findNext(fParseHexNum, &status)) {
|
| +            UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status),
|
| +                        uregex_end(fParseHexNum, 1, &status), status);
|
| +            mapString->append(c);
|
| +        }
|
| +        U_ASSERT(mapString->length() >= 1);
|
| +
|
| +        // Put the map (value) string into the string pool
|
| +        // This is a little like a Java intern() - any duplicates will be eliminated.
|
| +        SPUString *smapString = stringPool->addString(mapString, status);
|
| +
|
| +        // Add the UChar32 -> string mapping to the appropriate table.
|
| +        UHashtable *table = uregex_start(fParseLine, 3, &status) >= 0 ? fSLTable :
|
| +                            uregex_start(fParseLine, 4, &status) >= 0 ? fSATable :
|
| +                            uregex_start(fParseLine, 5, &status) >= 0 ? fMLTable :
|
| +                            uregex_start(fParseLine, 6, &status) >= 0 ? fMATable :
|
| +                            NULL;
|
| +        U_ASSERT(table != NULL);
|
| +        uhash_iput(table, keyChar, smapString, &status);
|
| +        fKeySet->add(keyChar);
|
| +        if (U_FAILURE(status)) {
|
| +            return;
|
| +        }
|
| +    }
|
| +
|
| +    // Input data is now all parsed and collected.
|
| +    // Now create the run-time binary form of the data.
|
| +    //
|
| +    // This is done in two steps. First the data is assembled into vectors and strings,
|
| +    // for ease of construction, then the contents of these collections are dumped
|
| +    // into the actual raw-bytes data storage.
|
| +
|
| +    // Build up the string array, and record the index of each string therein
|
| +    // in the (build time only) string pool.
|
| +    // Strings of length one are not entered into the strings array.
|
| +    // At the same time, build up the string lengths table, which records the
|
| +    // position in the string table of the last string of each length >= 4.
|
| +    // (Strings in the table are sorted by length)
|
| +    stringPool->sort(status);
|
| +    fStringTable = new UnicodeString();
|
| +    fStringLengthsTable = new UVector(status);
|
| +    if (fStringTable == NULL || fStringLengthsTable == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return; }
|
| +    int32_t previousStringLength = 0;
|
| +    int32_t previousStringIndex = 0;
|
| +    int32_t poolSize = stringPool->size();
|
| +    for (int32_t i=0; i<poolSize; i++) {
|
| +        SPUString *s = stringPool->getByIndex(i);
|
| +        int32_t strLen = s->fStr->length();
|
| +        int32_t strIndex = fStringTable->length();
|
| +        U_ASSERT(strLen >= previousStringLength);
|
| +        if (strLen == 1) {
|
| +            // strings of length one do not get an entry in the string table.
|
| +            // Keep the single string character itself here, which is the same
|
| +            // convention that is used in the final run-time string table index.
|
| +            s->fStrTableIndex = s->fStr->charAt(0);
|
| +        } else {
|
| +            if ((strLen > previousStringLength) && (previousStringLength >= 4)) {
|
| +                fStringLengthsTable->addElement(previousStringIndex, status);
|
| +                fStringLengthsTable->addElement(previousStringLength, status);
|
| +            }
|
| +            s->fStrTableIndex = strIndex;
|
| +            fStringTable->append(*(s->fStr));
|
| +        }
|
| +        previousStringLength = strLen;
|
| +        previousStringIndex = strIndex;
|
| +    }
|
| +    // Make the final entry to the string lengths table.
|
| +    // (it holds an entry for the _last_ string of each length, so adding the
|
| +    // final one doesn't happen in the main loop because no longer string was encountered.)
|
| +    if (previousStringLength >= 4) {
|
| +        fStringLengthsTable->addElement(previousStringIndex, status);
|
| +        fStringLengthsTable->addElement(previousStringLength, status);
|
| +    }
|
| +
|
| +    // Construct the compile-time Key and Value tables
|
| +    //
|
| +    // For each key code point, check which mapping tables it applies to,
|
| +    // and create the final data for the key & value structures.
|
| +    //
|
| +    // The four logical mapping tables are conflated into one combined table.
|
| +    // If multiple logical tables have the same mapping for some key, they
|
| +    // share a single entry in the combined table.
|
| +    // If more than one mapping exists for the same key code point, multiple
|
| +    // entries will be created in the table
|
| +
|
| +    for (int32_t range=0; range<fKeySet->getRangeCount(); range++) {
|
| +        // It is an oddity of the UnicodeSet API that simply enumerating the contained
|
| +        // code points requires a nested loop.
|
| +        for (UChar32 keyChar=fKeySet->getRangeStart(range);
|
| +                keyChar <= fKeySet->getRangeEnd(range); keyChar++) {
|
| +            addKeyEntry(keyChar, fSLTable, USPOOF_SL_TABLE_FLAG, status);
|
| +            addKeyEntry(keyChar, fSATable, USPOOF_SA_TABLE_FLAG, status);
|
| +            addKeyEntry(keyChar, fMLTable, USPOOF_ML_TABLE_FLAG, status);
|
| +            addKeyEntry(keyChar, fMATable, USPOOF_MA_TABLE_FLAG, status);
|
| +        }
|
| +    }
|
| +
|
| +    // Put the assembled data into the flat runtime array
|
| +    outputData(status);
|
| +    // All of the intermediate allocated data belongs to the ConfusabledataBuilder
|
| +    // object (this), and is deleted in the destructor.
|
| +}
|
| +
|
| +//
|
| +// outputData  The confusable data has been compiled into intermediate vectors and
|
| +//             strings. Copy it from there into the flat binary array reached through
|
| +//             fSpoofImpl->fSpoofData.
|
| +//
|
| +//             Four sections are written, in this order: keys, values, strings, string
|
| +//             lengths. Each reserveSpace() call will likely relocate the array in memory,
|
| +//             so the raw data header pointer is fetched again after every call.
|
| +//
|
| +void ConfusabledataBuilder::outputData(UErrorCode &status) {
|
| +    U_ASSERT(fSpoofImpl->fSpoofData->fDataOwned == TRUE);
|
| +
|
| +    // --- Section 1: the Key Table.
|
| +    // The keys are copied unchanged. Debug builds also check that they are sorted on
|
| +    // their low 24 bits and that the top byte of each key is non-zero.
|
| +    const int32_t keyCount = fKeyVec->size();
|
| +    int32_t *keyTable = static_cast<int32_t *>(
|
| +        fSpoofImpl->fSpoofData->reserveSpace(keyCount*sizeof(int32_t), status));
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +
|
| +    int32_t precedingKey = 0;
|
| +    for (int32_t k = 0; k < keyCount; k++) {
|
| +        const int32_t key = fKeyVec->elementAti(k);
|
| +        U_ASSERT((key & 0x00ffffff) >= (precedingKey & 0x00ffffff));
|
| +        U_ASSERT((key & 0xff000000) != 0);
|
| +        keyTable[k] = key;
|
| +        precedingKey = key;
|
| +    }
|
| +    // The header gets the section's byte offset and element count; fSpoofData gets the pointer.
|
| +    SpoofDataHeader *rawData = fSpoofImpl->fSpoofData->fRawData;
|
| +    rawData->fCFUKeys = static_cast<int32_t>(reinterpret_cast<char *>(keyTable) - reinterpret_cast<char *>(rawData));
|
| +    rawData->fCFUKeysSize = keyCount;
|
| +    fSpoofImpl->fSpoofData->fCFUKeys = keyTable;
|
| +
|
| +    // --- Section 2: the Value Table, which parallels the key table.
|
| +    const int32_t valueCount = fValueVec->size();
|
| +    U_ASSERT(keyCount == valueCount);
|
| +    uint16_t *valueTable = static_cast<uint16_t *>(
|
| +        fSpoofImpl->fSpoofData->reserveSpace(keyCount*sizeof(uint16_t), status));
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +
|
| +    for (int32_t v = 0; v < valueCount; v++) {
|
| +        const uint32_t value = static_cast<uint32_t>(fValueVec->elementAti(v));
|
| +        U_ASSERT(value < 0xffff);
|
| +        valueTable[v] = static_cast<uint16_t>(value);
|
| +    }
|
| +    rawData = fSpoofImpl->fSpoofData->fRawData;
|
| +    rawData->fCFUStringIndex = static_cast<int32_t>(reinterpret_cast<char *>(valueTable) - reinterpret_cast<char *>(rawData));
|
| +    rawData->fCFUStringIndexSize = valueCount;
|
| +    fSpoofImpl->fSpoofData->fCFUValues = valueTable;
|
| +
|
| +    // --- Section 3: the Strings Table.
|
| +    // Two extra bytes are reserved so the string will be nul-terminated. This is
|
| +    // only a convenience, for when debugging; it is not needed otherwise.
|
| +    const uint32_t stringsLength = fStringTable->length();
|
| +    UChar *stringTable = static_cast<UChar *>(
|
| +        fSpoofImpl->fSpoofData->reserveSpace(stringsLength*sizeof(UChar)+2, status));
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +
|
| +    fStringTable->extract(stringTable, stringsLength+1, status);
|
| +    rawData = fSpoofImpl->fSpoofData->fRawData;
|
| +    U_ASSERT(rawData->fCFUStringTable == 0);
|
| +    rawData->fCFUStringTable = static_cast<int32_t>(reinterpret_cast<char *>(stringTable) - reinterpret_cast<char *>(rawData));
|
| +    rawData->fCFUStringTableLen = stringsLength;
|
| +    fSpoofImpl->fSpoofData->fCFUStrings = stringTable;
|
| +
|
| +    // --- Section 4: the String Lengths Table.
|
| +    // fStringLengthsTable holds (offset, length) pairs in consecutive elements.
|
| +    // Debug builds check while copying that each offset is less than the size of the
|
| +    // string table, that each length is below 40, and that the lengths increase.
|
| +    const int32_t lengthElementCount = fStringLengthsTable->size();
|
| +    uint16_t *lengthTable = static_cast<uint16_t *>(
|
| +        fSpoofImpl->fSpoofData->reserveSpace(lengthElementCount*sizeof(uint16_t), status));
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +
|
| +    uint32_t precedingLength = 0;
|
| +    for (int32_t e = 0; e < lengthElementCount; e += 2) {
|
| +        const uint32_t offset = static_cast<uint32_t>(fStringLengthsTable->elementAti(e));
|
| +        const uint32_t length = static_cast<uint32_t>(fStringLengthsTable->elementAti(e+1));
|
| +        U_ASSERT(offset < stringsLength);
|
| +        U_ASSERT(length < 40);
|
| +        U_ASSERT(length > precedingLength);
|
| +        // The destination index advances in step with e: two slots per pair.
|
| +        lengthTable[e] = static_cast<uint16_t>(offset);
|
| +        lengthTable[e+1] = static_cast<uint16_t>(length);
|
| +        precedingLength = length;
|
| +    }
|
| +
|
| +    rawData = fSpoofImpl->fSpoofData->fRawData;
|
| +    rawData->fCFUStringLengths = static_cast<int32_t>(reinterpret_cast<char *>(lengthTable) - reinterpret_cast<char *>(rawData));
|
| +    // Note: StringLengthsSize in the raw data is the number of complete entries,
|
| +    // each consisting of a pair of 16 bit values, hence the divide by 2.
|
| +    rawData->fCFUStringLengthsSize = lengthElementCount / 2;
|
| +    fSpoofImpl->fSpoofData->fCFUStringLengths =
|
| +        reinterpret_cast<SpoofStringLengthsElement *>(lengthTable);
|
| +}
|
| +
|
| +
|
| +
|
| +// addKeyEntry   Construction of the confusable Key and Mapping Values tables.
|
| +//               This is an intermediate point in the building process: the mappings are
|
| +//               already in the hash tables fSLTable, etc. For one key character and one of
|
| +//               those tables, this function either appends a run-time style entry to
|
| +//               fKeyVec and fValueVec, or flags an existing entry as applying to the table.
|
| +
|
| +void ConfusabledataBuilder::addKeyEntry(
|
| +    UChar32 keyChar, // The key character
|
| +    UHashtable *table, // The table, one of SATable, MATable, etc.
|
| +    int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
|
| +    UErrorCode &status) {
|
| +
|
| +    SPUString *wanted = static_cast<SPUString *>(uhash_iget(table, keyChar));
|
| +    if (wanted == NULL) {
|
| +        // This table has no mapping for keyChar. That is very much expected, because the
|
| +        // function is called for all four tables for each key char that is seen anywhere.
|
| +        return;
|
| +    }
|
| +
|
| +    // Scan backwards from the end of fKeyVec over the entries that belong to this key
|
| +    // character. If one of them already has the wanted mapping, just set the flag in it
|
| +    // saying that it applies to the table being processed now.
|
| +    UBool otherMappingSeen = FALSE;
|
| +    int32_t pos = fKeyVec->size() - 1;
|
| +    while (pos >= 0) {
|
| +        int32_t existingKey = fKeyVec->elementAti(pos);
|
| +        if ((existingKey & 0x0ffffff) != keyChar) {
|
| +            // An entry for a different key character: nothing further back can match.
|
| +            break;
|
| +        }
|
| +        if (getMapping(pos) == *(wanted->fStr)) {
|
| +            // Same mapping: share the entry instead of adding another one.
|
| +            existingKey |= tableFlag;
|
| +            fKeyVec->setElementAt(existingKey, pos);
|
| +            return;
|
| +        }
|
| +        // An entry for this key character exists, but with a different mapping.
|
| +        otherMappingSeen = TRUE;
|
| +        pos--;
|
| +    }
|
| +
|
| +    // No entry could be shared, so a new one is added to the binary data being built.
|
| +    // That means adding to both the key table and the parallel values table.
|
| +
|
| +    // Length field of the key: the mapping length minus one, capped at 3.
|
| +    int32_t lengthField = wanted->fStr->length() - 1;
|
| +    if (lengthField > 3) {
|
| +        lengthField = 3;
|
| +    }
|
| +    // The key combines the code point, the table flag, the multiple-values flag and the length.
|
| +    int32_t newKey = keyChar | tableFlag;
|
| +    if (otherMappingSeen) {
|
| +        newKey |= USPOOF_KEY_MULTIPLE_VALUES;
|
| +    }
|
| +    newKey |= lengthField << USPOOF_KEY_LENGTH_SHIFT;
|
| +    const int32_t newValue = wanted->fStrTableIndex;
|
| +
|
| +    // Key and value are appended together so that the two vectors stay parallel.
|
| +    fKeyVec->addElement(newKey, status);
|
| +    fValueVec->addElement(newValue, status);
|
| +
|
| +    // If the preceding key entry is for the same key character (but with a different mapping)
|
| +    // set the multiple-values flag on it.
|
| +    if (otherMappingSeen) {
|
| +        const int32_t precedingIndex = fKeyVec->size() - 2;
|
| +        int32_t precedingKey = fKeyVec->elementAti(precedingIndex);
|
| +        precedingKey |= USPOOF_KEY_MULTIPLE_VALUES;
|
| +        fKeyVec->setElementAt(precedingKey, precedingIndex);
|
| +    }
|
| +}
|
| +
|
| +
|
| +
|
| +// getMapping   Rebuild the mapping string of the entry at position index of fKeyVec / fValueVec.
|
| +UnicodeString ConfusabledataBuilder::getMapping(int32_t index) {
|
| +    const int32_t key = fKeyVec->elementAti(index);
|
| +    const int32_t value = fValueVec->elementAti(index);
|
| +    const int32_t lengthField = USPOOF_KEY_LENGTH_FIELD(key);
|
| +    if (lengthField == 0) {
|
| +        // The value is the single mapped character itself, not a string table index.
|
| +        return UnicodeString(static_cast<UChar>(value));
|
| +    }
|
| +    if (lengthField == 1 || lengthField == 2) {
|
| +        // The value is a string table index and the length is the field value plus one.
|
| +        return UnicodeString(*fStringTable, value, lengthField+1);
|
| +    }
|
| +    if (lengthField == 3) {
|
| +        // The length comes from the first lengths-table entry whose index is not below value.
|
| +        int32_t length = 0;
|
| +        for (int32_t entry = 0; entry < fStringLengthsTable->size(); entry += 2) {
|
| +            if (value <= fStringLengthsTable->elementAti(entry)) {
|
| +                length = fStringLengthsTable->elementAti(entry+1);
|
| +                break;
|
| +            }
|
| +        }
|
| +        U_ASSERT(length>=3);
|
| +        return UnicodeString(*fStringTable, value, length);
|
| +    }
|
| +    U_ASSERT(FALSE);
|
| +    return UnicodeString();
|
| +}
|
| +
|
| +#endif
|
| +#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
|
| +
|
|
|
| Property changes on: icu46/source/i18n/uspoof_conf.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|