| Index: icu46/source/common/triedict.cpp
|
| ===================================================================
|
| --- icu46/source/common/triedict.cpp (revision 68397)
|
| +++ icu46/source/common/triedict.cpp (working copy)
|
| @@ -20,6 +20,7 @@
|
| #include "uvector.h"
|
| #include "uvectr32.h"
|
| #include "uarrsort.h"
|
| +#include "hash.h"
|
|
|
| //#define DEBUG_TRIE_DICT 1
|
|
|
| @@ -27,8 +28,13 @@
|
| #include <sys/times.h>
|
| #include <limits.h>
|
| #include <stdio.h>
|
| +#include <time.h>
|
| +#ifndef CLK_TCK
|
| +#define CLK_TCK CLOCKS_PER_SEC
|
| #endif
|
|
|
| +#endif
|
| +
|
| U_NAMESPACE_BEGIN
|
|
|
| /*******************************************************************
|
| @@ -45,6 +51,11 @@
|
| * MutableTrieDictionary
|
| */
|
|
|
| +//#define MAX_VALUE 65535
|
| +
|
| +// forward declaration
|
| +inline uint16_t scaleLogProbabilities(double logprob);
|
| +
|
| // Node structure for the ternary, uncompressed trie
|
| struct TernaryNode : public UMemory {
|
| UChar ch; // UTF-16 code unit
|
| @@ -77,7 +88,8 @@
|
| delete high;
|
| }
|
|
|
| -MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status ) {
|
| +MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status,
|
| + UBool containsValue /* = FALSE */ ) {
|
| // Start the trie off with something. Having the root node already present
|
| // cuts a special case out of the search/insertion functions.
|
| // Making it a median character cuts the worse case for searches from
|
| @@ -91,14 +103,19 @@
|
| if (U_SUCCESS(status) && fIter == NULL) {
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| }
|
| +
|
| + fValued = containsValue;
|
| }
|
|
|
| -MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status ) {
|
| +MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status,
|
| + UBool containsValue /* = false */ ) {
|
| fTrie = NULL;
|
| fIter = utext_openUChars(NULL, NULL, 0, &status);
|
| if (U_SUCCESS(status) && fIter == NULL) {
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| }
|
| +
|
| + fValued = containsValue;
|
| }
|
|
|
| MutableTrieDictionary::~MutableTrieDictionary() {
|
| @@ -108,12 +125,13 @@
|
|
|
| int32_t
|
| MutableTrieDictionary::search( UText *text,
|
| - int32_t maxLength,
|
| - int32_t *lengths,
|
| - int &count,
|
| - int limit,
|
| - TernaryNode *&parent,
|
| - UBool &pMatched ) const {
|
| + int32_t maxLength,
|
| + int32_t *lengths,
|
| + int &count,
|
| + int limit,
|
| + TernaryNode *&parent,
|
| + UBool &pMatched,
|
| + uint16_t *values /*=NULL*/) const {
|
| // TODO: current implementation works in UTF-16 space
|
| const TernaryNode *up = NULL;
|
| const TernaryNode *p = fTrie;
|
| @@ -121,6 +139,10 @@
|
| pMatched = TRUE;
|
| int i;
|
|
|
| + if (!fValued) {
|
| + values = NULL;
|
| + }
|
| +
|
| UChar uc = utext_current32(text);
|
| for (i = 0; i < maxLength && p != NULL; ++i) {
|
| while (p != NULL) {
|
| @@ -141,7 +163,11 @@
|
| break;
|
| }
|
| // Must be equal to get here
|
| - if (limit > 0 && (p->flags & kEndsWord)) {
|
| + if (limit > 0 && (p->flags > 0)) {
|
| + //is there a more efficient way to add values? ie. remove if stmt
|
| + if(values != NULL) {
|
| + values[mycount] = p->flags;
|
| + }
|
| lengths[mycount++] = i+1;
|
| --limit;
|
| }
|
| @@ -161,13 +187,14 @@
|
| void
|
| MutableTrieDictionary::addWord( const UChar *word,
|
| int32_t length,
|
| - UErrorCode &status ) {
|
| -#if 0
|
| - if (length <= 0) {
|
| + UErrorCode &status,
|
| + uint16_t value /* = 0 */ ) {
|
| + // dictionary cannot store zero values, would interfere with flags
|
| + if (length <= 0 || (!fValued && value > 0) || (fValued && value == 0)) {
|
| status = U_ILLEGAL_ARGUMENT_ERROR;
|
| return;
|
| }
|
| -#endif
|
| +
|
| TernaryNode *parent;
|
| UBool pMatched;
|
| int count;
|
| @@ -177,7 +204,7 @@
|
| matched = search(fIter, length, NULL, count, 0, parent, pMatched);
|
|
|
| while (matched++ < length) {
|
| - UChar32 uc = utext_next32(fIter); // TODO: supplemetary support?
|
| + UChar32 uc = utext_next32(fIter); // TODO: supplementary support?
|
| U_ASSERT(uc != U_SENTINEL);
|
| TernaryNode *newNode = new TernaryNode(uc);
|
| if (newNode == NULL) {
|
| @@ -199,30 +226,23 @@
|
| parent = newNode;
|
| }
|
|
|
| - parent->flags |= kEndsWord;
|
| -}
|
| -
|
| -#if 0
|
| -void
|
| -MutableTrieDictionary::addWords( UEnumeration *words,
|
| - UErrorCode &status ) {
|
| - int32_t length;
|
| - const UChar *word;
|
| - while ((word = uenum_unext(words, &length, &status)) && U_SUCCESS(status)) {
|
| - addWord(word, length, status);
|
| + if(fValued && value > 0){
|
| + parent->flags = value;
|
| + } else {
|
| + parent->flags |= kEndsWord;
|
| }
|
| }
|
| -#endif
|
|
|
| int32_t
|
| MutableTrieDictionary::matches( UText *text,
|
| int32_t maxLength,
|
| int32_t *lengths,
|
| int &count,
|
| - int limit ) const {
|
| + int limit,
|
| + uint16_t *values /*=NULL*/) const {
|
| TernaryNode *parent;
|
| UBool pMatched;
|
| - return search(text, maxLength, lengths, count, limit, parent, pMatched);
|
| + return search(text, maxLength, lengths, count, limit, parent, pMatched, values);
|
| }
|
|
|
| // Implementation of iteration for MutableTrieDictionary
|
| @@ -277,7 +297,7 @@
|
| break;
|
| }
|
| case kEqual:
|
| - emit = (node->flags & kEndsWord) != 0;
|
| + emit = node->flags > 0;
|
| equal = (node->equal != NULL);
|
| // If this node should be part of the next emitted string, append
|
| // the UChar to the string, and make sure we pop it when we come
|
| @@ -299,7 +319,7 @@
|
| }
|
| case kGreaterThan:
|
| // If this node's character is in the string, remove it.
|
| - if (node->equal != NULL || (node->flags & kEndsWord)) {
|
| + if (node->equal != NULL || node->flags > 0) {
|
| unistr.truncate(unistr.length()-1);
|
| }
|
| if (node->high != NULL) {
|
| @@ -354,14 +374,77 @@
|
| * CompactTrieDictionary
|
| */
|
|
|
| +//TODO further optimization:
|
| +// minimise size of trie with logprobs by storing values
|
| +// for terminal nodes directly in offsets[]
|
| +// --> calculating from next offset *might* be simpler, but would have to add
|
| +// one last offset for logprob of last node
|
| +// --> if calculate from current offset, need to factor in possible overflow
|
| +// as well.
|
| +// idea: store in offset, set first bit to indicate logprob storage-->won't
|
| +// have to access additional node
|
| +
|
| +// {'Dic', 1}, version 1: uses old header, no values
|
| +#define COMPACT_TRIE_MAGIC_1 0x44696301
|
| +// version 2: uses new header (more than 2^16 nodes), no values
|
| +#define COMPACT_TRIE_MAGIC_2 0x44696302
|
| +// version 3: uses new header, includes values
|
| +#define COMPACT_TRIE_MAGIC_3 0x44696303
|
| +
|
| struct CompactTrieHeader {
|
| uint32_t size; // Size of the data in bytes
|
| uint32_t magic; // Magic number (including version)
|
| + uint32_t nodeCount; // Number of entries in offsets[]
|
| + uint32_t root; // Node number of the root node
|
| + uint32_t offsets[1]; // Offsets to nodes from start of data
|
| +};
|
| +
|
| +// old version of CompactTrieHeader kept for backwards compatibility
|
| +struct CompactTrieHeaderV1 {
|
| + uint32_t size; // Size of the data in bytes
|
| + uint32_t magic; // Magic number (including version)
|
| uint16_t nodeCount; // Number of entries in offsets[]
|
| uint16_t root; // Node number of the root node
|
| - uint32_t offsets[1]; // Offsets to nodes from start of data
|
| + uint32_t offsets[1]; // Offsets to nodes from start of data
|
| };
|
|
|
| +// Helper class for managing CompactTrieHeader and CompactTrieHeaderV1
|
| +struct CompactTrieInfo {
|
| + uint32_t size; // Size of the data in bytes
|
| + uint32_t magic; // Magic number (including version)
|
| + uint32_t nodeCount; // Number of entries in offsets[]
|
| + uint32_t root; // Node number of the root node
|
| + uint32_t *offsets; // Offsets to nodes from start of data
|
| + uint8_t *address; // pointer to header bytes in memory
|
| +
|
| + CompactTrieInfo(const void *data, UErrorCode &status){
|
| + CompactTrieHeader *header = (CompactTrieHeader *) data;
|
| + if (header->magic != COMPACT_TRIE_MAGIC_1 &&
|
| + header->magic != COMPACT_TRIE_MAGIC_2 &&
|
| + header->magic != COMPACT_TRIE_MAGIC_3) {
|
| + status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + } else {
|
| + size = header->size;
|
| + magic = header->magic;
|
| +
|
| + if (header->magic == COMPACT_TRIE_MAGIC_1) {
|
| + CompactTrieHeaderV1 *headerV1 = (CompactTrieHeaderV1 *) header;
|
| + nodeCount = headerV1->nodeCount;
|
| + root = headerV1->root;
|
| + offsets = &(headerV1->offsets[0]);
|
| + address = (uint8_t *)headerV1;
|
| + } else {
|
| + nodeCount = header->nodeCount;
|
| + root = header->root;
|
| + offsets = &(header->offsets[0]);
|
| + address = (uint8_t *)header;
|
| + }
|
| + }
|
| + }
|
| +
|
| + ~CompactTrieInfo(){}
|
| +};
|
| +
|
| // Note that to avoid platform-specific alignment issues, all members of the node
|
| // structures should be the same size, or should contain explicit padding to
|
| // natural alignment boundaries.
|
| @@ -375,10 +458,14 @@
|
| enum CompactTrieNodeFlags {
|
| kVerticalNode = 0x1000, // This is a vertical node
|
| kParentEndsWord = 0x2000, // The node whose equal link points to this ends a word
|
| - kReservedFlag1 = 0x4000,
|
| - kReservedFlag2 = 0x8000,
|
| + kExceedsCount = 0x4000, // new MSB for count >= 4096, originally kReservedFlag1
|
| + kEqualOverflows = 0x8000, // Links to nodeIDs > 2^16, orig. kReservedFlag2
|
| kCountMask = 0x0FFF, // The count portion of flagscount
|
| - kFlagMask = 0xF000 // The flags portion of flagscount
|
| + kFlagMask = 0xF000, // The flags portion of flagscount
|
| + kRootCountMask = 0x7FFF // The count portion of flagscount in the root node
|
| +
|
| + //offset flags:
|
| + //kOffsetContainsValue = 0x80000000 // Offset contains value for parent node
|
| };
|
|
|
| // The two node types are distinguished by the kVerticalNode flag.
|
| @@ -402,81 +489,216 @@
|
| uint16_t chars[1]; // Code units
|
| };
|
|
|
| -// {'Dic', 1}, version 1
|
| -#define COMPACT_TRIE_MAGIC_1 0x44696301
|
| -
|
| CompactTrieDictionary::CompactTrieDictionary(UDataMemory *dataObj,
|
| UErrorCode &status )
|
| : fUData(dataObj)
|
| {
|
| - fData = (const CompactTrieHeader *) udata_getMemory(dataObj);
|
| + fInfo = (CompactTrieInfo *)uprv_malloc(sizeof(CompactTrieInfo));
|
| + *fInfo = CompactTrieInfo(udata_getMemory(dataObj), status);
|
| fOwnData = FALSE;
|
| - if (fData->magic != COMPACT_TRIE_MAGIC_1) {
|
| - status = U_ILLEGAL_ARGUMENT_ERROR;
|
| - fData = NULL;
|
| - }
|
| }
|
| +
|
| CompactTrieDictionary::CompactTrieDictionary( const void *data,
|
| UErrorCode &status )
|
| : fUData(NULL)
|
| {
|
| - fData = (const CompactTrieHeader *) data;
|
| + fInfo = (CompactTrieInfo *)uprv_malloc(sizeof(CompactTrieInfo));
|
| + *fInfo = CompactTrieInfo(data, status);
|
| fOwnData = FALSE;
|
| - if (fData->magic != COMPACT_TRIE_MAGIC_1) {
|
| - status = U_ILLEGAL_ARGUMENT_ERROR;
|
| - fData = NULL;
|
| - }
|
| }
|
|
|
| CompactTrieDictionary::CompactTrieDictionary( const MutableTrieDictionary &dict,
|
| UErrorCode &status )
|
| : fUData(NULL)
|
| {
|
| - fData = compactMutableTrieDictionary(dict, status);
|
| + const CompactTrieHeader* header = compactMutableTrieDictionary(dict, status);
|
| + if (U_SUCCESS(status)) {
|
| + fInfo = (CompactTrieInfo *)uprv_malloc(sizeof(CompactTrieInfo));
|
| + *fInfo = CompactTrieInfo(header, status);
|
| + }
|
| +
|
| fOwnData = !U_FAILURE(status);
|
| }
|
|
|
| CompactTrieDictionary::~CompactTrieDictionary() {
|
| if (fOwnData) {
|
| - uprv_free((void *)fData);
|
| + uprv_free((void *)(fInfo->address));
|
| }
|
| + uprv_free((void *)fInfo);
|
| +
|
| if (fUData) {
|
| udata_close(fUData);
|
| }
|
| }
|
|
|
| +UBool CompactTrieDictionary::getValued() const{
|
| + return fInfo->magic == COMPACT_TRIE_MAGIC_3;
|
| +}
|
| +
|
| uint32_t
|
| CompactTrieDictionary::dataSize() const {
|
| - return fData->size;
|
| + return fInfo->size;
|
| }
|
|
|
| const void *
|
| CompactTrieDictionary::data() const {
|
| - return fData;
|
| + return fInfo->address;
|
| }
|
|
|
| -// This function finds the address of a node for us, given its node ID
|
| +//This function finds the address of a node for us, given its node ID
|
| static inline const CompactTrieNode *
|
| -getCompactNode(const CompactTrieHeader *header, uint16_t node) {
|
| - return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]);
|
| +getCompactNode(const CompactTrieInfo *info, uint32_t node) {
|
| + if(node < info->root-1) {
|
| + return (const CompactTrieNode *)(&info->offsets[node]);
|
| + } else {
|
| + return (const CompactTrieNode *)(info->address + info->offsets[node]);
|
| + }
|
| }
|
|
|
| +//this version of getCompactNode is currently only used in compactMutableTrieDictionary()
|
| +static inline const CompactTrieNode *
|
| +getCompactNode(const CompactTrieHeader *header, uint32_t node) {
|
| + if(node < header->root-1) {
|
| + return (const CompactTrieNode *)(&header->offsets[node]);
|
| + } else {
|
| + return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]);
|
| + }
|
| +}
|
| +
|
| +
|
| +/**
|
| + * Calculates the number of links in a node
|
| + * @node The specified node
|
| + */
|
| +static inline const uint16_t
|
| +getCount(const CompactTrieNode *node){
|
| + return (node->flagscount & kCountMask);
|
| + //use the code below if number of links ever exceed 4096
|
| + //return (node->flagscount & kCountMask) + ((node->flagscount & kExceedsCount) >> 2);
|
| +}
|
| +
|
| +/**
|
| + * calculates an equal link node ID of a horizontal node
|
| + * @hnode The horizontal node containing the equal link
|
| + * @param index The index into hnode->entries[]
|
| + * @param nodeCount The length of hnode->entries[]
|
| + */
|
| +static inline uint32_t calcEqualLink(const CompactTrieVerticalNode *vnode){
|
| + if(vnode->flagscount & kEqualOverflows){
|
| + // treat overflow bits as an extension of chars[]
|
| + uint16_t *overflow = (uint16_t *) &vnode->chars[getCount((CompactTrieNode*)vnode)];
|
| + return vnode->equal + (((uint32_t)*overflow) << 16);
|
| + }else{
|
| + return vnode->equal;
|
| + }
|
| +}
|
| +
|
| +/**
|
| + * calculates an equal link node ID of a horizontal node
|
| + * @hnode The horizontal node containing the equal link
|
| + * @param index The index into hnode->entries[]
|
| + * @param nodeCount The length of hnode->entries[]
|
| + */
|
| +static inline uint32_t calcEqualLink(const CompactTrieHorizontalNode *hnode, uint16_t index, uint16_t nodeCount){
|
| + if(hnode->flagscount & kEqualOverflows){
|
| + //set overflow to point to the uint16_t containing the overflow bits
|
| + uint16_t *overflow = (uint16_t *) &hnode->entries[nodeCount];
|
| + overflow += index/4;
|
| + uint16_t extraBits = (*overflow >> (3 - (index % 4)) * 4) % 0x10;
|
| + return hnode->entries[index].equal + (((uint32_t)extraBits) << 16);
|
| + } else {
|
| + return hnode->entries[index].equal;
|
| + }
|
| +}
|
| +
|
| +/**
|
| + * Returns the value stored in the specified node which is associated with its
|
| + * parent node.
|
| + * TODO: how to tell that value is stored in node or in offset? check whether
|
| + * node ID < fInfo->root!
|
| + */
|
| +static inline uint16_t getValue(const CompactTrieHorizontalNode *hnode){
|
| + uint16_t count = getCount((CompactTrieNode *)hnode);
|
| + uint16_t overflowSize = 0; //size of node ID overflow storage in bytes
|
| +
|
| + if(hnode->flagscount & kEqualOverflows)
|
| + overflowSize = (count + 3) / 4 * sizeof(uint16_t);
|
| + return *((uint16_t *)((uint8_t *)&hnode->entries[count] + overflowSize));
|
| +}
|
| +
|
| +static inline uint16_t getValue(const CompactTrieVerticalNode *vnode){
|
| + // calculate size of total node ID overflow storage in bytes
|
| + uint16_t overflowSize = (vnode->flagscount & kEqualOverflows)? sizeof(uint16_t) : 0;
|
| + return *((uint16_t *)((uint8_t *)&vnode->chars[getCount((CompactTrieNode *)vnode)] + overflowSize));
|
| +}
|
| +
|
| +static inline uint16_t getValue(const CompactTrieNode *node){
|
| + if(node->flagscount & kVerticalNode)
|
| + return getValue((const CompactTrieVerticalNode *)node);
|
| + else
|
| + return getValue((const CompactTrieHorizontalNode *)node);
|
| +}
|
| +
|
| +//returns index of match in CompactTrieHorizontalNode.entries[] using binary search
|
| +inline int16_t
|
| +searchHorizontalEntries(const CompactTrieHorizontalEntry *entries,
|
| + UChar uc, uint16_t nodeCount){
|
| + int low = 0;
|
| + int high = nodeCount-1;
|
| + int middle;
|
| + while (high >= low) {
|
| + middle = (high+low)/2;
|
| + if (uc == entries[middle].ch) {
|
| + return middle;
|
| + }
|
| + else if (uc < entries[middle].ch) {
|
| + high = middle-1;
|
| + }
|
| + else {
|
| + low = middle+1;
|
| + }
|
| + }
|
| +
|
| + return -1;
|
| +}
|
| +
|
| int32_t
|
| CompactTrieDictionary::matches( UText *text,
|
| int32_t maxLength,
|
| int32_t *lengths,
|
| int &count,
|
| - int limit ) const {
|
| + int limit,
|
| + uint16_t *values /*= NULL*/) const {
|
| + if (fInfo->magic == COMPACT_TRIE_MAGIC_2)
|
| + values = NULL;
|
| +
|
| // TODO: current implementation works in UTF-16 space
|
| - const CompactTrieNode *node = getCompactNode(fData, fData->root);
|
| + const CompactTrieNode *node = getCompactNode(fInfo, fInfo->root);
|
| int mycount = 0;
|
|
|
| UChar uc = utext_current32(text);
|
| int i = 0;
|
|
|
| + // handle root node with only kEqualOverflows flag: assume horizontal node without parent
|
| + if(node != NULL){
|
| + const CompactTrieHorizontalNode *root = (const CompactTrieHorizontalNode *) node;
|
| + int index = searchHorizontalEntries(root->entries, uc, root->flagscount & kRootCountMask);
|
| + if(index > -1){
|
| + node = getCompactNode(fInfo, calcEqualLink(root, index, root->flagscount & kRootCountMask));
|
| + utext_next32(text);
|
| + uc = utext_current32(text);
|
| + ++i;
|
| + }else{
|
| + node = NULL;
|
| + }
|
| + }
|
| +
|
| while (node != NULL) {
|
| // Check if the node we just exited ends a word
|
| if (limit > 0 && (node->flagscount & kParentEndsWord)) {
|
| + if(values != NULL){
|
| + values[mycount] = getValue(node);
|
| + }
|
| lengths[mycount++] = i;
|
| --limit;
|
| }
|
| @@ -487,7 +709,7 @@
|
| break;
|
| }
|
|
|
| - int nodeCount = (node->flagscount & kCountMask);
|
| + int nodeCount = getCount(node);
|
| if (nodeCount == 0) {
|
| // Special terminal node; return now
|
| break;
|
| @@ -507,35 +729,27 @@
|
| // To get here we must have come through the whole list successfully;
|
| // go on to the next node. Note that a word cannot end in the middle
|
| // of a vertical node.
|
| - node = getCompactNode(fData, vnode->equal);
|
| + node = getCompactNode(fInfo, calcEqualLink(vnode));
|
| }
|
| else {
|
| // Horizontal node; do binary search
|
| const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
|
| - int low = 0;
|
| - int high = nodeCount-1;
|
| - int middle;
|
| - node = NULL; // If we don't find a match, we'll fall out of the loop
|
| - while (high >= low) {
|
| - middle = (high+low)/2;
|
| - if (uc == hnode->entries[middle].ch) {
|
| - // We hit a match; get the next node and next character
|
| - node = getCompactNode(fData, hnode->entries[middle].equal);
|
| - utext_next32(text);
|
| - uc = utext_current32(text);
|
| - ++i;
|
| - break;
|
| - }
|
| - else if (uc < hnode->entries[middle].ch) {
|
| - high = middle-1;
|
| - }
|
| - else {
|
| - low = middle+1;
|
| - }
|
| + const CompactTrieHorizontalEntry *entries;
|
| + entries = hnode->entries;
|
| +
|
| + int index = searchHorizontalEntries(entries, uc, nodeCount);
|
| + if(index > -1){ //
|
| + // We hit a match; get the next node and next character
|
| + node = getCompactNode(fInfo, calcEqualLink(hnode, index, nodeCount));
|
| + utext_next32(text);
|
| + uc = utext_current32(text);
|
| + ++i;
|
| + }else{
|
| + node = NULL; // If we don't find a match, we'll fall out of the loop
|
| }
|
| }
|
| }
|
| -exit:
|
| + exit:
|
| count = mycount;
|
| return i;
|
| }
|
| @@ -545,16 +759,16 @@
|
| private:
|
| UVector32 fNodeStack; // Stack of nodes to process
|
| UVector32 fIndexStack; // Stack of where in node we are
|
| - const CompactTrieHeader *fHeader; // Trie data
|
| + const CompactTrieInfo *fInfo; // Trie data
|
|
|
| public:
|
| static UClassID U_EXPORT2 getStaticClassID(void);
|
| virtual UClassID getDynamicClassID(void) const;
|
| public:
|
| - CompactTrieEnumeration(const CompactTrieHeader *header, UErrorCode &status)
|
| + CompactTrieEnumeration(const CompactTrieInfo *info, UErrorCode &status)
|
| : fNodeStack(status), fIndexStack(status) {
|
| - fHeader = header;
|
| - fNodeStack.push(header->root, status);
|
| + fInfo = info;
|
| + fNodeStack.push(info->root, status);
|
| fIndexStack.push(0, status);
|
| unistr.remove();
|
| }
|
| @@ -564,14 +778,14 @@
|
|
|
| virtual StringEnumeration *clone() const {
|
| UErrorCode status = U_ZERO_ERROR;
|
| - return new CompactTrieEnumeration(fHeader, status);
|
| + return new CompactTrieEnumeration(fInfo, status);
|
| }
|
|
|
| virtual const UnicodeString * snext(UErrorCode &status);
|
|
|
| // Very expensive, but this should never be used.
|
| virtual int32_t count(UErrorCode &status) const {
|
| - CompactTrieEnumeration counter(fHeader, status);
|
| + CompactTrieEnumeration counter(fInfo, status);
|
| int32_t result = 0;
|
| while (counter.snext(status) != NULL && U_SUCCESS(status)) {
|
| ++result;
|
| @@ -582,7 +796,7 @@
|
| virtual void reset(UErrorCode &status) {
|
| fNodeStack.removeAllElements();
|
| fIndexStack.removeAllElements();
|
| - fNodeStack.push(fHeader->root, status);
|
| + fNodeStack.push(fInfo->root, status);
|
| fIndexStack.push(0, status);
|
| unistr.remove();
|
| }
|
| @@ -595,26 +809,34 @@
|
| if (fNodeStack.empty() || U_FAILURE(status)) {
|
| return NULL;
|
| }
|
| - const CompactTrieNode *node = getCompactNode(fHeader, fNodeStack.peeki());
|
| + const CompactTrieNode *node = getCompactNode(fInfo, fNodeStack.peeki());
|
| int where = fIndexStack.peeki();
|
| while (!fNodeStack.empty() && U_SUCCESS(status)) {
|
| - int nodeCount = (node->flagscount & kCountMask);
|
| + int nodeCount;
|
| +
|
| + bool isRoot = fNodeStack.peeki() == static_cast<int32_t>(fInfo->root);
|
| + if(isRoot){
|
| + nodeCount = node->flagscount & kRootCountMask;
|
| + } else {
|
| + nodeCount = getCount(node);
|
| + }
|
| +
|
| UBool goingDown = FALSE;
|
| if (nodeCount == 0) {
|
| // Terminal node; go up immediately
|
| fNodeStack.popi();
|
| fIndexStack.popi();
|
| - node = getCompactNode(fHeader, fNodeStack.peeki());
|
| + node = getCompactNode(fInfo, fNodeStack.peeki());
|
| where = fIndexStack.peeki();
|
| }
|
| - else if (node->flagscount & kVerticalNode) {
|
| + else if ((node->flagscount & kVerticalNode) && !isRoot) {
|
| // Vertical node
|
| const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
|
| if (where == 0) {
|
| // Going down
|
| - unistr.append((const UChar *)vnode->chars, (int32_t) nodeCount);
|
| + unistr.append((const UChar *)vnode->chars, nodeCount);
|
| fIndexStack.setElementAt(1, fIndexStack.size()-1);
|
| - node = getCompactNode(fHeader, fNodeStack.push(vnode->equal, status));
|
| + node = getCompactNode(fInfo, fNodeStack.push(calcEqualLink(vnode), status));
|
| where = fIndexStack.push(0, status);
|
| goingDown = TRUE;
|
| }
|
| @@ -623,7 +845,7 @@
|
| unistr.truncate(unistr.length()-nodeCount);
|
| fNodeStack.popi();
|
| fIndexStack.popi();
|
| - node = getCompactNode(fHeader, fNodeStack.peeki());
|
| + node = getCompactNode(fInfo, fNodeStack.peeki());
|
| where = fIndexStack.peeki();
|
| }
|
| }
|
| @@ -638,7 +860,7 @@
|
| // Push on next node
|
| unistr.append((UChar)hnode->entries[where].ch);
|
| fIndexStack.setElementAt(where+1, fIndexStack.size()-1);
|
| - node = getCompactNode(fHeader, fNodeStack.push(hnode->entries[where].equal, status));
|
| + node = getCompactNode(fInfo, fNodeStack.push(calcEqualLink(hnode, where, nodeCount), status));
|
| where = fIndexStack.push(0, status);
|
| goingDown = TRUE;
|
| }
|
| @@ -646,12 +868,14 @@
|
| // Going up
|
| fNodeStack.popi();
|
| fIndexStack.popi();
|
| - node = getCompactNode(fHeader, fNodeStack.peeki());
|
| + node = getCompactNode(fInfo, fNodeStack.peeki());
|
| where = fIndexStack.peeki();
|
| }
|
| }
|
| +
|
| // Check if the parent of the node we've just gone down to ends a
|
| // word. If so, return it.
|
| + // The root node should never end up here.
|
| if (goingDown && (node->flagscount & kParentEndsWord)) {
|
| return &unistr;
|
| }
|
| @@ -664,7 +888,7 @@
|
| if (U_FAILURE(status)) {
|
| return NULL;
|
| }
|
| - return new CompactTrieEnumeration(fData, status);
|
| + return new CompactTrieEnumeration(fInfo, status);
|
| }
|
|
|
| //
|
| @@ -672,21 +896,36 @@
|
| // and back again
|
| //
|
|
|
| -// Helper classes to construct the compact trie
|
| +enum CompactTrieNodeType {
|
| + kHorizontalType = 0,
|
| + kVerticalType = 1,
|
| + kValueType = 2
|
| +};
|
| +
|
| +/**
|
| + * The following classes (i.e. BuildCompactTrie*Node) are helper classes to
|
| + * construct the compact trie by storing information for each node and later
|
| + * writing the node to memory in a sequential format.
|
| + */
|
| class BuildCompactTrieNode: public UMemory {
|
| - public:
|
| +public:
|
| UBool fParentEndsWord;
|
| - UBool fVertical;
|
| + CompactTrieNodeType fNodeType;
|
| UBool fHasDuplicate;
|
| + UBool fEqualOverflows;
|
| int32_t fNodeID;
|
| UnicodeString fChars;
|
| + uint16_t fValue;
|
|
|
| - public:
|
| - BuildCompactTrieNode(UBool parentEndsWord, UBool vertical, UStack &nodes, UErrorCode &status) {
|
| +public:
|
| + BuildCompactTrieNode(UBool parentEndsWord, CompactTrieNodeType nodeType,
|
| + UStack &nodes, UErrorCode &status, uint16_t value = 0) {
|
| fParentEndsWord = parentEndsWord;
|
| fHasDuplicate = FALSE;
|
| - fVertical = vertical;
|
| + fNodeType = nodeType;
|
| + fEqualOverflows = FALSE;
|
| fNodeID = nodes.size();
|
| + fValue = parentEndsWord? value : 0;
|
| nodes.push(this, status);
|
| }
|
|
|
| @@ -694,87 +933,225 @@
|
| }
|
|
|
| virtual uint32_t size() {
|
| - return sizeof(uint16_t);
|
| + if(fValue > 0)
|
| + return sizeof(uint16_t) * 2;
|
| + else
|
| + return sizeof(uint16_t);
|
| }
|
|
|
| virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &/*translate*/) {
|
| // Write flag/count
|
| - *((uint16_t *)(bytes+offset)) = (fChars.length() & kCountMask)
|
| - | (fVertical ? kVerticalNode : 0) | (fParentEndsWord ? kParentEndsWord : 0 );
|
| +
|
| + // if this ever fails, a flag bit (i.e. kExceedsCount) will need to be
|
| + // used as a 5th MSB.
|
| + U_ASSERT(fChars.length() < 4096 || fNodeID == 2);
|
| +
|
| + *((uint16_t *)(bytes+offset)) = (fEqualOverflows? kEqualOverflows : 0) |
|
| + ((fNodeID == 2)? (fChars.length() & kRootCountMask):
|
| + (
|
| + (fChars.length() & kCountMask) |
|
| + //((fChars.length() << 2) & kExceedsCount) |
|
| + (fNodeType == kVerticalType ? kVerticalNode : 0) |
|
| + (fParentEndsWord ? kParentEndsWord : 0 )
|
| + )
|
| + );
|
| offset += sizeof(uint16_t);
|
| }
|
| +
|
| + virtual void writeValue(uint8_t *bytes, uint32_t &offset) {
|
| + if(fValue > 0){
|
| + *((uint16_t *)(bytes+offset)) = fValue;
|
| + offset += sizeof(uint16_t);
|
| + }
|
| + }
|
| +
|
| };
|
|
|
| +/**
|
| + * Stores value of parent terminating nodes that have no more subtries.
|
| + */
|
| +class BuildCompactTrieValueNode: public BuildCompactTrieNode {
|
| +public:
|
| + BuildCompactTrieValueNode(UStack &nodes, UErrorCode &status, uint16_t value)
|
| + : BuildCompactTrieNode(TRUE, kValueType, nodes, status, value){
|
| + }
|
| +
|
| + virtual ~BuildCompactTrieValueNode(){
|
| + }
|
| +
|
| + virtual uint32_t size() {
|
| + return sizeof(uint16_t) * 2;
|
| + }
|
| +
|
| + virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
|
| + // don't write value directly to memory but store it in offset to be written later
|
| + //offset = fValue & kOffsetContainsValue;
|
| + BuildCompactTrieNode::write(bytes, offset, translate);
|
| + BuildCompactTrieNode::writeValue(bytes, offset);
|
| + }
|
| +};
|
| +
|
| class BuildCompactTrieHorizontalNode: public BuildCompactTrieNode {
|
| public:
|
| UStack fLinks;
|
| + UBool fMayOverflow; //intermediate value for fEqualOverflows
|
|
|
| public:
|
| - BuildCompactTrieHorizontalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
|
| - : BuildCompactTrieNode(parentEndsWord, FALSE, nodes, status), fLinks(status) {
|
| + BuildCompactTrieHorizontalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status, uint16_t value = 0)
|
| + : BuildCompactTrieNode(parentEndsWord, kHorizontalType, nodes, status, value), fLinks(status) {
|
| + fMayOverflow = FALSE;
|
| }
|
|
|
| virtual ~BuildCompactTrieHorizontalNode() {
|
| }
|
|
|
| + // It is impossible to know beforehand exactly how much space the node will
|
| + // need in memory before being written, because the node IDs in the equal
|
| + // links may or may not overflow after node coalescing. Therefore, this method
|
| + // returns the maximum size possible for the node.
|
| virtual uint32_t size() {
|
| - return offsetof(CompactTrieHorizontalNode,entries) +
|
| - (fChars.length()*sizeof(CompactTrieHorizontalEntry));
|
| + uint32_t estimatedSize = offsetof(CompactTrieHorizontalNode,entries) +
|
| + (fChars.length()*sizeof(CompactTrieHorizontalEntry));
|
| +
|
| + if(fValue > 0)
|
| + estimatedSize += sizeof(uint16_t);
|
| +
|
| + //estimate extra space needed to store overflow for node ID links
|
| + //may be more than what is actually needed
|
| + for(int i=0; i < fChars.length(); i++){
|
| + if(((BuildCompactTrieNode *)fLinks[i])->fNodeID > 0xFFFF){
|
| + fMayOverflow = TRUE;
|
| + break;
|
| + }
|
| + }
|
| + if(fMayOverflow) // added space for overflow should be same as ceil(fChars.length()/4) * sizeof(uint16_t)
|
| + estimatedSize += (sizeof(uint16_t) * fChars.length() + 2)/4;
|
| +
|
| + return estimatedSize;
|
| }
|
|
|
| virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
|
| + int32_t count = fChars.length();
|
| +
|
| + //if largest nodeID > 2^16, set flag
|
| + //large node IDs are more likely to be at the back of the array
|
| + for (int32_t i = count-1; i >= 0; --i) {
|
| + if(translate.elementAti(((BuildCompactTrieNode *)fLinks[i])->fNodeID) > 0xFFFF){
|
| + fEqualOverflows = TRUE;
|
| + break;
|
| + }
|
| + }
|
| +
|
| BuildCompactTrieNode::write(bytes, offset, translate);
|
| - int32_t count = fChars.length();
|
| +
|
| + // write entries[] to memory
|
| for (int32_t i = 0; i < count; ++i) {
|
| CompactTrieHorizontalEntry *entry = (CompactTrieHorizontalEntry *)(bytes+offset);
|
| entry->ch = fChars[i];
|
| entry->equal = translate.elementAti(((BuildCompactTrieNode *)fLinks[i])->fNodeID);
|
| #ifdef DEBUG_TRIE_DICT
|
| - if (entry->equal == 0) {
|
| +
|
| + if ((entry->equal == 0) && !fEqualOverflows) {
|
| fprintf(stderr, "ERROR: horizontal link %d, logical node %d maps to physical node zero\n",
|
| i, ((BuildCompactTrieNode *)fLinks[i])->fNodeID);
|
| }
|
| #endif
|
| offset += sizeof(CompactTrieHorizontalEntry);
|
| }
|
| +
|
| + // append extra bits of equal nodes to end if fEqualOverflows
|
| + if (fEqualOverflows) {
|
| + uint16_t leftmostBits = 0;
|
| + for (int16_t i = 0; i < count; i++) {
|
| + leftmostBits = (leftmostBits << 4) | getLeftmostBits(translate, i);
|
| +
|
| + // write filled uint16_t to memory
|
| + if(i % 4 == 3){
|
| + *((uint16_t *)(bytes+offset)) = leftmostBits;
|
| + leftmostBits = 0;
|
| + offset += sizeof(uint16_t);
|
| + }
|
| + }
|
| +
|
| + // pad last uint16_t with zeroes if necessary
|
| + int remainder = count % 4;
|
| + if (remainder > 0) {
|
| + *((uint16_t *)(bytes+offset)) = (leftmostBits << (16 - 4 * remainder));
|
| + offset += sizeof(uint16_t);
|
| + }
|
| + }
|
| +
|
| + BuildCompactTrieNode::writeValue(bytes, offset);
|
| }
|
| +
|
| + // returns leftmost bits of physical node link
|
| + uint16_t getLeftmostBits(const UVector32 &translate, uint32_t i){
|
| + uint16_t leftmostBits = (uint16_t) (translate.elementAti(((BuildCompactTrieNode *)fLinks[i])->fNodeID) >> 16);
|
| +#ifdef DEBUG_TRIE_DICT
|
| + if (leftmostBits > 0xF) {
|
| + fprintf(stderr, "ERROR: horizontal link %d, logical node %d exceeds maximum possible node ID value\n",
|
| + i, ((BuildCompactTrieNode *)fLinks[i])->fNodeID);
|
| + }
|
| +#endif
|
| + return leftmostBits;
|
| + }
|
|
|
| void addNode(UChar ch, BuildCompactTrieNode *link, UErrorCode &status) {
|
| fChars.append(ch);
|
| fLinks.push(link, status);
|
| }
|
| +
|
| };
|
|
|
| class BuildCompactTrieVerticalNode: public BuildCompactTrieNode {
|
| - public:
|
| +public:
|
| BuildCompactTrieNode *fEqual;
|
|
|
| - public:
|
| - BuildCompactTrieVerticalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
|
| - : BuildCompactTrieNode(parentEndsWord, TRUE, nodes, status) {
|
| +public:
|
| + BuildCompactTrieVerticalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status, uint16_t value = 0)
|
| + : BuildCompactTrieNode(parentEndsWord, kVerticalType, nodes, status, value) {
|
| fEqual = NULL;
|
| }
|
|
|
| virtual ~BuildCompactTrieVerticalNode() {
|
| }
|
|
|
| + // Returns the maximum possible size of this node. See comment in
|
| + // BuildCompactTrieHorizontal node for more information.
|
| virtual uint32_t size() {
|
| - return offsetof(CompactTrieVerticalNode,chars) + (fChars.length()*sizeof(uint16_t));
|
| + uint32_t estimatedSize = offsetof(CompactTrieVerticalNode,chars) + (fChars.length()*sizeof(uint16_t));
|
| + if(fValue > 0){
|
| + estimatedSize += sizeof(uint16_t);
|
| + }
|
| +
|
| + if(fEqual->fNodeID > 0xFFFF){
|
| + estimatedSize += sizeof(uint16_t);
|
| + }
|
| + return estimatedSize;
|
| }
|
|
|
| virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
|
| CompactTrieVerticalNode *node = (CompactTrieVerticalNode *)(bytes+offset);
|
| + fEqualOverflows = (translate.elementAti(fEqual->fNodeID) > 0xFFFF);
|
| BuildCompactTrieNode::write(bytes, offset, translate);
|
| node->equal = translate.elementAti(fEqual->fNodeID);
|
| offset += sizeof(node->equal);
|
| #ifdef DEBUG_TRIE_DICT
|
| - if (node->equal == 0) {
|
| + if ((node->equal == 0) && !fEqualOverflows) {
|
| fprintf(stderr, "ERROR: vertical link, logical node %d maps to physical node zero\n",
|
| fEqual->fNodeID);
|
| }
|
| #endif
|
| fChars.extract(0, fChars.length(), (UChar *)node->chars);
|
| - offset += sizeof(uint16_t)*fChars.length();
|
| + offset += sizeof(UChar)*fChars.length();
|
| +
|
| + // append 16 bits of to end for equal node if fEqualOverflows
|
| + if (fEqualOverflows) {
|
| + *((uint16_t *)(bytes+offset)) = (translate.elementAti(fEqual->fNodeID) >> 16);
|
| + offset += sizeof(uint16_t);
|
| + }
|
| +
|
| + BuildCompactTrieNode::writeValue(bytes, offset);
|
| }
|
|
|
| void addChar(UChar ch) {
|
| @@ -784,60 +1161,85 @@
|
| void setLink(BuildCompactTrieNode *node) {
|
| fEqual = node;
|
| }
|
| +
|
| };
|
|
|
| // Forward declaration
|
| static void walkHorizontal(const TernaryNode *node,
|
| BuildCompactTrieHorizontalNode *building,
|
| UStack &nodes,
|
| - UErrorCode &status);
|
| + UErrorCode &status,
|
| + Hashtable *values);
|
|
|
| -// Convert one node. Uses recursion.
|
| +// Convert one TernaryNode into a BuildCompactTrieNode. Uses recursion.
|
|
|
| static BuildCompactTrieNode *
|
| -compactOneNode(const TernaryNode *node, UBool parentEndsWord, UStack &nodes, UErrorCode &status) {
|
| +compactOneNode(const TernaryNode *node, UBool parentEndsWord, UStack &nodes,
|
| + UErrorCode &status, Hashtable *values = NULL, uint16_t parentValue = 0) {
|
| if (U_FAILURE(status)) {
|
| return NULL;
|
| }
|
| BuildCompactTrieNode *result = NULL;
|
| UBool horizontal = (node->low != NULL || node->high != NULL);
|
| if (horizontal) {
|
| - BuildCompactTrieHorizontalNode *hResult =
|
| - new BuildCompactTrieHorizontalNode(parentEndsWord, nodes, status);
|
| + BuildCompactTrieHorizontalNode *hResult;
|
| + if(values != NULL){
|
| + hResult = new BuildCompactTrieHorizontalNode(parentEndsWord, nodes, status, parentValue);
|
| + } else {
|
| + hResult = new BuildCompactTrieHorizontalNode(parentEndsWord, nodes, status);
|
| + }
|
| +
|
| if (hResult == NULL) {
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| return NULL;
|
| }
|
| if (U_SUCCESS(status)) {
|
| - walkHorizontal(node, hResult, nodes, status);
|
| + walkHorizontal(node, hResult, nodes, status, values);
|
| result = hResult;
|
| }
|
| }
|
| else {
|
| - BuildCompactTrieVerticalNode *vResult =
|
| - new BuildCompactTrieVerticalNode(parentEndsWord, nodes, status);
|
| + BuildCompactTrieVerticalNode *vResult;
|
| + if(values != NULL){
|
| + vResult = new BuildCompactTrieVerticalNode(parentEndsWord, nodes, status, parentValue);
|
| + } else {
|
| + vResult = new BuildCompactTrieVerticalNode(parentEndsWord, nodes, status);
|
| + }
|
| +
|
| if (vResult == NULL) {
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| + return NULL;
|
| }
|
| else if (U_SUCCESS(status)) {
|
| - UBool endsWord = FALSE;
|
| + uint16_t value = 0;
|
| + UBool endsWord = FALSE;
|
| // Take up nodes until we end a word, or hit a node with < or > links
|
| do {
|
| vResult->addChar(node->ch);
|
| - endsWord = (node->flags & kEndsWord) != 0;
|
| + value = node->flags;
|
| + endsWord = value > 0;
|
| node = node->equal;
|
| }
|
| while(node != NULL && !endsWord && node->low == NULL && node->high == NULL);
|
| +
|
| if (node == NULL) {
|
| if (!endsWord) {
|
| status = U_ILLEGAL_ARGUMENT_ERROR; // Corrupt input trie
|
| }
|
| - else {
|
| + else if(values != NULL){
|
| + UnicodeString key(value); //store value as a single-char UnicodeString
|
| + BuildCompactTrieValueNode *link = (BuildCompactTrieValueNode *) values->get(key);
|
| + if(link == NULL){
|
| + link = new BuildCompactTrieValueNode(nodes, status, value); //take out nodes?
|
| + values->put(key, link, status);
|
| + }
|
| + vResult->setLink(link);
|
| + } else {
|
| vResult->setLink((BuildCompactTrieNode *)nodes[1]);
|
| }
|
| }
|
| else {
|
| - vResult->setLink(compactOneNode(node, endsWord, nodes, status));
|
| + vResult->setLink(compactOneNode(node, endsWord, nodes, status, values, value));
|
| }
|
| result = vResult;
|
| }
|
| @@ -849,19 +1251,28 @@
|
| // Uses recursion.
|
|
|
| static void walkHorizontal(const TernaryNode *node,
|
| - BuildCompactTrieHorizontalNode *building,
|
| - UStack &nodes,
|
| - UErrorCode &status) {
|
| + BuildCompactTrieHorizontalNode *building,
|
| + UStack &nodes,
|
| + UErrorCode &status, Hashtable *values = NULL) {
|
| while (U_SUCCESS(status) && node != NULL) {
|
| if (node->low != NULL) {
|
| - walkHorizontal(node->low, building, nodes, status);
|
| + walkHorizontal(node->low, building, nodes, status, values);
|
| }
|
| BuildCompactTrieNode *link = NULL;
|
| if (node->equal != NULL) {
|
| - link = compactOneNode(node->equal, (node->flags & kEndsWord) != 0, nodes, status);
|
| + link = compactOneNode(node->equal, node->flags > 0, nodes, status, values, node->flags);
|
| }
|
| - else if (node->flags & kEndsWord) {
|
| - link = (BuildCompactTrieNode *)nodes[1];
|
| + else if (node->flags > 0) {
|
| + if(values != NULL) {
|
| + UnicodeString key(node->flags); //store value as a single-char UnicodeString
|
| + link = (BuildCompactTrieValueNode *) values->get(key);
|
| + if(link == NULL) {
|
| + link = new BuildCompactTrieValueNode(nodes, status, node->flags); //take out nodes?
|
| + values->put(key, link, status);
|
| + }
|
| + } else {
|
| + link = (BuildCompactTrieNode *)nodes[1];
|
| + }
|
| }
|
| if (U_SUCCESS(status) && link != NULL) {
|
| building->addNode(node->ch, link, status);
|
| @@ -881,13 +1292,15 @@
|
| _sortBuildNodes(const void * /*context*/, const void *voidl, const void *voidr) {
|
| BuildCompactTrieNode *left = *(BuildCompactTrieNode **)voidl;
|
| BuildCompactTrieNode *right = *(BuildCompactTrieNode **)voidr;
|
| +
|
| // Check for comparing a node to itself, to avoid spurious duplicates
|
| if (left == right) {
|
| return 0;
|
| }
|
| +
|
| // Most significant is type of node. Can never coalesce.
|
| - if (left->fVertical != right->fVertical) {
|
| - return left->fVertical - right->fVertical;
|
| + if (left->fNodeType != right->fNodeType) {
|
| + return left->fNodeType - right->fNodeType;
|
| }
|
| // Next, the "parent ends word" flag. If that differs, we cannot coalesce.
|
| if (left->fParentEndsWord != right->fParentEndsWord) {
|
| @@ -898,12 +1311,19 @@
|
| if (result != 0) {
|
| return result;
|
| }
|
| +
|
| + // If the node value differs, we should not coalesce.
|
| + // If values aren't stored, all fValues should be 0.
|
| + if (left->fValue != right->fValue) {
|
| + return left->fValue - right->fValue;
|
| + }
|
| +
|
| // We know they're both the same node type, so branch for the two cases.
|
| - if (left->fVertical) {
|
| + if (left->fNodeType == kVerticalType) {
|
| result = ((BuildCompactTrieVerticalNode *)left)->fEqual->fNodeID
|
| - - ((BuildCompactTrieVerticalNode *)right)->fEqual->fNodeID;
|
| + - ((BuildCompactTrieVerticalNode *)right)->fEqual->fNodeID;
|
| }
|
| - else {
|
| + else if(left->fChars.length() > 0 && right->fChars.length() > 0){
|
| // We need to compare the links vectors. They should be the
|
| // same size because the strings were equal.
|
| // We compare the node IDs instead of the pointers, to handle
|
| @@ -914,9 +1334,10 @@
|
| int32_t count = hleft->fLinks.size();
|
| for (int32_t i = 0; i < count && result == 0; ++i) {
|
| result = ((BuildCompactTrieNode *)(hleft->fLinks[i]))->fNodeID -
|
| - ((BuildCompactTrieNode *)(hright->fLinks[i]))->fNodeID;
|
| + ((BuildCompactTrieNode *)(hright->fLinks[i]))->fNodeID;
|
| }
|
| }
|
| +
|
| // If they are equal to each other, mark them (speeds coalescing)
|
| if (result == 0) {
|
| left->fHasDuplicate = TRUE;
|
| @@ -1031,20 +1452,25 @@
|
| // Add node 0, used as the NULL pointer/sentinel.
|
| nodes.addElement((int32_t)0, status);
|
|
|
| + Hashtable *values = NULL; // Index of (unique) values
|
| + if (dict.fValued) {
|
| + values = new Hashtable(status);
|
| + }
|
| +
|
| // Start by creating the special empty node we use to indicate that the parent
|
| // terminates a word. This must be node 1, because the builder assumes
|
| - // that.
|
| + // that. This node will never be used for tries storing numerical values.
|
| if (U_FAILURE(status)) {
|
| return NULL;
|
| }
|
| - BuildCompactTrieNode *terminal = new BuildCompactTrieNode(TRUE, FALSE, nodes, status);
|
| + BuildCompactTrieNode *terminal = new BuildCompactTrieNode(TRUE, kHorizontalType, nodes, status);
|
| if (terminal == NULL) {
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| }
|
|
|
| // This call does all the work of building the new trie structure. The root
|
| - // will be node 2.
|
| - BuildCompactTrieNode *root = compactOneNode(dict.fTrie, FALSE, nodes, status);
|
| + // will have node ID 2 before writing to memory.
|
| + BuildCompactTrieNode *root = compactOneNode(dict.fTrie, FALSE, nodes, status, values);
|
| #ifdef DEBUG_TRIE_DICT
|
| (void) ::times(&timing);
|
| fprintf(stderr, "Compact trie built, %d nodes, time user %f system %f\n",
|
| @@ -1077,21 +1503,37 @@
|
| return NULL;
|
| }
|
|
|
| + //map terminal value nodes
|
| + int valueCount = 0;
|
| + UVector valueNodes(status);
|
| + if(values != NULL) {
|
| + valueCount = values->count(); //number of unique terminal value nodes
|
| + }
|
| +
|
| + // map non-terminal nodes
|
| + int valuePos = 1;//, nodePos = valueCount + valuePos;
|
| + nodeCount = valueCount + valuePos;
|
| for (i = 1; i < count; ++i) {
|
| node = (BuildCompactTrieNode *)nodes[i];
|
| if (node->fNodeID == i) {
|
| // Only one node out of each duplicate set is used
|
| - if (i >= translate.size()) {
|
| + if (node->fNodeID >= translate.size()) {
|
| // Logically extend the mapping table
|
| - translate.setSize(i+1);
|
| + translate.setSize(i + 1);
|
| }
|
| - translate.setElementAt(nodeCount++, i);
|
| + //translate.setElementAt(object, index)!
|
| + if(node->fNodeType == kValueType) {
|
| + valueNodes.addElement(node, status);
|
| + translate.setElementAt(valuePos++, i);
|
| + } else {
|
| + translate.setElementAt(nodeCount++, i);
|
| + }
|
| totalSize += node->size();
|
| }
|
| }
|
| -
|
| - // Check for overflowing 16 bits worth of nodes.
|
| - if (nodeCount > 0x10000) {
|
| +
|
| + // Check for overflowing 20 bits worth of nodes.
|
| + if (nodeCount > 0x100000) {
|
| status = U_ILLEGAL_ARGUMENT_ERROR;
|
| return NULL;
|
| }
|
| @@ -1111,9 +1553,14 @@
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| return NULL;
|
| }
|
| -
|
| +
|
| CompactTrieHeader *header = (CompactTrieHeader *)bytes;
|
| - header->size = totalSize;
|
| + //header->size = totalSize;
|
| + if(dict.fValued){
|
| + header->magic = COMPACT_TRIE_MAGIC_3;
|
| + } else {
|
| + header->magic = COMPACT_TRIE_MAGIC_2;
|
| + }
|
| header->nodeCount = nodeCount;
|
| header->offsets[0] = 0; // Sentinel
|
| header->root = translate.elementAti(root->fNodeID);
|
| @@ -1123,23 +1570,40 @@
|
| }
|
| #endif
|
| uint32_t offset = offsetof(CompactTrieHeader,offsets)+(nodeCount*sizeof(uint32_t));
|
| - nodeCount = 1;
|
| + nodeCount = valueCount + 1;
|
| +
|
| + // Write terminal value nodes to memory
|
| + for (i=0; i < valueNodes.size(); i++) {
|
| + //header->offsets[i + 1] = offset;
|
| + uint32_t tmpOffset = 0;
|
| + node = (BuildCompactTrieNode *) valueNodes.elementAt(i);
|
| + //header->offsets[i + 1] = (uint32_t)node->fValue;
|
| + node->write((uint8_t *)&header->offsets[i+1], tmpOffset, translate);
|
| + }
|
| +
|
| // Now write the data
|
| for (i = 1; i < count; ++i) {
|
| node = (BuildCompactTrieNode *)nodes[i];
|
| - if (node->fNodeID == i) {
|
| + if (node->fNodeID == i && node->fNodeType != kValueType) {
|
| header->offsets[nodeCount++] = offset;
|
| node->write(bytes, offset, translate);
|
| }
|
| }
|
| +
|
| + //free all extra space
|
| + uprv_realloc(bytes, offset);
|
| + header->size = offset;
|
| +
|
| #ifdef DEBUG_TRIE_DICT
|
| + fprintf(stdout, "Space freed: %d\n", totalSize-offset);
|
| +
|
| (void) ::times(&timing);
|
| fprintf(stderr, "Trie built, time user %f system %f\n",
|
| (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
|
| (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
|
| previous = timing;
|
| fprintf(stderr, "Final offset is %d\n", offset);
|
| -
|
| +
|
| // Collect statistics on node types and sizes
|
| int hCount = 0;
|
| int vCount = 0;
|
| @@ -1148,68 +1612,85 @@
|
| size_t hItemCount = 0;
|
| size_t vItemCount = 0;
|
| uint32_t previousOff = offset;
|
| - for (uint16_t nodeIdx = nodeCount-1; nodeIdx >= 2; --nodeIdx) {
|
| + uint32_t numOverflow = 0;
|
| + uint32_t valueSpace = 0;
|
| + for (uint32_t nodeIdx = nodeCount-1; nodeIdx >= 2; --nodeIdx) {
|
| const CompactTrieNode *node = getCompactNode(header, nodeIdx);
|
| - if (node->flagscount & kVerticalNode) {
|
| + int itemCount;
|
| + if(nodeIdx == header->root)
|
| + itemCount = node->flagscount & kRootCountMask;
|
| + else
|
| + itemCount = getCount(node);
|
| + if(node->flagscount & kEqualOverflows){
|
| + numOverflow++;
|
| + }
|
| + if (node->flagscount & kVerticalNode && nodeIdx != header->root) {
|
| vCount += 1;
|
| - vItemCount += (node->flagscount & kCountMask);
|
| + vItemCount += itemCount;
|
| vSize += previousOff-header->offsets[nodeIdx];
|
| }
|
| else {
|
| hCount += 1;
|
| - hItemCount += (node->flagscount & kCountMask);
|
| - hSize += previousOff-header->offsets[nodeIdx];
|
| + hItemCount += itemCount;
|
| + if(nodeIdx >= header->root) {
|
| + hSize += previousOff-header->offsets[nodeIdx];
|
| + }
|
| }
|
| +
|
| + if(header->magic == COMPACT_TRIE_MAGIC_3 && node->flagscount & kParentEndsWord)
|
| + valueSpace += sizeof(uint16_t);
|
| previousOff = header->offsets[nodeIdx];
|
| }
|
| fprintf(stderr, "Horizontal nodes: %d total, average %f bytes with %f items\n", hCount,
|
| (double)hSize/hCount, (double)hItemCount/hCount);
|
| fprintf(stderr, "Vertical nodes: %d total, average %f bytes with %f items\n", vCount,
|
| (double)vSize/vCount, (double)vItemCount/vCount);
|
| + fprintf(stderr, "Number of nodes with overflowing nodeIDs: %d \n", numOverflow);
|
| + fprintf(stderr, "Space taken up by values: %d \n", valueSpace);
|
| #endif
|
|
|
| if (U_FAILURE(status)) {
|
| uprv_free(bytes);
|
| header = NULL;
|
| }
|
| - else {
|
| - header->magic = COMPACT_TRIE_MAGIC_1;
|
| - }
|
| return header;
|
| }
|
|
|
| // Forward declaration
|
| static TernaryNode *
|
| -unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status );
|
| +unpackOneNode( const CompactTrieInfo *info, const CompactTrieNode *node, UErrorCode &status );
|
|
|
| -
|
| // Convert a horizontal node (or subarray thereof) into a ternary subtrie
|
| static TernaryNode *
|
| -unpackHorizontalArray( const CompactTrieHeader *header, const CompactTrieHorizontalEntry *array,
|
| - int low, int high, UErrorCode &status ) {
|
| +unpackHorizontalArray( const CompactTrieInfo *info, const CompactTrieHorizontalNode *hnode,
|
| + int low, int high, int nodeCount, UErrorCode &status) {
|
| if (U_FAILURE(status) || low > high) {
|
| return NULL;
|
| }
|
| int middle = (low+high)/2;
|
| - TernaryNode *result = new TernaryNode(array[middle].ch);
|
| + TernaryNode *result = new TernaryNode(hnode->entries[middle].ch);
|
| if (result == NULL) {
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| return NULL;
|
| }
|
| - const CompactTrieNode *equal = getCompactNode(header, array[middle].equal);
|
| + const CompactTrieNode *equal = getCompactNode(info, calcEqualLink(hnode, middle, nodeCount));
|
| if (equal->flagscount & kParentEndsWord) {
|
| - result->flags |= kEndsWord;
|
| + if(info->magic == COMPACT_TRIE_MAGIC_3){
|
| + result->flags = getValue(equal);
|
| + }else{
|
| + result->flags |= kEndsWord;
|
| + }
|
| }
|
| - result->low = unpackHorizontalArray(header, array, low, middle-1, status);
|
| - result->high = unpackHorizontalArray(header, array, middle+1, high, status);
|
| - result->equal = unpackOneNode(header, equal, status);
|
| + result->low = unpackHorizontalArray(info, hnode, low, middle-1, nodeCount, status);
|
| + result->high = unpackHorizontalArray(info, hnode, middle+1, high, nodeCount, status);
|
| + result->equal = unpackOneNode(info, equal, status);
|
| return result;
|
| }
|
|
|
| // Convert one compact trie node into a ternary subtrie
|
| static TernaryNode *
|
| -unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status ) {
|
| - int nodeCount = (node->flagscount & kCountMask);
|
| +unpackOneNode( const CompactTrieInfo *info, const CompactTrieNode *node, UErrorCode &status ) {
|
| + int nodeCount = getCount(node);
|
| if (nodeCount == 0 || U_FAILURE(status)) {
|
| // Failure, or terminal node
|
| return NULL;
|
| @@ -1234,29 +1715,41 @@
|
| previous = latest;
|
| }
|
| if (latest != NULL) {
|
| - const CompactTrieNode *equal = getCompactNode(header, vnode->equal);
|
| + const CompactTrieNode *equal = getCompactNode(info, calcEqualLink(vnode));
|
| if (equal->flagscount & kParentEndsWord) {
|
| - latest->flags |= kEndsWord;
|
| + if(info->magic == COMPACT_TRIE_MAGIC_3){
|
| + latest->flags = getValue(equal);
|
| + } else {
|
| + latest->flags |= kEndsWord;
|
| + }
|
| }
|
| - latest->equal = unpackOneNode(header, equal, status);
|
| + latest->equal = unpackOneNode(info, equal, status);
|
| }
|
| return head;
|
| }
|
| else {
|
| // Horizontal node
|
| const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
|
| - return unpackHorizontalArray(header, &hnode->entries[0], 0, nodeCount-1, status);
|
| + return unpackHorizontalArray(info, hnode, 0, nodeCount-1, nodeCount, status);
|
| }
|
| }
|
|
|
| +// returns a MutableTrieDictionary generated from the CompactTrieDictionary
|
| MutableTrieDictionary *
|
| CompactTrieDictionary::cloneMutable( UErrorCode &status ) const {
|
| - MutableTrieDictionary *result = new MutableTrieDictionary( status );
|
| + MutableTrieDictionary *result = new MutableTrieDictionary( status, fInfo->magic == COMPACT_TRIE_MAGIC_3 );
|
| if (result == NULL) {
|
| status = U_MEMORY_ALLOCATION_ERROR;
|
| return NULL;
|
| }
|
| - TernaryNode *root = unpackOneNode(fData, getCompactNode(fData, fData->root), status);
|
| + // treat root node as special case: don't call unpackOneNode() or unpackHorizontalArray() directly
|
| + // because only kEqualOverflows flag should be checked in root's flagscount
|
| + const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)
|
| + getCompactNode(fInfo, fInfo->root);
|
| + uint16_t nodeCount = hnode->flagscount & kRootCountMask;
|
| + TernaryNode *root = unpackHorizontalArray(fInfo, hnode, 0, nodeCount-1,
|
| + nodeCount, status);
|
| +
|
| if (U_FAILURE(status)) {
|
| delete root; // Clean up
|
| delete result;
|
| @@ -1270,8 +1763,8 @@
|
|
|
| U_CAPI int32_t U_EXPORT2
|
| triedict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
|
| - UErrorCode *status) {
|
| -
|
| + UErrorCode *status) {
|
| +
|
| if (status == NULL || U_FAILURE(*status)) {
|
| return 0;
|
| }
|
| @@ -1286,14 +1779,14 @@
|
| //
|
| const UDataInfo *pInfo = (const UDataInfo *)((const uint8_t *)inData+4);
|
| if(!( pInfo->dataFormat[0]==0x54 && /* dataFormat="TrDc" */
|
| - pInfo->dataFormat[1]==0x72 &&
|
| - pInfo->dataFormat[2]==0x44 &&
|
| - pInfo->dataFormat[3]==0x63 &&
|
| - pInfo->formatVersion[0]==1 )) {
|
| + pInfo->dataFormat[1]==0x72 &&
|
| + pInfo->dataFormat[2]==0x44 &&
|
| + pInfo->dataFormat[3]==0x63 &&
|
| + pInfo->formatVersion[0]==1 )) {
|
| udata_printError(ds, "triedict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
|
| - pInfo->dataFormat[0], pInfo->dataFormat[1],
|
| - pInfo->dataFormat[2], pInfo->dataFormat[3],
|
| - pInfo->formatVersion[0]);
|
| + pInfo->dataFormat[0], pInfo->dataFormat[1],
|
| + pInfo->dataFormat[2], pInfo->dataFormat[3],
|
| + pInfo->formatVersion[0]);
|
| *status=U_UNSUPPORTED_ERROR;
|
| return 0;
|
| }
|
| @@ -1311,8 +1804,10 @@
|
| //
|
| const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
|
| const CompactTrieHeader *header = (const CompactTrieHeader *)inBytes;
|
| - if (ds->readUInt32(header->magic) != COMPACT_TRIE_MAGIC_1
|
| - || ds->readUInt32(header->size) < sizeof(CompactTrieHeader))
|
| + uint32_t magic = ds->readUInt32(header->magic);
|
| + if (magic != COMPACT_TRIE_MAGIC_1 && magic != COMPACT_TRIE_MAGIC_2 && magic != COMPACT_TRIE_MAGIC_3
|
| + || magic == COMPACT_TRIE_MAGIC_1 && ds->readUInt32(header->size) < sizeof(CompactTrieHeaderV1)
|
| + || magic != COMPACT_TRIE_MAGIC_1 && ds->readUInt32(header->size) < sizeof(CompactTrieHeader))
|
| {
|
| udata_printError(ds, "triedict_swap(): CompactTrieHeader is invalid.\n");
|
| *status=U_UNSUPPORTED_ERROR;
|
| @@ -1333,10 +1828,10 @@
|
| //
|
| if (length < sizeWithUData) {
|
| udata_printError(ds, "triedict_swap(): too few bytes (%d after ICU Data header) for trie data.\n",
|
| - totalSize);
|
| + totalSize);
|
| *status=U_INDEX_OUTOFBOUNDS_ERROR;
|
| return 0;
|
| - }
|
| + }
|
|
|
| //
|
| // Swap the Data. Do the data itself first, then the CompactTrieHeader, because
|
| @@ -1355,20 +1850,38 @@
|
| }
|
|
|
| // We need to loop through all the nodes in the offset table, and swap each one.
|
| - uint16_t nodeCount = ds->readUInt16(header->nodeCount);
|
| + uint32_t nodeCount, rootId;
|
| + if(header->magic == COMPACT_TRIE_MAGIC_1) {
|
| + nodeCount = ds->readUInt16(((CompactTrieHeaderV1 *)header)->nodeCount);
|
| + rootId = ds->readUInt16(((CompactTrieHeaderV1 *)header)->root);
|
| + } else {
|
| + nodeCount = ds->readUInt32(header->nodeCount);
|
| + rootId = ds->readUInt32(header->root);
|
| + }
|
| +
|
| // Skip node 0, which should always be 0.
|
| - for (int i = 1; i < nodeCount; ++i) {
|
| + for (uint32_t i = 1; i < nodeCount; ++i) {
|
| uint32_t nodeOff = ds->readUInt32(header->offsets[i]);
|
| const CompactTrieNode *inNode = (const CompactTrieNode *)(inBytes + nodeOff);
|
| CompactTrieNode *outNode = (CompactTrieNode *)(outBytes + nodeOff);
|
| uint16_t flagscount = ds->readUInt16(inNode->flagscount);
|
| - uint16_t itemCount = flagscount & kCountMask;
|
| + uint16_t itemCount = getCount(inNode);
|
| + //uint16_t itemCount = flagscount & kCountMask;
|
| ds->writeUInt16(&outNode->flagscount, flagscount);
|
| if (itemCount > 0) {
|
| - if (flagscount & kVerticalNode) {
|
| + uint16_t overflow = 0; //number of extra uint16_ts needed to be swapped
|
| + if (flagscount & kVerticalNode && i != rootId) {
|
| + if(flagscount & kEqualOverflows){
|
| + // include overflow bits
|
| + overflow += 1;
|
| + }
|
| + if (header->magic == COMPACT_TRIE_MAGIC_3 && flagscount & kEndsParentWord) {
|
| + //include values
|
| + overflow += 1;
|
| + }
|
| ds->swapArray16(ds, inBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars),
|
| - itemCount*sizeof(uint16_t),
|
| - outBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars), status);
|
| + (itemCount + overflow)*sizeof(uint16_t),
|
| + outBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars), status);
|
| uint16_t equal = ds->readUInt16(inBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal);
|
| ds->writeUInt16(outBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal));
|
| }
|
| @@ -1381,27 +1894,63 @@
|
| word = ds->readUInt16(inHNode->entries[j].equal);
|
| ds->writeUInt16(&outHNode->entries[j].equal, word);
|
| }
|
| +
|
| + // swap overflow/value information
|
| + if(flagscount & kEqualOverflows){
|
| + overflow += (itemCount + 3) / 4;
|
| + }
|
| +
|
| + if (header->magic == COMPACT_TRIE_MAGIC_3 && i != rootId && flagscount & kEndsParentWord) {
|
| + //include values
|
| + overflow += 1;
|
| + }
|
| +
|
| + uint16_t *inOverflow = (uint16_t *) &inHNode->entries[itemCount];
|
| + uint16_t *outOverflow = (uint16_t *) &outHNode->entries[itemCount];
|
| + for(int j = 0; j<overflow; j++){
|
| + uint16_t extraInfo = ds->readUInt16(*inOverflow);
|
| + ds->writeUInt16(outOverflow, extraInfo);
|
| +
|
| + inOverflow++;
|
| + outOverflow++;
|
| + }
|
| }
|
| }
|
| }
|
| #endif
|
|
|
| - // All the data in all the nodes consist of 16 bit items. Swap them all at once.
|
| - uint16_t nodeCount = ds->readUInt16(header->nodeCount);
|
| - uint32_t nodesOff = offsetof(CompactTrieHeader,offsets)+((uint32_t)nodeCount*sizeof(uint32_t));
|
| - ds->swapArray16(ds, inBytes+nodesOff, totalSize-nodesOff, outBytes+nodesOff, status);
|
| -
|
| // Swap the header
|
| ds->writeUInt32(&outputHeader->size, totalSize);
|
| - uint32_t magic = ds->readUInt32(header->magic);
|
| ds->writeUInt32(&outputHeader->magic, magic);
|
| - ds->writeUInt16(&outputHeader->nodeCount, nodeCount);
|
| - uint16_t root = ds->readUInt16(header->root);
|
| - ds->writeUInt16(&outputHeader->root, root);
|
| - ds->swapArray32(ds, inBytes+offsetof(CompactTrieHeader,offsets),
|
| - sizeof(uint32_t)*(int32_t)nodeCount,
|
| - outBytes+offsetof(CompactTrieHeader,offsets), status);
|
|
|
| + uint32_t nodeCount;
|
| + uint32_t offsetPos;
|
| + if (header->magic == COMPACT_TRIE_MAGIC_1) {
|
| + CompactTrieHeaderV1 *headerV1 = (CompactTrieHeaderV1 *)header;
|
| + CompactTrieHeaderV1 *outputHeaderV1 = (CompactTrieHeaderV1 *)outputHeader;
|
| +
|
| + nodeCount = ds->readUInt16(headerV1->nodeCount);
|
| + ds->writeUInt16(&outputHeaderV1->nodeCount, nodeCount);
|
| + uint16_t root = ds->readUInt16(headerV1->root);
|
| + ds->writeUInt16(&outputHeaderV1->root, root);
|
| + offsetPos = offsetof(CompactTrieHeaderV1,offsets);
|
| + } else {
|
| + nodeCount = ds->readUInt32(header->nodeCount);
|
| + ds->writeUInt32(&outputHeader->nodeCount, nodeCount);
|
| + uint32_t root = ds->readUInt32(header->root);
|
| + ds->writeUInt32(&outputHeader->root, root);
|
| + offsetPos = offsetof(CompactTrieHeader,offsets);
|
| + }
|
| +
|
| + // All the data in all the nodes consist of 16 bit items. Swap them all at once.
|
| + uint32_t nodesOff = offsetPos+((uint32_t)nodeCount*sizeof(uint32_t));
|
| + ds->swapArray16(ds, inBytes+nodesOff, totalSize-nodesOff, outBytes+nodesOff, status);
|
| +
|
| + //swap offsets
|
| + ds->swapArray32(ds, inBytes+offsetPos,
|
| + sizeof(uint32_t)*(uint32_t)nodeCount,
|
| + outBytes+offsetPos, status);
|
| +
|
| return sizeWithUData;
|
| }
|
|
|
|
|