| Index: icu46/source/test/perf/unisetperf/draft/trieset.cpp
|
| ===================================================================
|
| --- icu46/source/test/perf/unisetperf/draft/trieset.cpp (revision 0)
|
| +++ icu46/source/test/perf/unisetperf/draft/trieset.cpp (revision 0)
|
| @@ -0,0 +1,111 @@
|
| +/*
|
| +**********************************************************************
|
| +* Copyright (C) 2007, International Business Machines
|
| +* Corporation and others. All Rights Reserved.
|
| +**********************************************************************
|
| +* file name: trieset.cpp
|
| +* encoding: US-ASCII
|
| +* tab size: 8 (not used)
|
| +* indentation:4
|
| +*
|
| +* created on: 2007jan15
|
| +* created by: Markus Scherer
|
| +*
|
| +* Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet
|
| +* using a UTrie with 8-bit (byte) results per code point.
|
| +* Modifies the trie index to make the BMP linear, and uses the original set
|
| +* for supplementary code points.
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +#include "unicont.h"
|
| +
|
| +#define UTRIE_GET8_LATIN1(trie) ((const uint8_t *)(trie)->data32+UTRIE_DATA_BLOCK_LENGTH)
|
| +
|
| +#define UTRIE_GET8_FROM_LEAD(trie, c16) \
|
| + ((const uint8_t *)(trie)->data32)[ \
|
| + ((int32_t)((trie)->index[(c16)>>UTRIE_SHIFT])<<UTRIE_INDEX_SHIFT)+ \
|
| + ((c16)&UTRIE_MASK) \
|
| + ]
|
| +
|
| +class TrieSet : public UObject, public UnicodeContainable {
|
| +public:
|
| + TrieSet(const UnicodeSet &set, UErrorCode &errorCode)
|
| + : trieData(NULL), latin1(NULL), restSet(set.clone()) {
|
| + if(U_FAILURE(errorCode)) {
|
| + return;
|
| + }
|
| + if(restSet==NULL) {
|
| + errorCode=U_MEMORY_ALLOCATION_ERROR;
|
| + return;
|
| + }
|
| +
|
| + UNewTrie *newTrie=utrie_open(NULL, NULL, 0x11000, 0, 0, TRUE);
|
| + UChar32 start, end;
|
| +
|
| + UnicodeSetIterator iter(set);
|
| +
|
| + while(iter.nextRange() && !iter.isString()) {
|
| + start=iter.getCodepoint();
|
| + end=iter.getCodepointEnd();
|
| + if(start>0xffff) {
|
| + break;
|
| + }
|
| + if(end>0xffff) {
|
| + end=0xffff;
|
| + }
|
| + if(!utrie_setRange32(newTrie, start, end+1, TRUE, TRUE)) {
|
| + errorCode=U_INTERNAL_PROGRAM_ERROR;
|
| + return;
|
| + }
|
| + }
|
| +
|
| + // Preflight the trie length.
|
| + int32_t length=utrie_serialize(newTrie, NULL, 0, NULL, 8, &errorCode);
|
| + if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
| + return;
|
| + }
|
| +
|
| + trieData=(uint32_t *)uprv_malloc(length);
|
| + if(trieData==NULL) {
|
| + errorCode=U_MEMORY_ALLOCATION_ERROR;
|
| + return;
|
| + }
|
| +
|
| + errorCode=U_ZERO_ERROR;
|
| + utrie_serialize(newTrie, trieData, length, NULL, 8, &errorCode);
|
| + utrie_unserialize(&trie, trieData, length, &errorCode); // TODO: Implement for 8-bit UTrie!
|
| +
|
| + if(U_SUCCESS(errorCode)) {
|
| + // Copy the indexes for surrogate code points into the BMP range
|
| + // for simple access across the entire BMP.
|
| + uprv_memcpy((uint16_t *)trie.index+(0xd800>>UTRIE_SHIFT),
|
| + trie.index+UTRIE_BMP_INDEX_LENGTH,
|
| + (0x800>>UTRIE_SHIFT)*2);
|
| + latin1=UTRIE_GET8_LATIN1(&trie);
|
| + }
|
| +
|
| + restSet.remove(0, 0xffff);
|
| + }
|
| +
|
| + ~TrieSet() {
|
| + uprv_free(trieData);
|
| + delete restSet;
|
| + }
|
| +
|
| + UBool contains(UChar32 c) const {
|
| + if((uint32_t)c<=0xff) {
|
| + return (UBool)latin1[c];
|
| + } else if((uint32_t)c<0xffff) {
|
| + return (UBool)UTRIE_GET8_FROM_LEAD(&trie, c);
|
| + } else {
|
| + return restSet->contains(c);
|
| + }
|
| + }
|
| +
|
| +private:
|
| + uint32_t *trieData;
|
| + const uint8_t *latin1;
|
| + UTrie trie;
|
| + UnicodeSet *restSet;
|
| +};
|
|
|
| Property changes on: icu46/source/test/perf/unisetperf/draft/trieset.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|