Index: icu46/source/test/perf/unisetperf/draft/trieset.cpp |
=================================================================== |
--- icu46/source/test/perf/unisetperf/draft/trieset.cpp (revision 0) |
+++ icu46/source/test/perf/unisetperf/draft/trieset.cpp (revision 0) |
@@ -0,0 +1,111 @@ |
+/* |
+********************************************************************** |
+* Copyright (C) 2007, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+********************************************************************** |
+* file name: trieset.cpp |
+* encoding: US-ASCII |
+* tab size: 8 (not used) |
+* indentation:4 |
+* |
+* created on: 2007jan15 |
+* created by: Markus Scherer |
+* |
+* Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet |
+* using a UTrie with 8-bit (byte) results per code point. |
+* Modifies the trie index to make the BMP linear, and uses the original set |
+* for supplementary code points. |
+*/ |
+ |
+#include "unicode/utypes.h" |
+#include "unicont.h" |
+ |
+// Yields a const uint8_t * that points UTRIE_DATA_BLOCK_LENGTH bytes past the |
+// start of (trie)->data32, with the trie data viewed as a byte array (the cast |
+// is applied before the addition, so the offset is counted in bytes). |
+// TrieSet stores this pointer and indexes it directly with code points |
+// U+0000..U+00FF. |
+// NOTE(review): presumably this is where the linear Latin-1 values of an 8-bit |
+// UTrie live; confirm against the UTrie data layout. |
+#define UTRIE_GET8_LATIN1(trie) ((const uint8_t *)(trie)->data32+UTRIE_DATA_BLOCK_LENGTH) |
+ |
+// Reads the byte stored for the 16-bit value c16 from (trie)->data32 viewed as |
+// a byte array: the index entry for c16>>UTRIE_SHIFT is shifted left by |
+// UTRIE_INDEX_SHIFT to form the block offset, and the low bits c16&UTRIE_MASK |
+// select the entry inside that block. |
+// Both macro arguments are expanded more than once, so pass expressions |
+// without side effects. |
+#define UTRIE_GET8_FROM_LEAD(trie, c16) \ |
+ ((const uint8_t *)(trie)->data32)[ \ |
+ ((int32_t)((trie)->index[(c16)>>UTRIE_SHIFT])<<UTRIE_INDEX_SHIFT)+ \ |
+ ((c16)&UTRIE_MASK) \ |
+ ] |
+ |
+/** |
+ * Immutable snapshot of a UnicodeSet intended for fast contains() lookups. |
+ * |
+ * Code points up to U+FFFF are answered from a byte-valued UTrie that is built |
+ * once in the constructor; all higher code points are delegated to a private |
+ * copy of the source set from which U+0000..U+FFFF has been removed. |
+ * Strings in the source set are not entered into the trie. |
+ * |
+ * Instances own raw buffers and are therefore not copyable. |
+ */ |
+class TrieSet : public UObject, public UnicodeContainable { |
+public: |
+    /** |
+     * Builds the lookup structures from a set. |
+     * |
+     * @param set       Source set; it is copied and not referenced afterwards. |
+     * @param errorCode In/out ICU error code. Nothing is built if it already |
+     *                  indicates a failure. If it indicates a failure on |
+     *                  return, latin1 is still NULL and contains() must not |
+     *                  be called on this object. |
+     */ |
+    TrieSet(const UnicodeSet &set, UErrorCode &errorCode) |
+            // NOTE(review): copy-constructed instead of set.clone() so that the |
+            // result is a UnicodeSet * no matter which pointer type clone() is |
+            // declared to return (UnicodeFunctor * in older ICU headers, as far |
+            // as I can tell - confirm). |
+            : trieData(NULL), latin1(NULL), restSet(new UnicodeSet(set)) { |
+        if(U_FAILURE(errorCode)) { |
+            return; |
+        } |
+        if(restSet==NULL) { |
+            errorCode=U_MEMORY_ALLOCATION_ERROR; |
+            return; |
+        } |
+ |
+        UNewTrie *newTrie=utrie_open(NULL, NULL, 0x11000, 0, 0, TRUE); |
+        if(newTrie==NULL) { |
+            errorCode=U_MEMORY_ALLOCATION_ERROR; |
+            return; |
+        } |
+ |
+        UBool finished=addBMPRanges(newTrie, set, errorCode) && freezeTrie(newTrie, errorCode); |
+ |
+        // The serialized copy in trieData is independent of the build-time |
+        // trie, so release the latter on every path. |
+        utrie_close(newTrie); |
+ |
+        if(finished) { |
+            // The trie now answers for the whole BMP. |
+            restSet->remove(0, 0xffff); |
+        } |
+    } |
+ |
+    /** Releases the serialized trie buffer and the supplementary set. */ |
+    ~TrieSet() { |
+        uprv_free(trieData); |
+        delete restSet; |
+    } |
+ |
+    /** |
+     * Tests whether a code point is in the set. |
+     * |
+     * @param c Code point to test. |
+     * @return Nonzero if c is contained. U+0000..U+00FF are read from the |
+     *         latin1 array, U+0100..U+FFFF from the trie, and everything |
+     *         else (including negative values) is passed to restSet. |
+     */ |
+    UBool contains(UChar32 c) const { |
+        if((uint32_t)c<=0xff) { |
+            return (UBool)latin1[c]; |
+        } else if((uint32_t)c<=0xffff) { |
+            // U+FFFF must be served here as well: it was removed from restSet. |
+            return (UBool)UTRIE_GET8_FROM_LEAD(&trie, c); |
+        } else { |
+            return restSet->contains(c); |
+        } |
+    } |
+ |
+private: |
+    // Copying would free trieData and restSet twice; declared but not defined. |
+    TrieSet(const TrieSet &other); |
+    TrieSet &operator=(const TrieSet &other); |
+ |
+    /** |
+     * Marks every BMP code point of the set as TRUE in the build-time trie. |
+     * Iteration stops at the first string or at the first range that starts |
+     * above U+FFFF; a range that crosses U+FFFF is clipped to it. |
+     * |
+     * @return FALSE, with errorCode set to U_INTERNAL_PROGRAM_ERROR, if a |
+     *         range could not be stored; TRUE otherwise. |
+     */ |
+    static UBool addBMPRanges(UNewTrie *newTrie, const UnicodeSet &set, UErrorCode &errorCode) { |
+        UnicodeSetIterator iter(set); |
+        while(iter.nextRange() && !iter.isString()) { |
+            UChar32 start=iter.getCodepoint(); |
+            UChar32 end=iter.getCodepointEnd(); |
+            if(start>0xffff) { |
+                break; |
+            } |
+            if(end>0xffff) { |
+                end=0xffff; |
+            } |
+            // end is the last code point of the range, hence end+1 as the limit. |
+            if(!utrie_setRange32(newTrie, start, end+1, TRUE, TRUE)) { |
+                errorCode=U_INTERNAL_PROGRAM_ERROR; |
+                return FALSE; |
+            } |
+        } |
+        return TRUE; |
+    } |
+ |
+    /** |
+     * Serializes the build-time trie into a newly allocated trieData buffer, |
+     * unserializes it into the trie member and, on success, patches the index |
+     * and sets latin1. |
+     * |
+     * @return FALSE if preflighting or the buffer allocation failed (errorCode |
+     *         is then a failure code); TRUE once serialization was attempted, |
+     *         in which case errorCode carries the result. |
+     */ |
+    UBool freezeTrie(UNewTrie *newTrie, UErrorCode &errorCode) { |
+        // Preflight the trie length. |
+        int32_t length=utrie_serialize(newTrie, NULL, 0, NULL, 8, &errorCode); |
+        if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { |
+            if(U_SUCCESS(errorCode)) { |
+                // Do not report success for an object without a trie. |
+                errorCode=U_INTERNAL_PROGRAM_ERROR; |
+            } |
+            return FALSE; |
+        } |
+ |
+        trieData=(uint32_t *)uprv_malloc(length); |
+        if(trieData==NULL) { |
+            errorCode=U_MEMORY_ALLOCATION_ERROR; |
+            return FALSE; |
+        } |
+ |
+        errorCode=U_ZERO_ERROR; |
+        utrie_serialize(newTrie, trieData, length, NULL, 8, &errorCode); |
+        utrie_unserialize(&trie, trieData, length, &errorCode); // TODO: Implement for 8-bit UTrie! |
+ |
+        if(U_SUCCESS(errorCode)) { |
+            // Copy the indexes for surrogate code points into the BMP range |
+            // for simple access across the entire BMP. |
+            // NOTE(review): this copies 0x800>>UTRIE_SHIFT index entries, i.e. |
+            // the whole D800..DFFF range; confirm that the index block behind |
+            // UTRIE_BMP_INDEX_LENGTH really holds that many surrogate entries. |
+            uprv_memcpy((uint16_t *)trie.index+(0xd800>>UTRIE_SHIFT), |
+                        trie.index+UTRIE_BMP_INDEX_LENGTH, |
+                        (0x800>>UTRIE_SHIFT)*2); |
+            latin1=UTRIE_GET8_LATIN1(&trie); |
+        } |
+        return TRUE; |
+    } |
+ |
+    uint32_t *trieData;      // serialized trie storage, owned (uprv_malloc) |
+    const uint8_t *latin1;   // points into the trie data; NULL until built |
+    UTrie trie;              // unserialized view onto trieData |
+    UnicodeSet *restSet;     // owned copy of the set, minus U+0000..U+FFFF |
+}; |
Property changes on: icu46/source/test/perf/unisetperf/draft/trieset.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |