Index: source/test/perf/unisetperf/draft/bitset.cpp |
diff --git a/source/test/perf/unisetperf/draft/bitset.cpp b/source/test/perf/unisetperf/draft/bitset.cpp |
deleted file mode 100644 |
index d4d3f3f9830b0bc9233c456b1dded55aa7c38144..0000000000000000000000000000000000000000 |
--- a/source/test/perf/unisetperf/draft/bitset.cpp |
+++ /dev/null |
@@ -1,198 +0,0 @@ |
-/* |
-********************************************************************** |
-* Copyright (C) 2014, International Business Machines |
-* Corporation and others. All Rights Reserved. |
-********************************************************************** |
-* file name: bitset.cpp |
-* encoding: US-ASCII |
-* tab size: 8 (not used) |
-* indentation:4 |
-* |
-* created on: 2007jan15 |
-* created by: Markus Scherer |
-* |
-* Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet |
-* using a folded bit set consisting of a 1k-entry index table and a |
-* compacted array of 64-bit words. |
-* Uses a simple hash table for compaction. |
-* Uses the original set for supplementary code points. |
-*/ |
- |
-#include "unicode/utypes.h" |
-#include "unicont.h" |
-#include "cmemory.h" // for UPRV_LENGTHOF |
- |
-/* |
- * Hash table for up to 1k 64-bit words, for 1 bit per BMP code point. |
- * Hashes 64-bit words and maps them to 16-bit integers which are |
- * assigned in order of new incoming words for subsequent storage |
- * in a contiguous array. |
- */ |
-struct BMPBitHash : public UObject { |
- int64_t keys[0x800]; // 2k |
- uint16_t values[0x800]; |
- uint16_t reverse[0x400]; |
- uint16_t count; |
- const int32_t prime=1301; // Less than 2k. |
- |
- BMPBitHash() : count(0) { |
- // Fill values[] with 0xffff. |
- uprv_memset(values, 0xff, sizeof(values)); |
- } |
- |
- /* |
- * Map a key to an integer count. |
- * Map at most 1k=0x400 different keys with this data structure. |
- */ |
- uint16_t map(int64_t key) { |
- int32_t hash=(int32_t)(key>>55)&0x1ff; |
- hash^=(int32_t)(key>>44)&0x7ff; |
- hash^=(int32_t)(key>>33)&0x7ff; |
- hash^=(int32_t)(key>>22)&0x7ff; |
- hash^=(int32_t)(key>>11)&0x7ff; |
- hash^=(int32_t)key&0x7ff; |
- for(;;) { |
- if(values[hash]==0xffff) { |
- // Unused slot. |
- keys[hash]=key; |
- reverse[count]=hash; |
- return values[hash]=count++; |
- } else if(keys[hash]==key) { |
- // Found a slot with this key. |
- return values[hash]; |
- } else { |
- // Used slot with a different key, move to another slot. |
- hash=(hash+prime)&0x7ff; |
- } |
- } |
- } |
- |
- uint16_t countKeys() const { return count; } |
- |
- /* |
- * Invert the hash map: Fill an array of length countKeys() with the keys |
- * indexed by their mapped values. |
- */ |
- void invert(int64_t *k) const { |
- uint16_t i; |
- |
- for(i=0; i<count; ++i) { |
- k[i]=keys[reverse[i]]; |
- } |
- } |
-}; |
- |
-class BitSet : public UObject, public UnicodeContainable { |
-public: |
- BitSet(const UnicodeSet &set, UErrorCode &errorCode) : bits(shortBits), restSet(set.clone()) { |
- if(U_FAILURE(errorCode)) { |
- return; |
- } |
- BMPBitHash *bitHash=new BMPBitHash; |
- if(bitHash==NULL || restSet==NULL) { |
- errorCode=U_MEMORY_ALLOCATION_ERROR; |
- return; |
- } |
- |
- UnicodeSetIterator iter(set); |
- int64_t b; |
- UChar32 start, end; |
- int32_t prevIndex, i, j; |
- |
- b=0; // Not necessary but makes compilers happy. |
- prevIndex=-1; |
- for(;;) { |
- if(iter.nextRange() && !iter.isString()) { |
- start=iter.getCodepoint(); |
- end=iter.getCodepointEnd(); |
- } else { |
- start=0x10000; |
- } |
- i=start>>6; |
- if(prevIndex!=i) { |
- // Finish the end of the previous range. |
- if(prevIndex<0) { |
- prevIndex=0; |
- } else { |
- index[prevIndex++]=bitHash->map(b); |
- } |
- // Fill all-zero entries between ranges. |
- if(prevIndex<i) { |
- uint16_t zero=bitHash->map(0); |
- do { |
- index[prevIndex++]=zero; |
- } while(prevIndex<i); |
- } |
- b=0; |
- } |
- if(start>0xffff) { |
- break; |
- } |
- b|=~((INT64_C(1)<<(start&0x3f))-1); |
- j=end>>6; |
- if(i<j) { |
- // Set bits for the start of the range. |
- index[i++]=bitHash->map(b); |
- // Fill all-one entries inside the range. |
- if(i<j) { |
- uint16_t all=bitHash->map(INT64_C(0xffffffffffffffff)); |
- do { |
- index[i++]=all; |
- } while(i<j); |
- } |
- b=INT64_C(0xffffffffffffffff); |
- } |
- /* i==j */ |
- b&=(INT64_C(1)<<(end&0x3f))-1; |
- prevIndex=j; |
- } |
- |
- if(bitHash->countKeys()>UPRV_LENGTHOF(shortBits)) { |
- bits=(int64_t *)uprv_malloc(bitHash->countKeys()*8); |
- } |
- if(bits!=NULL) { |
- bitHash->invert(bits); |
- } else { |
- bits=shortBits; |
- errorCode=U_MEMORY_ALLOCATION_ERROR; |
- return; |
- } |
- |
- latin1Set[0]=(uint32_t)bits[0]; |
- latin1Set[1]=(uint32_t)(bits[0]>>32); |
- latin1Set[2]=(uint32_t)bits[1]; |
- latin1Set[3]=(uint32_t)(bits[1]>>32); |
- latin1Set[4]=(uint32_t)bits[2]; |
- latin1Set[5]=(uint32_t)(bits[2]>>32); |
- latin1Set[6]=(uint32_t)bits[3]; |
- latin1Set[7]=(uint32_t)(bits[3]>>32); |
- |
- restSet.remove(0, 0xffff); |
- } |
- |
- ~BitSet() { |
- if(bits!=shortBits) { |
- uprv_free(bits); |
- } |
- delete restSet; |
- } |
- |
- UBool contains(UChar32 c) const { |
- if((uint32_t)c<=0xff) { |
- return (UBool)((latin1Set[c>>5]&((uint32_t)1<<(c&0x1f)))!=0); |
- } else if((uint32_t)c<0xffff) { |
- return (UBool)((bits[c>>6]&(INT64_C(1)<<(c&0x3f)))!=0); |
- } else { |
- return restSet->contains(c); |
- } |
- } |
- |
-private: |
- uint16_t index[0x400]; |
- int64_t shortBits[32]; |
- int64_t *bits; |
- |
- uint32_t latin1Bits[8]; |
- |
- UnicodeSet *restSet; |
-}; |