| Index: source/test/perf/unisetperf/draft/bitset.cpp
|
| diff --git a/source/test/perf/unisetperf/draft/bitset.cpp b/source/test/perf/unisetperf/draft/bitset.cpp
|
| deleted file mode 100644
|
| index d4d3f3f9830b0bc9233c456b1dded55aa7c38144..0000000000000000000000000000000000000000
|
| --- a/source/test/perf/unisetperf/draft/bitset.cpp
|
| +++ /dev/null
|
| @@ -1,198 +0,0 @@
|
| -/*
|
| -**********************************************************************
|
| -* Copyright (C) 2014, International Business Machines
|
| -* Corporation and others. All Rights Reserved.
|
| -**********************************************************************
|
| -* file name: bitset.cpp
|
| -* encoding: US-ASCII
|
| -* tab size: 8 (not used)
|
| -* indentation:4
|
| -*
|
| -* created on: 2007jan15
|
| -* created by: Markus Scherer
|
| -*
|
| -* Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet
|
| -* using a folded bit set consisting of a 1k-entry index table and a
|
| -* compacted array of 64-bit words.
|
| -* Uses a simple hash table for compaction.
|
| -* Uses the original set for supplementary code points.
|
| -*/
|
| -
|
| -#include "unicode/utypes.h"
|
| -#include "unicont.h"
|
| -#include "cmemory.h" // for UPRV_LENGTHOF
|
| -
|
| -/*
|
| - * Hash table for up to 1k 64-bit words, for 1 bit per BMP code point.
|
| - * Hashes 64-bit words and maps them to 16-bit integers which are
|
| - * assigned in order of new incoming words for subsequent storage
|
| - * in a contiguous array.
|
| - */
|
| -struct BMPBitHash : public UObject {
|
| - int64_t keys[0x800]; // 2k
|
| - uint16_t values[0x800];
|
| - uint16_t reverse[0x400];
|
| - uint16_t count;
|
| - const int32_t prime=1301; // Less than 2k.
|
| -
|
| - BMPBitHash() : count(0) {
|
| - // Fill values[] with 0xffff.
|
| - uprv_memset(values, 0xff, sizeof(values));
|
| - }
|
| -
|
| - /*
|
| - * Map a key to an integer count.
|
| - * Map at most 1k=0x400 different keys with this data structure.
|
| - */
|
| - uint16_t map(int64_t key) {
|
| - int32_t hash=(int32_t)(key>>55)&0x1ff;
|
| - hash^=(int32_t)(key>>44)&0x7ff;
|
| - hash^=(int32_t)(key>>33)&0x7ff;
|
| - hash^=(int32_t)(key>>22)&0x7ff;
|
| - hash^=(int32_t)(key>>11)&0x7ff;
|
| - hash^=(int32_t)key&0x7ff;
|
| - for(;;) {
|
| - if(values[hash]==0xffff) {
|
| - // Unused slot.
|
| - keys[hash]=key;
|
| - reverse[count]=hash;
|
| - return values[hash]=count++;
|
| - } else if(keys[hash]==key) {
|
| - // Found a slot with this key.
|
| - return values[hash];
|
| - } else {
|
| - // Used slot with a different key, move to another slot.
|
| - hash=(hash+prime)&0x7ff;
|
| - }
|
| - }
|
| - }
|
| -
|
| - uint16_t countKeys() const { return count; }
|
| -
|
| - /*
|
| - * Invert the hash map: Fill an array of length countKeys() with the keys
|
| - * indexed by their mapped values.
|
| - */
|
| - void invert(int64_t *k) const {
|
| - uint16_t i;
|
| -
|
| - for(i=0; i<count; ++i) {
|
| - k[i]=keys[reverse[i]];
|
| - }
|
| - }
|
| -};
|
| -
|
| -class BitSet : public UObject, public UnicodeContainable {
|
| -public:
|
| - BitSet(const UnicodeSet &set, UErrorCode &errorCode) : bits(shortBits), restSet(set.clone()) {
|
| - if(U_FAILURE(errorCode)) {
|
| - return;
|
| - }
|
| - BMPBitHash *bitHash=new BMPBitHash;
|
| - if(bitHash==NULL || restSet==NULL) {
|
| - errorCode=U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| -
|
| - UnicodeSetIterator iter(set);
|
| - int64_t b;
|
| - UChar32 start, end;
|
| - int32_t prevIndex, i, j;
|
| -
|
| - b=0; // Not necessary but makes compilers happy.
|
| - prevIndex=-1;
|
| - for(;;) {
|
| - if(iter.nextRange() && !iter.isString()) {
|
| - start=iter.getCodepoint();
|
| - end=iter.getCodepointEnd();
|
| - } else {
|
| - start=0x10000;
|
| - }
|
| - i=start>>6;
|
| - if(prevIndex!=i) {
|
| - // Finish the end of the previous range.
|
| - if(prevIndex<0) {
|
| - prevIndex=0;
|
| - } else {
|
| - index[prevIndex++]=bitHash->map(b);
|
| - }
|
| - // Fill all-zero entries between ranges.
|
| - if(prevIndex<i) {
|
| - uint16_t zero=bitHash->map(0);
|
| - do {
|
| - index[prevIndex++]=zero;
|
| - } while(prevIndex<i);
|
| - }
|
| - b=0;
|
| - }
|
| - if(start>0xffff) {
|
| - break;
|
| - }
|
| - b|=~((INT64_C(1)<<(start&0x3f))-1);
|
| - j=end>>6;
|
| - if(i<j) {
|
| - // Set bits for the start of the range.
|
| - index[i++]=bitHash->map(b);
|
| - // Fill all-one entries inside the range.
|
| - if(i<j) {
|
| - uint16_t all=bitHash->map(INT64_C(0xffffffffffffffff));
|
| - do {
|
| - index[i++]=all;
|
| - } while(i<j);
|
| - }
|
| - b=INT64_C(0xffffffffffffffff);
|
| - }
|
| - /* i==j */
|
| - b&=(INT64_C(1)<<(end&0x3f))-1;
|
| - prevIndex=j;
|
| - }
|
| -
|
| - if(bitHash->countKeys()>UPRV_LENGTHOF(shortBits)) {
|
| - bits=(int64_t *)uprv_malloc(bitHash->countKeys()*8);
|
| - }
|
| - if(bits!=NULL) {
|
| - bitHash->invert(bits);
|
| - } else {
|
| - bits=shortBits;
|
| - errorCode=U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| -
|
| - latin1Set[0]=(uint32_t)bits[0];
|
| - latin1Set[1]=(uint32_t)(bits[0]>>32);
|
| - latin1Set[2]=(uint32_t)bits[1];
|
| - latin1Set[3]=(uint32_t)(bits[1]>>32);
|
| - latin1Set[4]=(uint32_t)bits[2];
|
| - latin1Set[5]=(uint32_t)(bits[2]>>32);
|
| - latin1Set[6]=(uint32_t)bits[3];
|
| - latin1Set[7]=(uint32_t)(bits[3]>>32);
|
| -
|
| - restSet.remove(0, 0xffff);
|
| - }
|
| -
|
| - ~BitSet() {
|
| - if(bits!=shortBits) {
|
| - uprv_free(bits);
|
| - }
|
| - delete restSet;
|
| - }
|
| -
|
| - UBool contains(UChar32 c) const {
|
| - if((uint32_t)c<=0xff) {
|
| - return (UBool)((latin1Set[c>>5]&((uint32_t)1<<(c&0x1f)))!=0);
|
| - } else if((uint32_t)c<0xffff) {
|
| - return (UBool)((bits[c>>6]&(INT64_C(1)<<(c&0x3f)))!=0);
|
| - } else {
|
| - return restSet->contains(c);
|
| - }
|
| - }
|
| -
|
| -private:
|
| - uint16_t index[0x400];
|
| - int64_t shortBits[32];
|
| - int64_t *bits;
|
| -
|
| - uint32_t latin1Bits[8];
|
| -
|
| - UnicodeSet *restSet;
|
| -};
|
|
|