icu46/source/test/perf/unisetperf/draft/bitset.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/test/perf/unisetperf/draft/bitset.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/test/perf/unisetperf/draft/bitset.cpp

===================================================================

--- icu46/source/test/perf/unisetperf/draft/bitset.cpp (revision 0)

+++ icu46/source/test/perf/unisetperf/draft/bitset.cpp (revision 0)

@@ -0,0 +1,197 @@

+/*

+**********************************************************************

+* file name: bitset.cpp

+* encoding: US-ASCII

+* tab size: 8 (not used)

+* indentation:4

+* created on: 2007jan15

+* created by: Markus Scherer

+* Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet

+* using a folded bit set consisting of a 1k-entry index table and a

+* compacted array of 64-bit words.

+* Uses a simple hash table for compaction.

+* Uses the original set for supplementary code points.

+*/

+#include "unicode/utypes.h"

+#include "unicont.h"

+/*

+ * Hash table for up to 1k 64-bit words, for 1 bit per BMP code point.

+ * Hashes 64-bit words and maps them to 16-bit integers which are

+ * assigned in order of new incoming words for subsequent storage

+ * in a contiguous array.

+ */

+struct BMPBitHash : public UObject {

+ int64_t keys[0x800]; // 2k

+ uint16_t values[0x800];

+ uint16_t reverse[0x400];

+ uint16_t count;

+ const int32_t prime=1301; // Less than 2k.

+ BMPBitHash() : count(0) {

+ // Fill values[] with 0xffff.

+ uprv_memset(values, 0xff, sizeof(values));

+ }

+ /*

+ * Map a key to an integer count.

+ * Map at most 1k=0x400 different keys with this data structure.

+ */

+ uint16_t map(int64_t key) {

+ int32_t hash=(int32_t)(key>>55)&0x1ff;

+ hash^=(int32_t)(key>>44)&0x7ff;

+ hash^=(int32_t)(key>>33)&0x7ff;

+ hash^=(int32_t)(key>>22)&0x7ff;

+ hash^=(int32_t)(key>>11)&0x7ff;

+ hash^=(int32_t)key&0x7ff;

+ for(;;) {

+ if(values[hash]==0xffff) {

+ // Unused slot.

+ keys[hash]=key;

+ reverse[count]=hash;

+ return values[hash]=count++;

+ } else if(keys[hash]==key) {

+ // Found a slot with this key.

+ return values[hash];

+ } else {

+ // Used slot with a different key, move to another slot.

+ hash=(hash+prime)&0x7ff;

+ }

+ uint16_t countKeys() const { return count; }

+ /*

+ * Invert the hash map: Fill an array of length countKeys() with the keys

+ * indexed by their mapped values.

+ */

+ void invert(int64_t *k) const {

+ uint16_t i;

+ for(i=0; i<count; ++i) {

+ k[i]=keys[reverse[i]];

+ }

+};

+class BitSet : public UObject, public UnicodeContainable {

+public:

+ BitSet(const UnicodeSet &set, UErrorCode &errorCode) : bits(shortBits), restSet(set.clone()) {

+ if(U_FAILURE(errorCode)) {

+ return;

+ }

+ BMPBitHash *bitHash=new BMPBitHash;

+ if(bitHash==NULL || restSet==NULL) {

+ errorCode=U_MEMORY_ALLOCATION_ERROR;

+ return;

+ }

+ UnicodeSetIterator iter(set);

+ int64_t b;

+ UChar32 start, end;

+ int32_t prevIndex, i, j;

+ b=0; // Not necessary but makes compilers happy.

+ prevIndex=-1;

+ for(;;) {

+ if(iter.nextRange() && !iter.isString()) {

+ start=iter.getCodepoint();

+ end=iter.getCodepointEnd();

+ } else {

+ start=0x10000;

+ }

+ i=start>>6;

+ if(prevIndex!=i) {

+ // Finish the end of the previous range.

+ if(prevIndex<0) {

+ prevIndex=0;

+ } else {

+ index[prevIndex++]=bitHash->map(b);

+ }

+ // Fill all-zero entries between ranges.

+ if(prevIndex<i) {

+ uint16_t zero=bitHash->map(0);

+ do {

+ index[prevIndex++]=zero;

+ } while(prevIndex<i);

+ }

+ b=0;

+ }

+ if(start>0xffff) {

+ break;

+ }

+ b|=~((INT64_C(1)<<(start&0x3f))-1);

+ j=end>>6;

+ if(i<j) {

+ // Set bits for the start of the range.

+ index[i++]=bitHash->map(b);

+ // Fill all-one entries inside the range.

+ if(i<j) {

+ uint16_t all=bitHash->map(INT64_C(0xffffffffffffffff));

+ do {

+ index[i++]=all;

+ } while(i<j);

+ }

+ b=INT64_C(0xffffffffffffffff);

+ }

+ /* i==j */

+ b&=(INT64_C(1)<<(end&0x3f))-1;

+ prevIndex=j;

+ }

+ if(bitHash->countKeys()>LENGTHOF(shortBits)) {

+ bits=(int64_t *)uprv_malloc(bitHash->countKeys()*8);

+ }

+ if(bits!=NULL) {

+ bitHash->invert(bits);

+ } else {

+ bits=shortBits;

+ errorCode=U_MEMORY_ALLOCATION_ERROR;

+ return;

+ }

+ latin1Set[0]=(uint32_t)bits[0];

+ latin1Set[1]=(uint32_t)(bits[0]>>32);

+ latin1Set[2]=(uint32_t)bits[1];

+ latin1Set[3]=(uint32_t)(bits[1]>>32);

+ latin1Set[4]=(uint32_t)bits[2];

+ latin1Set[5]=(uint32_t)(bits[2]>>32);

+ latin1Set[6]=(uint32_t)bits[3];

+ latin1Set[7]=(uint32_t)(bits[3]>>32);

+ restSet.remove(0, 0xffff);

+ }

+ ~BitSet() {

+ if(bits!=shortBits) {

+ uprv_free(bits);

+ }

+ delete restSet;

+ }

+ UBool contains(UChar32 c) const {

+ if((uint32_t)c<=0xff) {

+ return (UBool)((latin1Set[c>>5]&((uint32_t)1<<(c&0x1f)))!=0);

+ } else if((uint32_t)c<0xffff) {

+ return (UBool)((bits[c>>6]&(INT64_C(1)<<(c&0x3f)))!=0);

+ } else {

+ return restSet->contains(c);

+ }

+private:

+ uint16_t index[0x400];

+ int64_t shortBits[32];

+ int64_t *bits;

+ uint32_t latin1Bits[8];

+ UnicodeSet *restSet;

+};

Property changes on: icu46/source/test/perf/unisetperf/draft/bitset.cpp

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/test/perf/unisetperf/Makefile.in ('k') | icu46/source/test/perf/unisetperf/draft/contperf.bat » ('j') | no next file with comments »