Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Unified Diff: icu46/source/test/perf/unisetperf/draft/bitset.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « icu46/source/test/perf/unisetperf/Makefile.in ('k') | icu46/source/test/perf/unisetperf/draft/contperf.bat » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: icu46/source/test/perf/unisetperf/draft/bitset.cpp
===================================================================
--- icu46/source/test/perf/unisetperf/draft/bitset.cpp (revision 0)
+++ icu46/source/test/perf/unisetperf/draft/bitset.cpp (revision 0)
@@ -0,0 +1,197 @@
+/*
+**********************************************************************
+* Copyright (C) 2007, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: bitset.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2007jan15
+* created by: Markus Scherer
+*
+* Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet
+* using a folded bit set consisting of a 1k-entry index table and a
+* compacted array of 64-bit words.
+* Uses a simple hash table for compaction.
+* Uses the original set for supplementary code points.
+*/
+
+#include "unicode/utypes.h"
+#include "unicont.h"
+
+/*
+ * Hash table for up to 1k 64-bit words, for 1 bit per BMP code point.
+ * Hashes 64-bit words and maps them to 16-bit integers which are
+ * assigned in order of new incoming words for subsequent storage
+ * in a contiguous array.
+ */
+struct BMPBitHash : public UObject {
+ int64_t keys[0x800]; // 2k
+ uint16_t values[0x800];
+ uint16_t reverse[0x400];
+ uint16_t count;
+ const int32_t prime=1301; // Less than 2k.
+
+ BMPBitHash() : count(0) {
+ // Fill values[] with 0xffff.
+ uprv_memset(values, 0xff, sizeof(values));
+ }
+
+ /*
+ * Map a key to an integer count.
+ * Map at most 1k=0x400 different keys with this data structure.
+ */
+ uint16_t map(int64_t key) {
+ int32_t hash=(int32_t)(key>>55)&0x1ff;
+ hash^=(int32_t)(key>>44)&0x7ff;
+ hash^=(int32_t)(key>>33)&0x7ff;
+ hash^=(int32_t)(key>>22)&0x7ff;
+ hash^=(int32_t)(key>>11)&0x7ff;
+ hash^=(int32_t)key&0x7ff;
+ for(;;) {
+ if(values[hash]==0xffff) {
+ // Unused slot.
+ keys[hash]=key;
+ reverse[count]=hash;
+ return values[hash]=count++;
+ } else if(keys[hash]==key) {
+ // Found a slot with this key.
+ return values[hash];
+ } else {
+ // Used slot with a different key, move to another slot.
+ hash=(hash+prime)&0x7ff;
+ }
+ }
+ }
+
+ uint16_t countKeys() const { return count; }
+
+ /*
+ * Invert the hash map: Fill an array of length countKeys() with the keys
+ * indexed by their mapped values.
+ */
+ void invert(int64_t *k) const {
+ uint16_t i;
+
+ for(i=0; i<count; ++i) {
+ k[i]=keys[reverse[i]];
+ }
+ }
+};
+
+class BitSet : public UObject, public UnicodeContainable {
+public:
+ BitSet(const UnicodeSet &set, UErrorCode &errorCode) : bits(shortBits), restSet(set.clone()) {
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ BMPBitHash *bitHash=new BMPBitHash;
+ if(bitHash==NULL || restSet==NULL) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ UnicodeSetIterator iter(set);
+ int64_t b;
+ UChar32 start, end;
+ int32_t prevIndex, i, j;
+
+ b=0; // Not necessary but makes compilers happy.
+ prevIndex=-1;
+ for(;;) {
+ if(iter.nextRange() && !iter.isString()) {
+ start=iter.getCodepoint();
+ end=iter.getCodepointEnd();
+ } else {
+ start=0x10000;
+ }
+ i=start>>6;
+ if(prevIndex!=i) {
+ // Finish the end of the previous range.
+ if(prevIndex<0) {
+ prevIndex=0;
+ } else {
+ index[prevIndex++]=bitHash->map(b);
+ }
+ // Fill all-zero entries between ranges.
+ if(prevIndex<i) {
+ uint16_t zero=bitHash->map(0);
+ do {
+ index[prevIndex++]=zero;
+ } while(prevIndex<i);
+ }
+ b=0;
+ }
+ if(start>0xffff) {
+ break;
+ }
+ b|=~((INT64_C(1)<<(start&0x3f))-1);
+ j=end>>6;
+ if(i<j) {
+ // Set bits for the start of the range.
+ index[i++]=bitHash->map(b);
+ // Fill all-one entries inside the range.
+ if(i<j) {
+ uint16_t all=bitHash->map(INT64_C(0xffffffffffffffff));
+ do {
+ index[i++]=all;
+ } while(i<j);
+ }
+ b=INT64_C(0xffffffffffffffff);
+ }
+ /* i==j */
+ b&=(INT64_C(1)<<(end&0x3f))-1;
+ prevIndex=j;
+ }
+
+ if(bitHash->countKeys()>LENGTHOF(shortBits)) {
+ bits=(int64_t *)uprv_malloc(bitHash->countKeys()*8);
+ }
+ if(bits!=NULL) {
+ bitHash->invert(bits);
+ } else {
+ bits=shortBits;
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+
+ latin1Set[0]=(uint32_t)bits[0];
+ latin1Set[1]=(uint32_t)(bits[0]>>32);
+ latin1Set[2]=(uint32_t)bits[1];
+ latin1Set[3]=(uint32_t)(bits[1]>>32);
+ latin1Set[4]=(uint32_t)bits[2];
+ latin1Set[5]=(uint32_t)(bits[2]>>32);
+ latin1Set[6]=(uint32_t)bits[3];
+ latin1Set[7]=(uint32_t)(bits[3]>>32);
+
+ restSet.remove(0, 0xffff);
+ }
+
+ ~BitSet() {
+ if(bits!=shortBits) {
+ uprv_free(bits);
+ }
+ delete restSet;
+ }
+
+ UBool contains(UChar32 c) const {
+ if((uint32_t)c<=0xff) {
+ return (UBool)((latin1Set[c>>5]&((uint32_t)1<<(c&0x1f)))!=0);
+ } else if((uint32_t)c<0xffff) {
+ return (UBool)((bits[c>>6]&(INT64_C(1)<<(c&0x3f)))!=0);
+ } else {
+ return restSet->contains(c);
+ }
+ }
+
+private:
+ uint16_t index[0x400];
+ int64_t shortBits[32];
+ int64_t *bits;
+
+ uint32_t latin1Bits[8];
+
+ UnicodeSet *restSet;
+};
Property changes on: icu46/source/test/perf/unisetperf/draft/bitset.cpp
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « icu46/source/test/perf/unisetperf/Makefile.in ('k') | icu46/source/test/perf/unisetperf/draft/contperf.bat » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698