icu46/source/test/perf/unisetperf/draft/bitset.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/perf/unisetperf/draft/bitset.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 2007, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 * file name: bitset.cpp

	7 * encoding: US-ASCII

	8 * tab size: 8 (not used)

	9 * indentation:4

	10 *

	11 * created on: 2007jan15

	12 * created by: Markus Scherer

	13 *

	14 * Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet

	15 * using a folded bit set consisting of a 1k-entry index table and a

	16 * compacted array of 64-bit words.

	17 * Uses a simple hash table for compaction.

	18 * Uses the original set for supplementary code points.

	19 */

	20

	21 #include "unicode/utypes.h"

	22 #include "unicont.h"

	23

	24 /*

	25 * Hash table for up to 1k 64-bit words, for 1 bit per BMP code point.

	26 * Hashes 64-bit words and maps them to 16-bit integers which are

	27 * assigned in order of new incoming words for subsequent storage

	28 * in a contiguous array.

	29 */

	30 struct BMPBitHash : public UObject {

	31 int64_t keys[0x800]; // 2k

	32 uint16_t values[0x800];

	33 uint16_t reverse[0x400];

	34 uint16_t count;

	35 const int32_t prime=1301; // Less than 2k.

	36

	37 BMPBitHash() : count(0) {

	38 // Fill values[] with 0xffff.

	39 uprv_memset(values, 0xff, sizeof(values));

	40 }

	41

	42 /*

	43 * Map a key to an integer count.

	44 * Map at most 1k=0x400 different keys with this data structure.

	45 */

	46 uint16_t map(int64_t key) {

	47 int32_t hash=(int32_t)(key>>55)&0x1ff;

	48 hash^=(int32_t)(key>>44)&0x7ff;

	49 hash^=(int32_t)(key>>33)&0x7ff;

	50 hash^=(int32_t)(key>>22)&0x7ff;

	51 hash^=(int32_t)(key>>11)&0x7ff;

	52 hash^=(int32_t)key&0x7ff;

	53 for(;;) {

	54 if(values[hash]==0xffff) {

	55 // Unused slot.

	56 keys[hash]=key;

	57 reverse[count]=hash;

	58 return values[hash]=count++;

	59 } else if(keys[hash]==key) {

	60 // Found a slot with this key.

	61 return values[hash];

	62 } else {

	63 // Used slot with a different key, move to another slot.

	64 hash=(hash+prime)&0x7ff;

	65 }

	66 }

	67 }

	68

	69 uint16_t countKeys() const { return count; }

	70

	71 /*

	72 * Invert the hash map: Fill an array of length countKeys() with the keys

	73 * indexed by their mapped values.

	74 */

	75 void invert(int64_t *k) const {

	76 uint16_t i;

	77

	78 for(i=0; i<count; ++i) {

	79 k[i]=keys[reverse[i]];

	80 }

	81 }

	82 };

	83

	84 class BitSet : public UObject, public UnicodeContainable {

	85 public:

	86 BitSet(const UnicodeSet &set, UErrorCode &errorCode) : bits(shortBits), rest Set(set.clone()) {

	87 if(U_FAILURE(errorCode)) {

	88 return;

	89 }

	90 BMPBitHash *bitHash=new BMPBitHash;

	91 if(bitHash==NULL \|\| restSet==NULL) {

	92 errorCode=U_MEMORY_ALLOCATION_ERROR;

	93 return;

	94 }

	95

	96 UnicodeSetIterator iter(set);

	97 int64_t b;

	98 UChar32 start, end;

	99 int32_t prevIndex, i, j;

	100

	101 b=0; // Not necessary but makes compilers happy.

	102 prevIndex=-1;

	103 for(;;) {

	104 if(iter.nextRange() && !iter.isString()) {

	105 start=iter.getCodepoint();

	106 end=iter.getCodepointEnd();

	107 } else {

	108 start=0x10000;

	109 }

	110 i=start>>6;

	111 if(prevIndex!=i) {

	112 // Finish the end of the previous range.

	113 if(prevIndex<0) {

	114 prevIndex=0;

	115 } else {

	116 index[prevIndex++]=bitHash->map(b);

	117 }

	118 // Fill all-zero entries between ranges.

	119 if(prevIndex<i) {

	120 uint16_t zero=bitHash->map(0);

	121 do {

	122 index[prevIndex++]=zero;

	123 } while(prevIndex<i);

	124 }

	125 b=0;

	126 }

	127 if(start>0xffff) {

	128 break;

	129 }

	130 b\|=~((INT64_C(1)<<(start&0x3f))-1);

	131 j=end>>6;

	132 if(i<j) {

	133 // Set bits for the start of the range.

	134 index[i++]=bitHash->map(b);

	135 // Fill all-one entries inside the range.

	136 if(i<j) {

	137 uint16_t all=bitHash->map(INT64_C(0xffffffffffffffff));

	138 do {

	139 index[i++]=all;

	140 } while(i<j);

	141 }

	142 b=INT64_C(0xffffffffffffffff);

	143 }

	144 /* i==j */

	145 b&=(INT64_C(1)<<(end&0x3f))-1;

	146 prevIndex=j;

	147 }

	148

	149 if(bitHash->countKeys()>LENGTHOF(shortBits)) {

	150 bits=(int64_t )uprv_malloc(bitHash->countKeys()8);

	151 }

	152 if(bits!=NULL) {

	153 bitHash->invert(bits);

	154 } else {

	155 bits=shortBits;

	156 errorCode=U_MEMORY_ALLOCATION_ERROR;

	157 return;

	158 }

	159

	160 latin1Set[0]=(uint32_t)bits[0];

	161 latin1Set[1]=(uint32_t)(bits[0]>>32);

	162 latin1Set[2]=(uint32_t)bits[1];

	163 latin1Set[3]=(uint32_t)(bits[1]>>32);

	164 latin1Set[4]=(uint32_t)bits[2];

	165 latin1Set[5]=(uint32_t)(bits[2]>>32);

	166 latin1Set[6]=(uint32_t)bits[3];

	167 latin1Set[7]=(uint32_t)(bits[3]>>32);

	168

	169 restSet.remove(0, 0xffff);

	170 }

	171

	172 ~BitSet() {

	173 if(bits!=shortBits) {

	174 uprv_free(bits);

	175 }

	176 delete restSet;

	177 }

	178

	179 UBool contains(UChar32 c) const {

	180 if((uint32_t)c<=0xff) {

	181 return (UBool)((latin1Set[c>>5]&((uint32_t)1<<(c&0x1f)))!=0);

	182 } else if((uint32_t)c<0xffff) {

	183 return (UBool)((bits[c>>6]&(INT64_C(1)<<(c&0x3f)))!=0);

	184 } else {

	185 return restSet->contains(c);

	186 }

	187 }

	188

	189 private:

	190 uint16_t index[0x400];

	191 int64_t shortBits[32];

	192 int64_t *bits;

	193

	194 uint32_t latin1Bits[8];

	195

	196 UnicodeSet *restSet;

	197 };

OLD	NEW

« no previous file with comments | « icu46/source/test/perf/unisetperf/Makefile.in ('k') | icu46/source/test/perf/unisetperf/draft/contperf.bat » ('j') | no next file with comments »