source/test/perf/unisetperf/draft/bitset.cpp - Issue 2435373002: Delete source/test

Side by Side Diff: source/test/perf/unisetperf/draft/bitset.cpp

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /*

2 **********************************************************************

3 * Copyright (C) 2014, International Business Machines

4 * Corporation and others. All Rights Reserved.

5 **********************************************************************

6 * file name: bitset.cpp

7 * encoding: US-ASCII

8 * tab size: 8 (not used)

9 * indentation:4

10 *

11 * created on: 2007jan15

12 * created by: Markus Scherer

13 *

14 * Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet

15 * using a folded bit set consisting of a 1k-entry index table and a

16 * compacted array of 64-bit words.

17 * Uses a simple hash table for compaction.

18 * Uses the original set for supplementary code points.

19 */

20

21 #include "unicode/utypes.h"

22 #include "unicont.h"

23 #include "cmemory.h" // for UPRV_LENGTHOF

24

25 /*

26 * Hash table for up to 1k 64-bit words, for 1 bit per BMP code point.

27 * Hashes 64-bit words and maps them to 16-bit integers which are

28 * assigned in order of new incoming words for subsequent storage

29 * in a contiguous array.

30 */

31 struct BMPBitHash : public UObject {

32 int64_t keys[0x800]; // 2k

33 uint16_t values[0x800];

34 uint16_t reverse[0x400];

35 uint16_t count;

36 const int32_t prime=1301; // Less than 2k.

37

38 BMPBitHash() : count(0) {

39 // Fill values[] with 0xffff.

40 uprv_memset(values, 0xff, sizeof(values));

41 }

42

43 /*

44 * Map a key to an integer count.

45 * Map at most 1k=0x400 different keys with this data structure.

46 */

47 uint16_t map(int64_t key) {

48 int32_t hash=(int32_t)(key>>55)&0x1ff;

49 hash^=(int32_t)(key>>44)&0x7ff;

50 hash^=(int32_t)(key>>33)&0x7ff;

51 hash^=(int32_t)(key>>22)&0x7ff;

52 hash^=(int32_t)(key>>11)&0x7ff;

53 hash^=(int32_t)key&0x7ff;

54 for(;;) {

55 if(values[hash]==0xffff) {

56 // Unused slot.

57 keys[hash]=key;

58 reverse[count]=hash;

59 return values[hash]=count++;

60 } else if(keys[hash]==key) {

61 // Found a slot with this key.

62 return values[hash];

63 } else {

64 // Used slot with a different key, move to another slot.

65 hash=(hash+prime)&0x7ff;

66 }

67 }

68 }

69

70 uint16_t countKeys() const { return count; }

71

72 /*

73 * Invert the hash map: Fill an array of length countKeys() with the keys

74 * indexed by their mapped values.

75 */

76 void invert(int64_t *k) const {

77 uint16_t i;

78

79 for(i=0; i<count; ++i) {

80 k[i]=keys[reverse[i]];

81 }

82 }

83 };

84

85 class BitSet : public UObject, public UnicodeContainable {

86 public:

87 BitSet(const UnicodeSet &set, UErrorCode &errorCode) : bits(shortBits), rest Set(set.clone()) {

88 if(U_FAILURE(errorCode)) {

89 return;

90 }

91 BMPBitHash *bitHash=new BMPBitHash;

92 if(bitHash==NULL \|\| restSet==NULL) {

93 errorCode=U_MEMORY_ALLOCATION_ERROR;

94 return;

95 }

96

97 UnicodeSetIterator iter(set);

98 int64_t b;

99 UChar32 start, end;

100 int32_t prevIndex, i, j;

101

102 b=0; // Not necessary but makes compilers happy.

103 prevIndex=-1;

104 for(;;) {

105 if(iter.nextRange() && !iter.isString()) {

106 start=iter.getCodepoint();

107 end=iter.getCodepointEnd();

108 } else {

109 start=0x10000;

110 }

111 i=start>>6;

112 if(prevIndex!=i) {

113 // Finish the end of the previous range.

114 if(prevIndex<0) {

115 prevIndex=0;

116 } else {

117 index[prevIndex++]=bitHash->map(b);

118 }

119 // Fill all-zero entries between ranges.

120 if(prevIndex<i) {

121 uint16_t zero=bitHash->map(0);

122 do {

123 index[prevIndex++]=zero;

124 } while(prevIndex<i);

125 }

126 b=0;

127 }

128 if(start>0xffff) {

129 break;

130 }

131 b\|=~((INT64_C(1)<<(start&0x3f))-1);

132 j=end>>6;

133 if(i<j) {

134 // Set bits for the start of the range.

135 index[i++]=bitHash->map(b);

136 // Fill all-one entries inside the range.

137 if(i<j) {

138 uint16_t all=bitHash->map(INT64_C(0xffffffffffffffff));

139 do {

140 index[i++]=all;

141 } while(i<j);

142 }

143 b=INT64_C(0xffffffffffffffff);

144 }

145 /* i==j */

146 b&=(INT64_C(1)<<(end&0x3f))-1;

147 prevIndex=j;

148 }

149

150 if(bitHash->countKeys()>UPRV_LENGTHOF(shortBits)) {

151 bits=(int64_t )uprv_malloc(bitHash->countKeys()8);

152 }

153 if(bits!=NULL) {

154 bitHash->invert(bits);

155 } else {

156 bits=shortBits;

157 errorCode=U_MEMORY_ALLOCATION_ERROR;

158 return;

159 }

160

161 latin1Set[0]=(uint32_t)bits[0];

162 latin1Set[1]=(uint32_t)(bits[0]>>32);

163 latin1Set[2]=(uint32_t)bits[1];

164 latin1Set[3]=(uint32_t)(bits[1]>>32);

165 latin1Set[4]=(uint32_t)bits[2];

166 latin1Set[5]=(uint32_t)(bits[2]>>32);

167 latin1Set[6]=(uint32_t)bits[3];

168 latin1Set[7]=(uint32_t)(bits[3]>>32);

169

170 restSet.remove(0, 0xffff);

171 }

172

173 ~BitSet() {

174 if(bits!=shortBits) {

175 uprv_free(bits);

176 }

177 delete restSet;

178 }

179

180 UBool contains(UChar32 c) const {

181 if((uint32_t)c<=0xff) {

182 return (UBool)((latin1Set[c>>5]&((uint32_t)1<<(c&0x1f)))!=0);

183 } else if((uint32_t)c<0xffff) {

184 return (UBool)((bits[c>>6]&(INT64_C(1)<<(c&0x3f)))!=0);

185 } else {

186 return restSet->contains(c);

187 }

188 }

189

190 private:

191 uint16_t index[0x400];

192 int64_t shortBits[32];

193 int64_t *bits;

194

195 uint32_t latin1Bits[8];

196

197 UnicodeSet *restSet;

198 };

OLD	NEW

« no previous file with comments | « source/test/perf/unisetperf/Makefile.in ('k') | source/test/perf/unisetperf/draft/contperf.bat » ('j') | no next file with comments »