source/i18n/coleitr.cpp - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/i18n/coleitr.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unusued directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 *******************************************************************************	2 *******************************************************************************

3 * Copyright (C) 1996-2011, International Business Machines Corporation and *	3 * Copyright (C) 1996-2014, International Business Machines Corporation and

4 * others. All Rights Reserved. *	4 * others. All Rights Reserved.

5 *******************************************************************************	5 *******************************************************************************

6 */	6 */

7	7

8 /*	8 /*

9 * File coleitr.cpp	9 * File coleitr.cpp

10 *	10 *

11 *

12 *

13 * Created by: Helena Shih	11 * Created by: Helena Shih

14 *	12 *

15 * Modification History:	13 * Modification History:

16 *	14 *

17 * Date Name Description	15 * Date Name Description

18 *	16 *

19 * 6/23/97 helena Adding comments to make code more readable.	17 * 6/23/97 helena Adding comments to make code more readable.

20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java	18 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java

21 * 12/10/99 aliu Ported Thai collation support from Java.	19 * 12/10/99 aliu Ported Thai collation support from Java.

22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)	20 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)

23 * 02/19/01 swquek Removed CollationElementsIterator() since it is	21 * 02/19/01 swquek Removed CollationElementIterator() since it is

24 * private constructor and no calls are made to it	22 * private constructor and no calls are made to it

	23 * 2012-2014 markus Rewritten in C++ again.

25 */	24 */

26	25

27 #include "unicode/utypes.h"	26 #include "unicode/utypes.h"

28	27

29 #if !UCONFIG_NO_COLLATION	28 #if !UCONFIG_NO_COLLATION

30	29

31 #include "unicode/coleitr.h"	30 #include "unicode/coleitr.h"

	31 #include "unicode/tblcoll.h"

32 #include "unicode/ustring.h"	32 #include "unicode/ustring.h"

33 #include "ucol_imp.h"	33 #include "cmemory.h"

	34 #include "collation.h"

	35 #include "collationdata.h"

	36 #include "collationiterator.h"

	37 #include "collationsets.h"

	38 #include "collationtailoring.h"

34 #include "uassert.h"	39 #include "uassert.h"

35 #include "cmemory.h"	40 #include "uhash.h"

36	41 #include "utf16collationiterator.h"

	42 #include "uvectr32.h"

37	43

38 /* Constants --------------------------------------------------------------- */	44 /* Constants --------------------------------------------------------------- */

39	45

40 U_NAMESPACE_BEGIN	46 U_NAMESPACE_BEGIN

41	47

42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)	48 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)

43	49

44 /* CollationElementIterator public constructor/destructor ------------------ */	50 /* CollationElementIterator public constructor/destructor ------------------ */

45	51

46 CollationElementIterator::CollationElementIterator(	52 CollationElementIterator::CollationElementIterator(

47 const CollationElementIterator& other)	53 const CollationElementIterator& other)

48 : UObject(other), isDataOwned_(TRUE)	54 : UObject(other), iter_(NULL), rbc_(NULL), otherHalf_(0), dir_(0), offsets_(NULL) {

49 {

50 UErrorCode status = U_ZERO_ERROR;

51 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,

52 &status);

53

54 *this = other;	55 *this = other;

55 }	56 }

56	57

57 CollationElementIterator::~CollationElementIterator()	58 CollationElementIterator::~CollationElementIterator()

58 {	59 {

59 if (isDataOwned_) {	60 delete iter_;

60 ucol_closeElements(m_data_);	61 delete offsets_;

61 }

62 }	62 }

63	63

64 /* CollationElementIterator public methods --------------------------------- */	64 /* CollationElementIterator public methods --------------------------------- */

65	65

	66 namespace {

	67

	68 uint32_t getFirstHalf(uint32_t p, uint32_t lower32) {

	69 return (p & 0xffff0000) \| ((lower32 >> 16) & 0xff00) \| ((lower32 >> 8) & 0xff);

	70 }

	71 uint32_t getSecondHalf(uint32_t p, uint32_t lower32) {

	72 return (p << 16) \| ((lower32 >> 8) & 0xff00) \| (lower32 & 0x3f);

	73 }

	74 UBool ceNeedsTwoParts(int64_t ce) {

	75 return (ce & INT64_C(0xffff00ff003f)) != 0;

	76 }

	77

	78 } // namespace

	79

66 int32_t CollationElementIterator::getOffset() const	80 int32_t CollationElementIterator::getOffset() const

67 {	81 {

68 return ucol_getOffset(m_data_);	82 if (dir_ < 0 && offsets_ != NULL && !offsets_->isEmpty()) {

	83 // CollationIterator::previousCE() decrements the CEs length

	84 // while it pops CEs from its internal buffer.

	85 int32_t i = iter_->getCEsLength();

	86 if (otherHalf_ != 0) {

	87 // Return the trailing CE offset while we are in the middle of a 64-bit CE.

	88 ++i;

	89 }

	90 U_ASSERT(i < offsets_->size());

	91 return offsets_->elementAti(i);

	92 }

	93 return iter_->getOffset();

69 }	94 }

70	95

71 /**	96 /**

72 * Get the ordering priority of the next character in the string.	97 * Get the ordering priority of the next character in the string.

73 * @return the next character's ordering. Returns NULLORDER if an error has	98 * @return the next character's ordering. Returns NULLORDER if an error has

74 * occured or if the end of string has been reached	99 * occured or if the end of string has been reached

75 */	100 */

76 int32_t CollationElementIterator::next(UErrorCode& status)	101 int32_t CollationElementIterator::next(UErrorCode& status)

77 {	102 {

78 return ucol_next(m_data_, &status);	103 if (U_FAILURE(status)) { return NULLORDER; }

	104 if (dir_ > 1) {

	105 // Continue forward iteration. Test this first.

	106 if (otherHalf_ != 0) {

	107 uint32_t oh = otherHalf_;

	108 otherHalf_ = 0;

	109 return oh;

	110 }

	111 } else if (dir_ == 1) {

	112 // next() after setOffset()

	113 dir_ = 2;

	114 } else if (dir_ == 0) {

	115 // The iter_ is already reset to the start of the text.

	116 dir_ = 2;

	117 } else /* dir_ < 0 */ {

	118 // illegal change of direction

	119 status = U_INVALID_STATE_ERROR;

	120 return NULLORDER;

	121 }

	122 // No need to keep all CEs in the buffer when we iterate.

	123 iter_->clearCEsIfNoneRemaining();

	124 int64_t ce = iter_->nextCE(status);

	125 if (ce == Collation::NO_CE) { return NULLORDER; }

	126 // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.

	127 uint32_t p = (uint32_t)(ce >> 32);

	128 uint32_t lower32 = (uint32_t)ce;

	129 uint32_t firstHalf = getFirstHalf(p, lower32);

	130 uint32_t secondHalf = getSecondHalf(p, lower32);

	131 if (secondHalf != 0) {

	132 otherHalf_ = secondHalf \| 0xc0; // continuation CE

	133 }

	134 return firstHalf;

79 }	135 }

80	136

81 UBool CollationElementIterator::operator!=(	137 UBool CollationElementIterator::operator!=(

82 const CollationElementIterator& other) const	138 const CollationElementIterator& other) const

83 {	139 {

84 return !(*this == other);	140 return !(*this == other);

85 }	141 }

86	142

87 UBool CollationElementIterator::operator==(	143 UBool CollationElementIterator::operator==(

88 const CollationElementIterator& that) const	144 const CollationElementIterator& that) const

89 {	145 {

90 if (this == &that \|\| m_data_ == that.m_data_) {	146 if (this == &that) {

91 return TRUE;	147 return TRUE;

92 }	148 }

93	149

94 // option comparison	150 return

95 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)	151 (rbc_ == that.rbc_ \|\| *rbc_ == *that.rbc_) &&

96 {	152 otherHalf_ == that.otherHalf_ &&

97 return FALSE;	153 normalizeDir() == that.normalizeDir() &&

98 }	154 string_ == that.string_ &&

99	155 *iter_ == *that.iter_;

100 // the constructor and setText always sets a length

101 // and we only compare the string not the contents of the normalization

102 // buffer

103 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);

104 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);

105

106 if (thislength != thatlength) {

107 return FALSE;

108 }

109

110 if (uprv_memcmp(m_data_->iteratordata_.string,

111 that.m_data_->iteratordata_.string,

112 thislength * U_SIZEOF_UCHAR) != 0) {

113 return FALSE;

114 }

115 if (getOffset() != that.getOffset()) {

116 return FALSE;

117 }

118

119 // checking normalization buffer

120 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {

121 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {

122 return FALSE;

123 }

124 // both are in the normalization buffer

125 if (m_data_->iteratordata_.pos

126 - m_data_->iteratordata_.writableBuffer.getBuffer()

127 != that.m_data_->iteratordata_.pos

128 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {

129 // not in the same position in the normalization buffer

130 return FALSE;

131 }

132 }

133 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {

134 return FALSE;

135 }

136 // checking ce position

137 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)

138 == (that.m_data_->iteratordata_.CEpos

139 - that.m_data_->iteratordata_.CEs);

140 }	156 }

141	157

142 /**	158 /**

143 * Get the ordering priority of the previous collation element in the string.	159 * Get the ordering priority of the previous collation element in the string.

144 * @param status the error code status.	160 * @param status the error code status.

145 * @return the previous element's ordering. Returns NULLORDER if an error has	161 * @return the previous element's ordering. Returns NULLORDER if an error has

146 * occured or if the start of string has been reached.	162 * occured or if the start of string has been reached.

147 */	163 */

148 int32_t CollationElementIterator::previous(UErrorCode& status)	164 int32_t CollationElementIterator::previous(UErrorCode& status)

149 {	165 {

150 return ucol_previous(m_data_, &status);	166 if (U_FAILURE(status)) { return NULLORDER; }

	167 if (dir_ < 0) {

	168 // Continue backwards iteration. Test this first.

	169 if (otherHalf_ != 0) {

	170 uint32_t oh = otherHalf_;

	171 otherHalf_ = 0;

	172 return oh;

	173 }

	174 } else if (dir_ == 0) {

	175 iter_->resetToOffset(string_.length());

	176 dir_ = -1;

	177 } else if (dir_ == 1) {

	178 // previous() after setOffset()

	179 dir_ = -1;

	180 } else /* dir_ > 1 */ {

	181 // illegal change of direction

	182 status = U_INVALID_STATE_ERROR;

	183 return NULLORDER;

	184 }

	185 if (offsets_ == NULL) {

	186 offsets_ = new UVector32(status);

	187 if (offsets_ == NULL) {

	188 status = U_MEMORY_ALLOCATION_ERROR;

	189 return NULLORDER;

	190 }

	191 }

	192 // If we already have expansion CEs, then we also have offsets.

	193 // Otherwise remember the trailing offset in case we need to

	194 // write offsets for an artificial expansion.

	195 int32_t limitOffset = iter_->getCEsLength() == 0 ? iter_->getOffset() : 0;

	196 int64_t ce = iter_->previousCE(*offsets_, status);

	197 if (ce == Collation::NO_CE) { return NULLORDER; }

	198 // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.

	199 uint32_t p = (uint32_t)(ce >> 32);

	200 uint32_t lower32 = (uint32_t)ce;

	201 uint32_t firstHalf = getFirstHalf(p, lower32);

	202 uint32_t secondHalf = getSecondHalf(p, lower32);

	203 if (secondHalf != 0) {

	204 if (offsets_->isEmpty()) {

	205 // When we convert a single 64-bit CE into two 32-bit CEs,

	206 // we need to make this artificial expansion behave like a normal expansion.

	207 // See CollationIterator::previousCE().

	208 offsets_->addElement(iter_->getOffset(), status);

	209 offsets_->addElement(limitOffset, status);

	210 }

	211 otherHalf_ = firstHalf;

	212 return secondHalf \| 0xc0; // continuation CE

	213 }

	214 return firstHalf;

151 }	215 }

152	216

153 /**	217 /**

154 * Resets the cursor to the beginning of the string.	218 * Resets the cursor to the beginning of the string.

155 */	219 */

156 void CollationElementIterator::reset()	220 void CollationElementIterator::reset()

157 {	221 {

158 ucol_reset(m_data_);	222 iter_ ->resetToOffset(0);

	223 otherHalf_ = 0;

	224 dir_ = 0;

159 }	225 }

160	226

161 void CollationElementIterator::setOffset(int32_t newOffset,	227 void CollationElementIterator::setOffset(int32_t newOffset,

162 UErrorCode& status)	228 UErrorCode& status)

163 {	229 {

164 ucol_setOffset(m_data_, newOffset, &status);	230 if (U_FAILURE(status)) { return; }

	231 if (0 < newOffset && newOffset < string_.length()) {

	232 int32_t offset = newOffset;

	233 do {

	234 UChar c = string_.charAt(offset);

	235 if (!rbc_->isUnsafe(c) \|\|

	236 (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)))) {

	237 break;

	238 }

	239 // Back up to before this unsafe character.

	240 --offset;

	241 } while (offset > 0);

	242 if (offset < newOffset) {

	243 // We might have backed up more than necessary.

	244 // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,

	245 // but for text "chu" setOffset(2) should remain at 2

	246 // although we initially back up to offset 0.

	247 // Find the last safe offset no greater than newOffset by iterating forward.

	248 int32_t lastSafeOffset = offset;

	249 do {

	250 iter_->resetToOffset(lastSafeOffset);

	251 do {

	252 iter_->nextCE(status);

	253 if (U_FAILURE(status)) { return; }

	254 } while ((offset = iter_->getOffset()) == lastSafeOffset);

	255 if (offset <= newOffset) {

	256 lastSafeOffset = offset;

	257 }

	258 } while (offset < newOffset);

	259 newOffset = lastSafeOffset;

	260 }

	261 }

	262 iter_->resetToOffset(newOffset);

	263 otherHalf_ = 0;

	264 dir_ = 1;

165 }	265 }

166	266

167 /**	267 /**

168 * Sets the source to the new source string.	268 * Sets the source to the new source string.

169 */	269 */

170 void CollationElementIterator::setText(const UnicodeString& source,	270 void CollationElementIterator::setText(const UnicodeString& source,

171 UErrorCode& status)	271 UErrorCode& status)

172 {	272 {

173 if (U_FAILURE(status)) {	273 if (U_FAILURE(status)) {

174 return;	274 return;

175 }	275 }

176	276

177 int32_t length = source.length();	277 string_ = source;

178 UChar *string = NULL;	278 const UChar *s = string_.getBuffer();

179 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {	279 CollationIterator *newIter;

180 uprv_free((UChar *)m_data_->iteratordata_.string);	280 UBool numeric = rbc_->settings->isNumeric();

	281 if (rbc_->settings->dontCheckFCD()) {

	282 newIter = new UTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());

	283 } else {

	284 newIter = new FCDUTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());

181 }	285 }

182 m_data_->isWritable = TRUE;	286 if (newIter == NULL) {

183 if (length > 0) {	287 status = U_MEMORY_ALLOCATION_ERROR;

184 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);	288 return;

185 /* test for NULL */

186 if (string == NULL) {

187 status = U_MEMORY_ALLOCATION_ERROR;

188 return;

189 }

190 u_memcpy(string, source.getBuffer(), length);

191 }	289 }

192 else {	290 delete iter_;

193 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);	291 iter_ = newIter;

194 /* test for NULL */	292 otherHalf_ = 0;

195 if (string == NULL) {	293 dir_ = 0;

196 status = U_MEMORY_ALLOCATION_ERROR;

197 return;

198 }

199 *string = 0;

200 }

201 /* Free offsetBuffer before initializing it. */

202 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));

203 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,

204 &m_data_->iteratordata_, &status);

205

206 m_data_->reset_ = TRUE;

207 }	294 }

208	295

209 // Sets the source to the new character iterator.	296 // Sets the source to the new character iterator.

210 void CollationElementIterator::setText(CharacterIterator& source,	297 void CollationElementIterator::setText(CharacterIterator& source,

211 UErrorCode& status)	298 UErrorCode& status)

212 {	299 {

213 if (U_FAILURE(status))	300 if (U_FAILURE(status))

214 return;	301 return;

215	302

216 int32_t length = source.getLength();	303 source.getText(string_);

217 UChar *buffer = NULL;	304 setText(string_, status);

218

219 if (length == 0) {

220 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);

221 /* test for NULL */

222 if (buffer == NULL) {

223 status = U_MEMORY_ALLOCATION_ERROR;

224 return;

225 }

226 *buffer = 0;

227 }

228 else {

229 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);

230 /* test for NULL */

231 if (buffer == NULL) {

232 status = U_MEMORY_ALLOCATION_ERROR;

233 return;

234 }

235 /*

236 Using this constructor will prevent buffer from being removed when

237 string gets removed

238 */

239 UnicodeString string;

240 source.getText(string);

241 u_memcpy(buffer, string.getBuffer(), length);

242 }

243

244 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {

245 uprv_free((UChar *)m_data_->iteratordata_.string);

246 }

247 m_data_->isWritable = TRUE;

248 /* Free offsetBuffer before initializing it. */

249 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));

250 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,

251 &m_data_->iteratordata_, &status);

252 m_data_->reset_ = TRUE;

253 }	305 }

254	306

255 int32_t CollationElementIterator::strengthOrder(int32_t order) const	307 int32_t CollationElementIterator::strengthOrder(int32_t order) const

256 {	308 {

257 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);	309 UColAttributeValue s = (UColAttributeValue)rbc_->settings->getStrength();

258 // Mask off the unwanted differences.	310 // Mask off the unwanted differences.

259 if (s == UCOL_PRIMARY) {	311 if (s == UCOL_PRIMARY) {

260 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;	312 order &= 0xffff0000;

261 }	313 }

262 else if (s == UCOL_SECONDARY) {	314 else if (s == UCOL_SECONDARY) {

263 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;	315 order &= 0xffffff00;

264 }	316 }

265	317

266 return order;	318 return order;

267 }	319 }

268	320

269 /* CollationElementIterator private constructors/destructors --------------- */	321 /* CollationElementIterator private constructors/destructors --------------- */

270	322

271 /**	323 /**

272 * This is the "real" constructor for this class; it constructs an iterator	324 * This is the "real" constructor for this class; it constructs an iterator

273 * over the source text using the specified collator	325 * over the source text using the specified collator

274 */	326 */

275 CollationElementIterator::CollationElementIterator(	327 CollationElementIterator::CollationElementIterator(

276 const UnicodeString& sourceText,	328 const UnicodeString &source,

277 const RuleBasedCollator* order,	329 const RuleBasedCollator *coll,

278 UErrorCode& status)	330 UErrorCode &status)

279 : isDataOwned_(TRUE)	331 : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {

280 {	332 setText(source, status);

281 if (U_FAILURE(status)) {

282 return;

283 }

284

285 int32_t length = sourceText.length();

286 UChar *string = NULL;

287

288 if (length > 0) {

289 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);

290 /* test for NULL */

291 if (string == NULL) {

292 status = U_MEMORY_ALLOCATION_ERROR;

293 return;

294 }

295 /*

296 Using this constructor will prevent buffer from being removed when

297 string gets removed

298 */

299 u_memcpy(string, sourceText.getBuffer(), length);

300 }

301 else {

302 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);

303 /* test for NULL */

304 if (string == NULL) {

305 status = U_MEMORY_ALLOCATION_ERROR;

306 return;

307 }

308 *string = 0;

309 }

310 m_data_ = ucol_openElements(order->ucollator, string, length, &status);

311

312 /* Test for buffer overflows */

313 if (U_FAILURE(status)) {

314 return;

315 }

316 m_data_->isWritable = TRUE;

317 }	333 }

318	334

319 /**	335 /**

320 * This is the "real" constructor for this class; it constructs an iterator over	336 * This is the "real" constructor for this class; it constructs an iterator over

321 * the source text using the specified collator	337 * the source text using the specified collator

322 */	338 */

323 CollationElementIterator::CollationElementIterator(	339 CollationElementIterator::CollationElementIterator(

324 const CharacterIterator& sourceText,	340 const CharacterIterator &source,

325 const RuleBasedCollator* order,	341 const RuleBasedCollator *coll,

326 UErrorCode& status)	342 UErrorCode &status)

327 : isDataOwned_(TRUE)	343 : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {

328 {	344 // We only call source.getText() which should be const anyway.

329 if (U_FAILURE(status))	345 setText(const_cast<CharacterIterator &>(source), status);

330 return;

331

332 // ** should I just drop this test? **

333 /*

334 if ( sourceText.endIndex() != 0 )

335 {

336 // A CollationElementIterator is really a two-layered beast.

337 // Internally it uses a Normalizer to munge the source text into a form

338 // where all "composed" Unicode characters (such as \u00FC) are split into a

339 // normal character and a combining accent character.

340 // Afterward, CollationElementIterator does its own processing to handle

341 // expanding and contracting collation sequences, ignorables, and so on.

342

343 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL

344 ? Normalizer::NO_OP : order->getDecomposition();

345

346 text = new Normalizer(sourceText, decomp);

347 if (text == NULL)

348 status = U_MEMORY_ALLOCATION_ERROR;

349 }

350 */

351 int32_t length = sourceText.getLength();

352 UChar *buffer;

353 if (length > 0) {

354 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);

355 /* test for NULL */

356 if (buffer == NULL) {

357 status = U_MEMORY_ALLOCATION_ERROR;

358 return;

359 }

360 /*

361 Using this constructor will prevent buffer from being removed when

362 string gets removed

363 */

364 UnicodeString string(buffer, length, length);

365 ((CharacterIterator &)sourceText).getText(string);

366 const UChar *temp = string.getBuffer();

367 u_memcpy(buffer, temp, length);

368 }

369 else {

370 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);

371 /* test for NULL */

372 if (buffer == NULL) {

373 status = U_MEMORY_ALLOCATION_ERROR;

374 return;

375 }

376 *buffer = 0;

377 }

378 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);

379

380 /* Test for buffer overflows */

381 if (U_FAILURE(status)) {

382 return;

383 }

384 m_data_->isWritable = TRUE;

385 }	346 }

386	347

387 /* CollationElementIterator protected methods ----------------------------- */	348 /* CollationElementIterator private methods -------------------------------- */

388	349

389 const CollationElementIterator& CollationElementIterator::operator=(	350 const CollationElementIterator& CollationElementIterator::operator=(

390 const CollationElementIterator& other)	351 const CollationElementIterator& other)

391 {	352 {

392 if (this != &other)	353 if (this == &other) {

393 {	354 return *this;

394 UCollationElements *ucolelem = this->m_data_;

395 UCollationElements *otherucolelem = other.m_data_;

396 collIterate *coliter = &(ucolelem->iteratordata_);

397 collIterate *othercoliter = &(otherucolelem->iteratordata_);

398 int length = 0;

399

400 // checking only UCOL_ITER_HASLEN is not enough here as we may be in

401 // the normalization buffer

402 length = (int)(othercoliter->endp - othercoliter->string);

403

404 ucolelem->reset_ = otherucolelem->reset_;

405 ucolelem->isWritable = TRUE;

406

407 /* create a duplicate of string */

408 if (length > 0) {

409 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);

410 if(coliter->string != NULL) {

411 uprv_memcpy((UChar *)coliter->string, othercoliter->string,

412 length * U_SIZEOF_UCHAR);

413 } else { // Error: couldn't allocate memory. No copying should be done

414 length = 0;

415 }

416 }

417 else {

418 coliter->string = NULL;

419 }

420

421 /* start and end of string */

422 coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;

423

424 /* handle writable buffer here */

425

426 if (othercoliter->flags & UCOL_ITER_INNORMBUF) {

427 coliter->writableBuffer = othercoliter->writableBuffer;

428 coliter->writableBuffer.getTerminatedBuffer();

429 }

430

431 /* current position */

432 if (othercoliter->pos >= othercoliter->string &&

433 othercoliter->pos <= othercoliter->endp)

434 {

435 U_ASSERT(coliter->string != NULL);

436 coliter->pos = coliter->string +

437 (othercoliter->pos - othercoliter->string);

438 }

439 else {

440 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +

441 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());

442 }

443

444 /* CE buffer */

445 int32_t CEsize;

446 if (coliter->extendCEs) {

447 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);

448 CEsize = sizeof(othercoliter->extendCEs);

449 if (CEsize > 0) {

450 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);

451 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);

452 }

453 coliter->toReturn = coliter->extendCEs +

454 (othercoliter->toReturn - othercoliter->extendCEs);

455 coliter->CEpos = coliter->extendCEs + CEsize;

456 } else {

457 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);

458 if (CEsize > 0) {

459 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);

460 }

461 coliter->toReturn = coliter->CEs +

462 (othercoliter->toReturn - othercoliter->CEs);

463 coliter->CEpos = coliter->CEs + CEsize;

464 }

465

466 if (othercoliter->fcdPosition != NULL) {

467 U_ASSERT(coliter->string != NULL);

468 coliter->fcdPosition = coliter->string +

469 (othercoliter->fcdPosition

470 - othercoliter->string);

471 }

472 else {

473 coliter->fcdPosition = NULL;

474 }

475 coliter->flags = othercoliter->flags/*\| UCOL_ITER_HASLEN*/;

476 coliter->origFlags = othercoliter->origFlags;

477 coliter->coll = othercoliter->coll;

478 this->isDataOwned_ = TRUE;

479 }	355 }

480	356

	357 CollationIterator *newIter;

	358 const FCDUTF16CollationIterator *otherFCDIter =

	359 dynamic_cast<const FCDUTF16CollationIterator *>(other.iter_);

	360 if(otherFCDIter != NULL) {

	361 newIter = new FCDUTF16CollationIterator(*otherFCDIter, string_.getBuffer());

	362 } else {

	363 const UTF16CollationIterator *otherIter =

	364 dynamic_cast<const UTF16CollationIterator *>(other.iter_);

	365 if(otherIter != NULL) {

	366 newIter = new UTF16CollationIterator(*otherIter, string_.getBuffer());

	367 } else {

	368 newIter = NULL;

	369 }

	370 }

	371 if(newIter != NULL) {

	372 delete iter_;

	373 iter_ = newIter;

	374 rbc_ = other.rbc_;

	375 otherHalf_ = other.otherHalf_;

	376 dir_ = other.dir_;

	377

	378 string_ = other.string_;

	379 }

	380 if(other.dir_ < 0 && other.offsets_ != NULL && !other.offsets_->isEmpty()) {

	381 UErrorCode errorCode = U_ZERO_ERROR;

	382 if(offsets_ == NULL) {

	383 offsets_ = new UVector32(other.offsets_->size(), errorCode);

	384 }

	385 if(offsets_ != NULL) {

	386 offsets_->assign(*other.offsets_, errorCode);

	387 }

	388 }

481 return *this;	389 return *this;

482 }	390 }

483	391

	392 namespace {

	393

	394 class MaxExpSink : public ContractionsAndExpansions::CESink {

	395 public:

	396 MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {}

	397 virtual ~MaxExpSink();

	398 virtual void handleCE(int64_t /*ce*/) {}

	399 virtual void handleExpansion(const int64_t ces[], int32_t length) {

	400 if (length <= 1) {

	401 // We do not need to add single CEs into the map.

	402 return;

	403 }

	404 int32_t count = 0; // number of CE "halves"

	405 for (int32_t i = 0; i < length; ++i) {

	406 count += ceNeedsTwoParts(ces[i]) ? 2 : 1;

	407 }

	408 // last "half" of the last CE

	409 int64_t ce = ces[length - 1];

	410 uint32_t p = (uint32_t)(ce >> 32);

	411 uint32_t lower32 = (uint32_t)ce;

	412 uint32_t lastHalf = getSecondHalf(p, lower32);

	413 if (lastHalf == 0) {

	414 lastHalf = getFirstHalf(p, lower32);

	415 U_ASSERT(lastHalf != 0);

	416 } else {

	417 lastHalf \|= 0xc0; // old-style continuation CE

	418 }

	419 if (count > uhash_igeti(maxExpansions, (int32_t)lastHalf)) {

	420 uhash_iputi(maxExpansions, (int32_t)lastHalf, count, &errorCode);

	421 }

	422 }

	423

	424 private:

	425 UHashtable *maxExpansions;

	426 UErrorCode &errorCode;

	427 };

	428

	429 MaxExpSink::~MaxExpSink() {}

	430

	431 } // namespace

	432

	433 UHashtable *

	434 CollationElementIterator::computeMaxExpansions(const CollationData *data, UErrorCode &errorCode) {

	435 if (U_FAILURE(errorCode)) { return NULL; }

	436 UHashtable *maxExpansions = uhash_open(uhash_hashLong, uhash_compareLong,

	437 uhash_compareLong, &errorCode);

	438 if (U_FAILURE(errorCode)) { return NULL; }

	439 MaxExpSink sink(maxExpansions, errorCode);

	440 ContractionsAndExpansions(NULL, NULL, &sink, TRUE).forData(data, errorCode);

	441 if (U_FAILURE(errorCode)) {

	442 uhash_close(maxExpansions);

	443 return NULL;

	444 }

	445 return maxExpansions;

	446 }

	447

	448 int32_t

	449 CollationElementIterator::getMaxExpansion(int32_t order) const {

	450 return getMaxExpansion(rbc_->tailoring->maxExpansions, order);

	451 }

	452

	453 int32_t

	454 CollationElementIterator::getMaxExpansion(const UHashtable *maxExpansions, int32_t order) {

	455 if (order == 0) { return 1; }

	456 int32_t max;

	457 if(maxExpansions != NULL && (max = uhash_igeti(maxExpansions, order)) != 0) {

	458 return max;

	459 }

	460 if ((order & 0xc0) == 0xc0) {

	461 // old-style continuation CE

	462 return 2;

	463 } else {

	464 return 1;

	465 }

	466 }

	467

484 U_NAMESPACE_END	468 U_NAMESPACE_END

485	469

486 #endif /* #if !UCONFIG_NO_COLLATION */	470 #endif /* #if !UCONFIG_NO_COLLATION */

487

488 /* eof */

OLD	NEW

« no previous file with comments | « source/i18n/chnsecal.cpp ('k') | source/i18n/coll.cpp » ('j') | no next file with comments »