source/common/normalizer2impl.cpp - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/common/normalizer2impl.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 *******************************************************************************	2 *******************************************************************************

3 *	3 *

4 * Copyright (C) 2009-2013, International Business Machines	4 * Copyright (C) 2009-2014, International Business Machines

5 * Corporation and others. All Rights Reserved.	5 * Corporation and others. All Rights Reserved.

6 *	6 *

7 *******************************************************************************	7 *******************************************************************************

8 * file name: normalizer2impl.cpp	8 * file name: normalizer2impl.cpp

9 * encoding: US-ASCII	9 * encoding: US-ASCII

10 * tab size: 8 (not used)	10 * tab size: 8 (not used)

11 * indentation:4	11 * indentation:4

12 *	12 *

13 * created on: 2009nov22	13 * created on: 2009nov22

14 * created by: Markus W. Scherer	14 * created by: Markus W. Scherer

(...skipping 231 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
246	246

247 struct CanonIterData : public UMemory {	247 struct CanonIterData : public UMemory {

248 CanonIterData(UErrorCode &errorCode);	248 CanonIterData(UErrorCode &errorCode);

249 ~CanonIterData();	249 ~CanonIterData();

250 void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode );	250 void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode );

251 UTrie2 *trie;	251 UTrie2 *trie;

252 UVector canonStartSets; // contains UnicodeSet *	252 UVector canonStartSets; // contains UnicodeSet *

253 };	253 };

254	254

255 Normalizer2Impl::~Normalizer2Impl() {	255 Normalizer2Impl::~Normalizer2Impl() {

256 udata_close(memory);

257 utrie2_close(normTrie);

258 delete fCanonIterData;	256 delete fCanonIterData;

259 }	257 }

260	258

261 UBool U_CALLCONV

262 Normalizer2Impl::isAcceptable(void *context,

263 const char * /* type */, const char * /*name*/,

264 const UDataInfo *pInfo) {

265 if(

266 pInfo->size>=20 &&

267 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&

268 pInfo->charsetFamily==U_CHARSET_FAMILY &&

269 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */

270 pInfo->dataFormat[1]==0x72 &&

271 pInfo->dataFormat[2]==0x6d &&

272 pInfo->dataFormat[3]==0x32 &&

273 pInfo->formatVersion[0]==2

274 ) {

275 Normalizer2Impl *me=(Normalizer2Impl *)context;

276 uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);

277 return TRUE;

278 } else {

279 return FALSE;

280 }

281 }

282

283 void	259 void

284 Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {	260 Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,

285 if(U_FAILURE(errorCode)) {	261 const uint16_t *inExtraData, const uint8_t *inSmallFCD) {

286 return;

287 }

288 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);

289 if(U_FAILURE(errorCode)) {

290 return;

291 }

292 const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);

293 const int32_t *inIndexes=(const int32_t *)inBytes;

294 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;

295 if(indexesLength<=IX_MIN_MAYBE_YES) {

296 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.

297 return;

298 }

299

300 minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];	262 minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];

301 minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];	263 minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];

302	264

303 minYesNo=inIndexes[IX_MIN_YES_NO];	265 minYesNo=inIndexes[IX_MIN_YES_NO];

304 minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];	266 minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];

305 minNoNo=inIndexes[IX_MIN_NO_NO];	267 minNoNo=inIndexes[IX_MIN_NO_NO];

306 limitNoNo=inIndexes[IX_LIMIT_NO_NO];	268 limitNoNo=inIndexes[IX_LIMIT_NO_NO];

307 minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];	269 minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];

308	270

309 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];	271 normTrie=inTrie;

310 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];

311 normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,

312 inBytes+offset, nextOffset-offset, NULL,

313 &errorCode);

314 if(U_FAILURE(errorCode)) {

315 return;

316 }

317	272

318 offset=nextOffset;	273 maybeYesCompositions=inExtraData;

319 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];

320 maybeYesCompositions=(const uint16_t *)(inBytes+offset);

321 extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);	274 extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);

322	275

323 // smallFCD: new in formatVersion 2	276 smallFCD=inSmallFCD;

324 offset=nextOffset;

325 smallFCD=inBytes+offset;

326	277

327 // Build tccc180[].	278 // Build tccc180[].

328 // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.	279 // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.

329 uint8_t bits=0;	280 uint8_t bits=0;

330 for(UChar c=0; c<0x180; bits>>=1) {	281 for(UChar c=0; c<0x180; bits>>=1) {

331 if((c&0xff)==0) {	282 if((c&0xff)==0) {

332 bits=smallFCD[c>>8]; // one byte per 0x100 code points	283 bits=smallFCD[c>>8]; // one byte per 0x100 code points

333 }	284 }

334 if(bits&1) {	285 if(bits&1) {

335 for(int i=0; i<0x20; ++i, ++c) {	286 for(int i=0; i<0x20; ++i, ++c) {

(...skipping 14 matching lines...) Expand all Loading...
350 c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]);	301 c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]);

351 }	302 }

352 uint16_t prevNorm16=getNorm16(c);	303 uint16_t prevNorm16=getNorm16(c);

353 if(prevNorm16<=minYesNo) {	304 if(prevNorm16<=minYesNo) {

354 return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0	305 return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0

355 } else {	306 } else {

356 return (uint8_t)(*getMapping(prevNorm16)>>8); // tccc from yesNo	307 return (uint8_t)(*getMapping(prevNorm16)>>8); // tccc from yesNo

357 }	308 }

358 }	309 }

359	310

	311 namespace {

	312

	313 class LcccContext {

	314 public:

	315 LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {}

	316

	317 void handleRange(UChar32 start, UChar32 end, uint16_t norm16) {

	318 if(impl.isAlgorithmicNoNo(norm16)) {

	319 // Range of code points with same-norm16-value algorithmic decompositions.

	320 // They might have different non-zero FCD16 values.

	321 do {

	322 uint16_t fcd16=impl.getFCD16(start);

	323 if(fcd16>0xff) { set.add(start); }

	324 } while(++start<=end);

	325 } else {

	326 uint16_t fcd16=impl.getFCD16(start);

	327 if(fcd16>0xff) { set.add(start, end); }

	328 }

	329 }

	330

	331 private:

	332 const Normalizer2Impl &impl;

	333 UnicodeSet &set;

	334 };

	335

	336 struct PropertyStartsContext {

	337 PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder)

	338 : impl(ni), sa(adder) {}

	339

	340 const Normalizer2Impl &impl;

	341 const USetAdder *sa;

	342 };

	343

	344 } // namespace

	345

360 U_CDECL_BEGIN	346 U_CDECL_BEGIN

361	347

362 static UBool U_CALLCONV	348 static UBool U_CALLCONV

	349 enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {

	350 ((LcccContext *)context)->handleRange(start, end, (uint16_t)value);

	351 return TRUE;

	352 }

	353

	354 static UBool U_CALLCONV

	355 enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {

	356 /* add the start code point to the USet */

	357 const PropertyStartsContext *ctx=(const PropertyStartsContext *)context;

	358 const USetAdder *sa=ctx->sa;

	359 sa->add(sa->set, start);

	360 if(start!=end && ctx->impl.isAlgorithmicNoNo((uint16_t)value)) {

	361 // Range of code points with same-norm16-value algorithmic decompositions.

	362 // They might have different non-zero FCD16 values.

	363 uint16_t prevFCD16=ctx->impl.getFCD16(start);

	364 while(++start<=end) {

	365 uint16_t fcd16=ctx->impl.getFCD16(start);

	366 if(fcd16!=prevFCD16) {

	367 sa->add(sa->set, start);

	368 prevFCD16=fcd16;

	369 }

	370 }

	371 }

	372 return TRUE;

	373 }

	374

	375 static UBool U_CALLCONV

363 enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {	376 enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {

364 /* add the start code point to the USet */	377 /* add the start code point to the USet */

365 const USetAdder *sa=(const USetAdder *)context;	378 const USetAdder *sa=(const USetAdder *)context;

366 sa->add(sa->set, start);	379 sa->add(sa->set, start);

367 return TRUE;	380 return TRUE;

368 }	381 }

369	382

370 static uint32_t U_CALLCONV	383 static uint32_t U_CALLCONV

371 segmentStarterMapper(const void * /*context*/, uint32_t value) {	384 segmentStarterMapper(const void * /*context*/, uint32_t value) {

372 return value&CANON_NOT_SEGMENT_STARTER;	385 return value&CANON_NOT_SEGMENT_STARTER;

373 }	386 }

374	387

375 U_CDECL_END	388 U_CDECL_END

376	389

377 void	390 void

	391 Normalizer2Impl::addLcccChars(UnicodeSet &set) const {

	392 /* add the start code point of each same-value range of each trie */

	393 LcccContext context(*this, set);

	394 utrie2_enum(normTrie, NULL, enumLcccRange, &context);

	395 }

	396

	397 void

378 Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {	398 Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {

379 /* add the start code point of each same-value range of each trie */	399 /* add the start code point of each same-value range of each trie */

380 utrie2_enum(normTrie, NULL, enumPropertyStartsRange, sa);	400 PropertyStartsContext context(*this, sa);

	401 utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context);

381	402

382 /* add Hangul LV syllables and LV+1 because of skippables */	403 /* add Hangul LV syllables and LV+1 because of skippables */

383 for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {	404 for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {

384 sa->add(sa->set, c);	405 sa->add(sa->set, c);

385 sa->add(sa->set, c+1);	406 sa->add(sa->set, c+1);

386 }	407 }

387 sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */	408 sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */

388 }	409 }

389	410

390 void	411 void

(...skipping 21 matching lines...) Expand all Loading...
412 // Back out the last character for full processing.	433 // Back out the last character for full processing.

413 // Copy this prefix.	434 // Copy this prefix.

414 if(--src!=prevSrc) {	435 if(--src!=prevSrc) {

415 if(buffer!=NULL) {	436 if(buffer!=NULL) {

416 buffer->appendZeroCC(prevSrc, src, errorCode);	437 buffer->appendZeroCC(prevSrc, src, errorCode);

417 }	438 }

418 }	439 }

419 return src;	440 return src;

420 }	441 }

421	442

	443 UnicodeString &

	444 Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,

	445 UErrorCode &errorCode) const {

	446 if(U_FAILURE(errorCode)) {

	447 dest.setToBogus();

	448 return dest;

	449 }

	450 const UChar *sArray=src.getBuffer();

	451 if(&dest==&src || sArray==NULL) {

	452 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	453 dest.setToBogus();

	454 return dest;

	455 }

	456 decompose(sArray, sArray+src.length(), dest, src.length(), errorCode);

	457 return dest;

	458 }

	459

	460 void

	461 Normalizer2Impl::decompose(const UChar *src, const UChar *limit,

	462 UnicodeString &dest,

	463 int32_t destLengthEstimate,

	464 UErrorCode &errorCode) const {

	465 if(destLengthEstimate<0 && limit!=NULL) {

	466 destLengthEstimate=(int32_t)(limit-src);

	467 }

	468 dest.remove();

	469 ReorderingBuffer buffer(*this, dest);

	470 if(buffer.init(destLengthEstimate, errorCode)) {

	471 decompose(src, limit, &buffer, errorCode);

	472 }

	473 }

	474

422 // Dual functionality:	475 // Dual functionality:

423 // buffer!=NULL: normalize	476 // buffer!=NULL: normalize

424 // buffer==NULL: isNormalized/spanQuickCheckYes	477 // buffer==NULL: isNormalized/spanQuickCheckYes

425 const UChar *	478 const UChar *

426 Normalizer2Impl::decompose(const UChar *src, const UChar *limit,	479 Normalizer2Impl::decompose(const UChar *src, const UChar *limit,

427 ReorderingBuffer *buffer,	480 ReorderingBuffer *buffer,

428 UErrorCode &errorCode) const {	481 UErrorCode &errorCode) const {

429 UChar32 minNoCP=minDecompNoCP;	482 UChar32 minNoCP=minDecompNoCP;

430 if(limit==NULL) {	483 if(limit==NULL) {

431 src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);	484 src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);

(...skipping 1614 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2046 nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];	2099 nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];

2047 offset=nextOffset;	2100 offset=nextOffset;

2048	2101

2049 U_ASSERT(offset==size);	2102 U_ASSERT(offset==size);

2050 }	2103 }

2051	2104

2052 return headerSize+size;	2105 return headerSize+size;

2053 }	2106 }

2054	2107

2055 #endif // !UCONFIG_NO_NORMALIZATION	2108 #endif // !UCONFIG_NO_NORMALIZATION

OLD	NEW

« no previous file with comments | « source/common/normalizer2impl.h ('k') | source/common/propname.cpp » ('j') | no next file with comments »