Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(564)

Side by Side Diff: source/common/normalizer2impl.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/normalizer2impl.h ('k') | source/common/propname.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * 3 *
4 * Copyright (C) 2009-2013, International Business Machines 4 * Copyright (C) 2009-2014, International Business Machines
5 * Corporation and others. All Rights Reserved. 5 * Corporation and others. All Rights Reserved.
6 * 6 *
7 ******************************************************************************* 7 *******************************************************************************
8 * file name: normalizer2impl.cpp 8 * file name: normalizer2impl.cpp
9 * encoding: US-ASCII 9 * encoding: US-ASCII
10 * tab size: 8 (not used) 10 * tab size: 8 (not used)
11 * indentation:4 11 * indentation:4
12 * 12 *
13 * created on: 2009nov22 13 * created on: 2009nov22
14 * created by: Markus W. Scherer 14 * created by: Markus W. Scherer
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after
246 246
247 struct CanonIterData : public UMemory { 247 struct CanonIterData : public UMemory {
248 CanonIterData(UErrorCode &errorCode); 248 CanonIterData(UErrorCode &errorCode);
249 ~CanonIterData(); 249 ~CanonIterData();
250 void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode); 250 void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
251 UTrie2 *trie; 251 UTrie2 *trie;
252 UVector canonStartSets; // contains UnicodeSet * 252 UVector canonStartSets; // contains UnicodeSet *
253 }; 253 };
254 254
255 Normalizer2Impl::~Normalizer2Impl() { 255 Normalizer2Impl::~Normalizer2Impl() {
256 udata_close(memory);
257 utrie2_close(normTrie);
258 delete fCanonIterData; 256 delete fCanonIterData;
259 } 257 }
260 258
261 UBool U_CALLCONV
262 Normalizer2Impl::isAcceptable(void *context,
263 const char * /* type */, const char * /*name*/,
264 const UDataInfo *pInfo) {
265 if(
266 pInfo->size>=20 &&
267 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
268 pInfo->charsetFamily==U_CHARSET_FAMILY &&
269 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
270 pInfo->dataFormat[1]==0x72 &&
271 pInfo->dataFormat[2]==0x6d &&
272 pInfo->dataFormat[3]==0x32 &&
273 pInfo->formatVersion[0]==2
274 ) {
275 Normalizer2Impl *me=(Normalizer2Impl *)context;
276 uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
277 return TRUE;
278 } else {
279 return FALSE;
280 }
281 }
282
283 void 259 void
284 Normalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { 260 Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
285 if(U_FAILURE(errorCode)) { 261 const uint16_t *inExtraData, const uint8_t *inSmallFCD) {
286 return;
287 }
288 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
289 if(U_FAILURE(errorCode)) {
290 return;
291 }
292 const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
293 const int32_t *inIndexes=(const int32_t *)inBytes;
294 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
295 if(indexesLength<=IX_MIN_MAYBE_YES) {
296 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
297 return;
298 }
299
300 minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP]; 262 minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
301 minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP]; 263 minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
302 264
303 minYesNo=inIndexes[IX_MIN_YES_NO]; 265 minYesNo=inIndexes[IX_MIN_YES_NO];
304 minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]; 266 minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
305 minNoNo=inIndexes[IX_MIN_NO_NO]; 267 minNoNo=inIndexes[IX_MIN_NO_NO];
306 limitNoNo=inIndexes[IX_LIMIT_NO_NO]; 268 limitNoNo=inIndexes[IX_LIMIT_NO_NO];
307 minMaybeYes=inIndexes[IX_MIN_MAYBE_YES]; 269 minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
308 270
309 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; 271 normTrie=inTrie;
310 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
311 normTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
312 inBytes+offset, nextOffset-offset, NULL,
313 &errorCode);
314 if(U_FAILURE(errorCode)) {
315 return;
316 }
317 272
318 offset=nextOffset; 273 maybeYesCompositions=inExtraData;
319 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
320 maybeYesCompositions=(const uint16_t *)(inBytes+offset);
321 extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes); 274 extraData=maybeYesCompositions+(MIN_NORMAL_MAYBE_YES-minMaybeYes);
322 275
323 // smallFCD: new in formatVersion 2 276 smallFCD=inSmallFCD;
324 offset=nextOffset;
325 smallFCD=inBytes+offset;
326 277
327 // Build tccc180[]. 278 // Build tccc180[].
328 // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300. 279 // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
329 uint8_t bits=0; 280 uint8_t bits=0;
330 for(UChar c=0; c<0x180; bits>>=1) { 281 for(UChar c=0; c<0x180; bits>>=1) {
331 if((c&0xff)==0) { 282 if((c&0xff)==0) {
332 bits=smallFCD[c>>8]; // one byte per 0x100 code points 283 bits=smallFCD[c>>8]; // one byte per 0x100 code points
333 } 284 }
334 if(bits&1) { 285 if(bits&1) {
335 for(int i=0; i<0x20; ++i, ++c) { 286 for(int i=0; i<0x20; ++i, ++c) {
(...skipping 14 matching lines...) Expand all
350 c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]); 301 c=U16_GET_SUPPLEMENTARY(cpStart[0], cpStart[1]);
351 } 302 }
352 uint16_t prevNorm16=getNorm16(c); 303 uint16_t prevNorm16=getNorm16(c);
353 if(prevNorm16<=minYesNo) { 304 if(prevNorm16<=minYesNo) {
354 return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0 305 return 0; // yesYes and Hangul LV/LVT have ccc=tccc=0
355 } else { 306 } else {
356 return (uint8_t)(*getMapping(prevNorm16)>>8); // tccc from yesNo 307 return (uint8_t)(*getMapping(prevNorm16)>>8); // tccc from yesNo
357 } 308 }
358 } 309 }
359 310
311 namespace {
312
313 class LcccContext {
314 public:
315 LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {}
316
317 void handleRange(UChar32 start, UChar32 end, uint16_t norm16) {
318 if(impl.isAlgorithmicNoNo(norm16)) {
319 // Range of code points with same-norm16-value algorithmic decompositions.
320 // They might have different non-zero FCD16 values.
321 do {
322 uint16_t fcd16=impl.getFCD16(start);
323 if(fcd16>0xff) { set.add(start); }
324 } while(++start<=end);
325 } else {
326 uint16_t fcd16=impl.getFCD16(start);
327 if(fcd16>0xff) { set.add(start, end); }
328 }
329 }
330
331 private:
332 const Normalizer2Impl &impl;
333 UnicodeSet &set;
334 };
335
336 struct PropertyStartsContext {
337 PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder)
338 : impl(ni), sa(adder) {}
339
340 const Normalizer2Impl &impl;
341 const USetAdder *sa;
342 };
343
344 } // namespace
345
360 U_CDECL_BEGIN 346 U_CDECL_BEGIN
361 347
362 static UBool U_CALLCONV 348 static UBool U_CALLCONV
349 enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
350 ((LcccContext *)context)->handleRange(start, end, (uint16_t)value);
351 return TRUE;
352 }
353
354 static UBool U_CALLCONV
355 enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
356 /* add the start code point to the USet */
357 const PropertyStartsContext *ctx=(const PropertyStartsContext *)context;
358 const USetAdder *sa=ctx->sa;
359 sa->add(sa->set, start);
360 if(start!=end && ctx->impl.isAlgorithmicNoNo((uint16_t)value)) {
361 // Range of code points with same-norm16-value algorithmic decompositions.
362 // They might have different non-zero FCD16 values.
363 uint16_t prevFCD16=ctx->impl.getFCD16(start);
364 while(++start<=end) {
365 uint16_t fcd16=ctx->impl.getFCD16(start);
366 if(fcd16!=prevFCD16) {
367 sa->add(sa->set, start);
368 prevFCD16=fcd16;
369 }
370 }
371 }
372 return TRUE;
373 }
374
375 static UBool U_CALLCONV
363 enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { 376 enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
364 /* add the start code point to the USet */ 377 /* add the start code point to the USet */
365 const USetAdder *sa=(const USetAdder *)context; 378 const USetAdder *sa=(const USetAdder *)context;
366 sa->add(sa->set, start); 379 sa->add(sa->set, start);
367 return TRUE; 380 return TRUE;
368 } 381 }
369 382
370 static uint32_t U_CALLCONV 383 static uint32_t U_CALLCONV
371 segmentStarterMapper(const void * /*context*/, uint32_t value) { 384 segmentStarterMapper(const void * /*context*/, uint32_t value) {
372 return value&CANON_NOT_SEGMENT_STARTER; 385 return value&CANON_NOT_SEGMENT_STARTER;
373 } 386 }
374 387
375 U_CDECL_END 388 U_CDECL_END
376 389
377 void 390 void
391 Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
392 /* add the start code point of each same-value range of each trie */
393 LcccContext context(*this, set);
394 utrie2_enum(normTrie, NULL, enumLcccRange, &context);
395 }
396
397 void
378 Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const { 398 Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
379 /* add the start code point of each same-value range of each trie */ 399 /* add the start code point of each same-value range of each trie */
380 utrie2_enum(normTrie, NULL, enumPropertyStartsRange, sa); 400 PropertyStartsContext context(*this, sa);
401 utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context);
381 402
382 /* add Hangul LV syllables and LV+1 because of skippables */ 403 /* add Hangul LV syllables and LV+1 because of skippables */
383 for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) { 404 for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
384 sa->add(sa->set, c); 405 sa->add(sa->set, c);
385 sa->add(sa->set, c+1); 406 sa->add(sa->set, c+1);
386 } 407 }
387 sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ 408 sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
388 } 409 }
389 410
390 void 411 void
(...skipping 21 matching lines...) Expand all
412 // Back out the last character for full processing. 433 // Back out the last character for full processing.
413 // Copy this prefix. 434 // Copy this prefix.
414 if(--src!=prevSrc) { 435 if(--src!=prevSrc) {
415 if(buffer!=NULL) { 436 if(buffer!=NULL) {
416 buffer->appendZeroCC(prevSrc, src, errorCode); 437 buffer->appendZeroCC(prevSrc, src, errorCode);
417 } 438 }
418 } 439 }
419 return src; 440 return src;
420 } 441 }
421 442
443 UnicodeString &
444 Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,
445 UErrorCode &errorCode) const {
446 if(U_FAILURE(errorCode)) {
447 dest.setToBogus();
448 return dest;
449 }
450 const UChar *sArray=src.getBuffer();
451 if(&dest==&src || sArray==NULL) {
452 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
453 dest.setToBogus();
454 return dest;
455 }
456 decompose(sArray, sArray+src.length(), dest, src.length(), errorCode);
457 return dest;
458 }
459
460 void
461 Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
462 UnicodeString &dest,
463 int32_t destLengthEstimate,
464 UErrorCode &errorCode) const {
465 if(destLengthEstimate<0 && limit!=NULL) {
466 destLengthEstimate=(int32_t)(limit-src);
467 }
468 dest.remove();
469 ReorderingBuffer buffer(*this, dest);
470 if(buffer.init(destLengthEstimate, errorCode)) {
471 decompose(src, limit, &buffer, errorCode);
472 }
473 }
474
422 // Dual functionality: 475 // Dual functionality:
423 // buffer!=NULL: normalize 476 // buffer!=NULL: normalize
424 // buffer==NULL: isNormalized/spanQuickCheckYes 477 // buffer==NULL: isNormalized/spanQuickCheckYes
425 const UChar * 478 const UChar *
426 Normalizer2Impl::decompose(const UChar *src, const UChar *limit, 479 Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
427 ReorderingBuffer *buffer, 480 ReorderingBuffer *buffer,
428 UErrorCode &errorCode) const { 481 UErrorCode &errorCode) const {
429 UChar32 minNoCP=minDecompNoCP; 482 UChar32 minNoCP=minDecompNoCP;
430 if(limit==NULL) { 483 if(limit==NULL) {
431 src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode); 484 src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
(...skipping 1614 matching lines...) Expand 10 before | Expand all | Expand 10 after
2046 nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1]; 2099 nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];
2047 offset=nextOffset; 2100 offset=nextOffset;
2048 2101
2049 U_ASSERT(offset==size); 2102 U_ASSERT(offset==size);
2050 } 2103 }
2051 2104
2052 return headerSize+size; 2105 return headerSize+size;
2053 } 2106 }
2054 2107
2055 #endif // !UCONFIG_NO_NORMALIZATION 2108 #endif // !UCONFIG_NO_NORMALIZATION
OLDNEW
« no previous file with comments | « source/common/normalizer2impl.h ('k') | source/common/propname.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698