icu46/source/common/unorm.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/unorm.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 ******************************************************************************

	3 * Copyright (c) 1996-2010, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 ******************************************************************************

	6 * File unorm.cpp

	7 *

	8 * Created by: Vladimir Weinstein 12052000

	9 *

	10 * Modification history :

	11 *

	12 * Date Name Description

	13 * 02/01/01 synwee Added normalization quickcheck enum and method.

	14 * 02/12/01 synwee Commented out quickcheck util api has been approved

	15 * Added private method for doing FCD checks

	16 * 02/23/01 synwee Modified quickcheck and checkFCE to run through

	17 * string for codepoints < 0x300 for the normalization

	18 * mode NFC.

	19 * 05/25/01+ Markus Scherer total rewrite, implement all normalization here

	20 * instead of just wrappers around normlzr.cpp,

	21 * load unorm.dat, support Unicode 3.1 with

	22 * supplementary code points, etc.

	23 * 2009-nov..2010-jan Markus Scherer total rewrite, new Normalizer2 API & code

	24 */

	25

	26 #include "unicode/utypes.h"

	27

	28 #if !UCONFIG_NO_NORMALIZATION

	29

	30 #include "unicode/udata.h"

	31 #include "unicode/ustring.h"

	32 #include "unicode/uiter.h"

	33 #include "unicode/unorm.h"

	34 #include "unicode/unorm2.h"

	35 #include "normalizer2impl.h"

	36 #include "unormimp.h"

	37 #include "uprops.h"

	38 #include "ustr_imp.h"

	39

	40 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

	41

	42 U_NAMESPACE_USE

	43

	44 /* quick check functions ---------------------------------------------------- */

	45

	46 U_CAPI UNormalizationCheckResult U_EXPORT2

	47 unorm_quickCheck(const UChar *src,

	48 int32_t srcLength,

	49 UNormalizationMode mode,

	50 UErrorCode *pErrorCode) {

	51 const Normalizer2 n2=Normalizer2Factory::getInstance(mode, pErrorCode);

	52 return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCod e);

	53 }

	54

	55 U_CAPI UNormalizationCheckResult U_EXPORT2

	56 unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,

	57 UNormalizationMode mode, int32_t options,

	58 UErrorCode *pErrorCode) {

	59 const Normalizer2 n2=Normalizer2Factory::getInstance(mode, pErrorCode);

	60 if(options&UNORM_UNICODE_3_2) {

	61 FilteredNormalizer2 fn2(n2, uniset_getUnicode32Instance(*pErrorCode));

	62 return unorm2_quickCheck(

	63 reinterpret_cast<const UNormalizer2 >(static_cast<Normalizer2 >(&f n2)),

	64 src, srcLength, pErrorCode);

	65 } else {

	66 return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErro rCode);

	67 }

	68 }

	69

	70 U_CAPI UBool U_EXPORT2

	71 unorm_isNormalized(const UChar *src, int32_t srcLength,

	72 UNormalizationMode mode,

	73 UErrorCode *pErrorCode) {

	74 const Normalizer2 n2=Normalizer2Factory::getInstance(mode, pErrorCode);

	75 return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorC ode);

	76 }

	77

	78 U_CAPI UBool U_EXPORT2

	79 unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,

	80 UNormalizationMode mode, int32_t options,

	81 UErrorCode *pErrorCode) {

	82 const Normalizer2 n2=Normalizer2Factory::getInstance(mode, pErrorCode);

	83 if(options&UNORM_UNICODE_3_2) {

	84 FilteredNormalizer2 fn2(n2, uniset_getUnicode32Instance(*pErrorCode));

	85 return unorm2_isNormalized(

	86 reinterpret_cast<const UNormalizer2 >(static_cast<Normalizer2 >(&f n2)),

	87 src, srcLength, pErrorCode);

	88 } else {

	89 return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pEr rorCode);

	90 }

	91 }

	92

	93 /* normalize() API ---------------------------------------------------------- */

	94

	95 /** Public API for normalizing. */

	96 U_CAPI int32_t U_EXPORT2

	97 unorm_normalize(const UChar *src, int32_t srcLength,

	98 UNormalizationMode mode, int32_t options,

	99 UChar *dest, int32_t destCapacity,

	100 UErrorCode *pErrorCode) {

	101 const Normalizer2 n2=Normalizer2Factory::getInstance(mode, pErrorCode);

	102 if(options&UNORM_UNICODE_3_2) {

	103 FilteredNormalizer2 fn2(n2, uniset_getUnicode32Instance(*pErrorCode));

	104 return unorm2_normalize(

	105 reinterpret_cast<const UNormalizer2 >(static_cast<Normalizer2 >(&f n2)),

	106 src, srcLength, dest, destCapacity, pErrorCode);

	107 } else {

	108 return unorm2_normalize((const UNormalizer2 *)n2,

	109 src, srcLength, dest, destCapacity, pErrorCode);

	110 }

	111 }

	112

	113

	114 /* iteration functions ------------------------------------------------------ */

	115

	116 static int32_t

	117 unorm_iterate(UCharIterator *src, UBool forward,

	118 UChar *dest, int32_t destCapacity,

	119 UNormalizationMode mode, int32_t options,

	120 UBool doNormalize, UBool *pNeededToNormalize,

	121 UErrorCode *pErrorCode) {

	122 const Normalizer2 n2=Normalizer2Factory::getInstance(mode, pErrorCode);

	123 const UnicodeSet *uni32;

	124 if(options&UNORM_UNICODE_3_2) {

	125 uni32=uniset_getUnicode32Instance(*pErrorCode);

	126 } else {

	127 uni32=NULL; // unused

	128 }

	129 FilteredNormalizer2 fn2(n2, uni32);

	130 if(options&UNORM_UNICODE_3_2) {

	131 n2=&fn2;

	132 }

	133 if(U_FAILURE(*pErrorCode)) {

	134 return 0;

	135 }

	136 if( destCapacity<0 \|\| (dest==NULL && destCapacity>0) \|\|

	137 src==NULL

	138 ) {

	139 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	140 return 0;

	141 }

	142

	143 if(pNeededToNormalize!=NULL) {

	144 *pNeededToNormalize=FALSE;

	145 }

	146 if(!(forward ? src->hasNext(src) : src->hasPrevious(src))) {

	147 return u_terminateUChars(dest, destCapacity, 0, pErrorCode);

	148 }

	149

	150 UnicodeString buffer;

	151 UChar32 c;

	152 if(forward) {

	153 /* get one character and ignore its properties */

	154 buffer.append(uiter_next32(src));

	155 /* get all following characters until we see a boundary */

	156 while((c=uiter_next32(src))>=0) {

	157 if(n2->hasBoundaryBefore(c)) {

	158 /* back out the latest movement to stop at the boundary */

	159 src->move(src, -U16_LENGTH(c), UITER_CURRENT);

	160 break;

	161 } else {

	162 buffer.append(c);

	163 }

	164 }

	165 } else {

	166 while((c=uiter_previous32(src))>=0) {

	167 /* always write this character to the front of the buffer */

	168 buffer.insert(0, c);

	169 /* stop if this just-copied character is a boundary */

	170 if(n2->hasBoundaryBefore(c)) {

	171 break;

	172 }

	173 }

	174 }

	175

	176 UnicodeString destString(dest, 0, destCapacity);

	177 if(buffer.length()>0 && doNormalize) {

	178 n2->normalize(buffer, destString, pErrorCode).extract(dest, destCapacit y, pErrorCode);

	179 if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) {

	180 *pNeededToNormalize= destString!=buffer;

	181 }

	182 return destString.length();

	183 } else {

	184 /* just copy the source characters */

	185 return buffer.extract(dest, destCapacity, *pErrorCode);

	186 }

	187 }

	188

	189 U_CAPI int32_t U_EXPORT2

	190 unorm_previous(UCharIterator *src,

	191 UChar *dest, int32_t destCapacity,

	192 UNormalizationMode mode, int32_t options,

	193 UBool doNormalize, UBool *pNeededToNormalize,

	194 UErrorCode *pErrorCode) {

	195 return unorm_iterate(src, FALSE,

	196 dest, destCapacity,

	197 mode, options,

	198 doNormalize, pNeededToNormalize,

	199 pErrorCode);

	200 }

	201

	202 U_CAPI int32_t U_EXPORT2

	203 unorm_next(UCharIterator *src,

	204 UChar *dest, int32_t destCapacity,

	205 UNormalizationMode mode, int32_t options,

	206 UBool doNormalize, UBool *pNeededToNormalize,

	207 UErrorCode *pErrorCode) {

	208 return unorm_iterate(src, TRUE,

	209 dest, destCapacity,

	210 mode, options,

	211 doNormalize, pNeededToNormalize,

	212 pErrorCode);

	213 }

	214

	215 /* Concatenation of normalized strings -------------------------------------- */

	216

	217 U_CAPI int32_t U_EXPORT2

	218 unorm_concatenate(const UChar *left, int32_t leftLength,

	219 const UChar *right, int32_t rightLength,

	220 UChar *dest, int32_t destCapacity,

	221 UNormalizationMode mode, int32_t options,

	222 UErrorCode *pErrorCode) {

	223 const Normalizer2 n2=Normalizer2Factory::getInstance(mode, pErrorCode);

	224 const UnicodeSet *uni32;

	225 if(options&UNORM_UNICODE_3_2) {

	226 uni32=uniset_getUnicode32Instance(*pErrorCode);

	227 } else {

	228 uni32=NULL; // unused

	229 }

	230 FilteredNormalizer2 fn2(n2, uni32);

	231 if(options&UNORM_UNICODE_3_2) {

	232 n2=&fn2;

	233 }

	234 if(U_FAILURE(*pErrorCode)) {

	235 return 0;

	236 }

	237 if( destCapacity<0 \|\| (dest==NULL && destCapacity>0) \|\|

	238 left==NULL \|\| leftLength<-1 \|\|

	239 right==NULL \|\| rightLength<-1

	240 ) {

	241 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	242 return 0;

	243 }

	244

	245 /* check for overlapping right and destination */

	246 if( dest!=NULL &&

	247 ((right>=dest && right<(dest+destCapacity)) \|\|

	248 (rightLength>0 && dest>=right && dest<(right+rightLength)))

	249 ) {

	250 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	251 return 0;

	252 }

	253

	254 /* allow left==dest */

	255 UnicodeString destString;

	256 if(left==dest) {

	257 destString.setTo(dest, leftLength, destCapacity);

	258 } else {

	259 destString.setTo(dest, 0, destCapacity);

	260 destString.append(left, leftLength);

	261 }

	262 return n2->append(destString, UnicodeString(rightLength<0, right, rightLengt h), *pErrorCode).

	263 extract(dest, destCapacity, *pErrorCode);

	264 }

	265

	266 #endif /* #if !UCONFIG_NO_NORMALIZATION */

OLD	NEW

« no previous file with comments | « icu46/source/common/unistr_props.cpp ('k') | icu46/source/common/unorm_it.h » ('j') | no next file with comments »