icu46/source/common/filterednormalizer2.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/filterednormalizer2.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 2009-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 * file name: filterednormalizer2.cpp

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:4

	12 *

	13 * created on: 2009dec10

	14 * created by: Markus W. Scherer

	15 */

	16

	17 #include "unicode/utypes.h"

	18

	19 #if !UCONFIG_NO_NORMALIZATION

	20

	21 #include "unicode/normalizer2.h"

	22 #include "unicode/uniset.h"

	23 #include "unicode/unistr.h"

	24 #include "unicode/unorm.h"

	25 #include "cpputils.h"

	26

	27 U_NAMESPACE_BEGIN

	28

	29 UnicodeString &

	30 FilteredNormalizer2::normalize(const UnicodeString &src,

	31 UnicodeString &dest,

	32 UErrorCode &errorCode) const {

	33 uprv_checkCanGetBuffer(src, errorCode);

	34 if(U_FAILURE(errorCode)) {

	35 dest.setToBogus();

	36 return dest;

	37 }

	38 if(&dest==&src) {

	39 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	40 return dest;

	41 }

	42 dest.remove();

	43 return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);

	44 }

	45

	46 // Internal: No argument checking, and appends to dest.

	47 // Pass as input spanCondition the one that is likely to yield a non-zero

	48 // span length at the start of src.

	49 // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,

	50 // USET_SPAN_SIMPLE should be passed in for the start of src

	51 // and USET_SPAN_NOT_CONTAINED should be passed in if we continue after

	52 // an in-filter prefix.

	53 UnicodeString &

	54 FilteredNormalizer2::normalize(const UnicodeString &src,

	55 UnicodeString &dest,

	56 USetSpanCondition spanCondition,

	57 UErrorCode &errorCode) const {

	58 UnicodeString tempDest; // Don't throw away destination buffer between iter ations.

	59 for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {

	60 int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);

	61 int32_t spanLength=spanLimit-prevSpanLimit;

	62 if(spanCondition==USET_SPAN_NOT_CONTAINED) {

	63 if(spanLength!=0) {

	64 dest.append(src, prevSpanLimit, spanLength);

	65 }

	66 spanCondition=USET_SPAN_SIMPLE;

	67 } else {

	68 if(spanLength!=0) {

	69 // Not norm2.normalizeSecondAndAppend() because we do not want

	70 // to modify the non-filter part of dest.

	71 dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLim it, spanLimit),

	72 tempDest, errorCode));

	73 if(U_FAILURE(errorCode)) {

	74 break;

	75 }

	76 }

	77 spanCondition=USET_SPAN_NOT_CONTAINED;

	78 }

	79 prevSpanLimit=spanLimit;

	80 }

	81 return dest;

	82 }

	83

	84 UnicodeString &

	85 FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,

	86 const UnicodeString &second,

	87 UErrorCode &errorCode) const {

	88 return normalizeSecondAndAppend(first, second, TRUE, errorCode);

	89 }

	90

	91 UnicodeString &

	92 FilteredNormalizer2::append(UnicodeString &first,

	93 const UnicodeString &second,

	94 UErrorCode &errorCode) const {

	95 return normalizeSecondAndAppend(first, second, FALSE, errorCode);

	96 }

	97

	98 UnicodeString &

	99 FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,

	100 const UnicodeString &second,

	101 UBool doNormalize,

	102 UErrorCode &errorCode) const {

	103 uprv_checkCanGetBuffer(first, errorCode);

	104 uprv_checkCanGetBuffer(second, errorCode);

	105 if(U_FAILURE(errorCode)) {

	106 return first;

	107 }

	108 if(&first==&second) {

	109 errorCode=U_ILLEGAL_ARGUMENT_ERROR;

	110 return first;

	111 }

	112 if(first.isEmpty()) {

	113 if(doNormalize) {

	114 return normalize(second, first, errorCode);

	115 } else {

	116 return first=second;

	117 }

	118 }

	119 // merge the in-filter suffix of the first string with the in-filter prefix of the second

	120 int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);

	121 if(prefixLimit!=0) {

	122 UnicodeString prefix(second.tempSubString(0, prefixLimit));

	123 int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);

	124 if(suffixStart==0) {

	125 if(doNormalize) {

	126 norm2.normalizeSecondAndAppend(first, prefix, errorCode);

	127 } else {

	128 norm2.append(first, prefix, errorCode);

	129 }

	130 } else {

	131 UnicodeString middle(first, suffixStart, INT32_MAX);

	132 if(doNormalize) {

	133 norm2.normalizeSecondAndAppend(middle, prefix, errorCode);

	134 } else {

	135 norm2.append(middle, prefix, errorCode);

	136 }

	137 first.replace(suffixStart, INT32_MAX, middle);

	138 }

	139 }

	140 if(prefixLimit<second.length()) {

	141 UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));

	142 if(doNormalize) {

	143 normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);

	144 } else {

	145 first.append(rest);

	146 }

	147 }

	148 return first;

	149 }

	150

	151 UBool

	152 FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) c onst {

	153 return set.contains(c) && norm2.getDecomposition(c, decomposition);

	154 }

	155

	156 UBool

	157 FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {

	158 uprv_checkCanGetBuffer(s, errorCode);

	159 if(U_FAILURE(errorCode)) {

	160 return FALSE;

	161 }

	162 USetSpanCondition spanCondition=USET_SPAN_SIMPLE;

	163 for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {

	164 int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);

	165 if(spanCondition==USET_SPAN_NOT_CONTAINED) {

	166 spanCondition=USET_SPAN_SIMPLE;

	167 } else {

	168 if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLi mit), errorCode) \|\|

	169 U_FAILURE(errorCode)

	170 ) {

	171 return FALSE;

	172 }

	173 spanCondition=USET_SPAN_NOT_CONTAINED;

	174 }

	175 prevSpanLimit=spanLimit;

	176 }

	177 return TRUE;

	178 }

	179

	180 UNormalizationCheckResult

	181 FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) c onst {

	182 uprv_checkCanGetBuffer(s, errorCode);

	183 if(U_FAILURE(errorCode)) {

	184 return UNORM_MAYBE;

	185 }

	186 UNormalizationCheckResult result=UNORM_YES;

	187 USetSpanCondition spanCondition=USET_SPAN_SIMPLE;

	188 for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {

	189 int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);

	190 if(spanCondition==USET_SPAN_NOT_CONTAINED) {

	191 spanCondition=USET_SPAN_SIMPLE;

	192 } else {

	193 UNormalizationCheckResult qcResult=

	194 norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit ), errorCode);

	195 if(U_FAILURE(errorCode) \|\| qcResult==UNORM_NO) {

	196 return qcResult;

	197 } else if(qcResult==UNORM_MAYBE) {

	198 result=qcResult;

	199 }

	200 spanCondition=USET_SPAN_NOT_CONTAINED;

	201 }

	202 prevSpanLimit=spanLimit;

	203 }

	204 return result;

	205 }

	206

	207 int32_t

	208 FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &error Code) const {

	209 uprv_checkCanGetBuffer(s, errorCode);

	210 if(U_FAILURE(errorCode)) {

	211 return 0;

	212 }

	213 USetSpanCondition spanCondition=USET_SPAN_SIMPLE;

	214 for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {

	215 int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);

	216 if(spanCondition==USET_SPAN_NOT_CONTAINED) {

	217 spanCondition=USET_SPAN_SIMPLE;

	218 } else {

	219 int32_t yesLimit=

	220 prevSpanLimit+

	221 norm2.spanQuickCheckYes(

	222 s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ;

	223 if(U_FAILURE(errorCode) \|\| yesLimit<spanLimit) {

	224 return yesLimit;

	225 }

	226 spanCondition=USET_SPAN_NOT_CONTAINED;

	227 }

	228 prevSpanLimit=spanLimit;

	229 }

	230 return s.length();

	231 }

	232

	233 UBool

	234 FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {

	235 return !set.contains(c) \|\| norm2.hasBoundaryBefore(c);

	236 }

	237

	238 UBool

	239 FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {

	240 return !set.contains(c) \|\| norm2.hasBoundaryAfter(c);

	241 }

	242

	243 UBool

	244 FilteredNormalizer2::isInert(UChar32 c) const {

	245 return !set.contains(c) \|\| norm2.isInert(c);

	246 }

	247

	248 U_NAMESPACE_END

	249

	250 // C API ------------------------------------------------------------------- ***

	251

	252 U_NAMESPACE_USE

	253

	254 U_DRAFT UNormalizer2 * U_EXPORT2

	255 unorm2_openFiltered(const UNormalizer2 norm2, const USet filterSet, UErrorCode *pErrorCode) {

	256 if(U_FAILURE(*pErrorCode)) {

	257 return NULL;

	258 }

	259 if(filterSet==NULL) {

	260 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	261 return NULL;

	262 }

	263 Normalizer2 fn2=new FilteredNormalizer2((Normalizer2 *)norm2,

	264 *UnicodeSet::fromUSet(filterSet));

	265 if(fn2==NULL) {

	266 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

	267 }

	268 return (UNormalizer2 *)fn2;

	269 }

	270

	271 #endif // !UCONFIG_NO_NORMALIZATION

OLD	NEW

« no previous file with comments | « icu46/source/common/errorcode.cpp ('k') | icu46/source/common/hash.h » ('j') | no next file with comments »