Index: icu46/source/common/unorm.cpp |
=================================================================== |
--- icu46/source/common/unorm.cpp (revision 0) |
+++ icu46/source/common/unorm.cpp (revision 0) |
@@ -0,0 +1,266 @@ |
+/* |
+****************************************************************************** |
+* Copyright (c) 1996-2010, International Business Machines |
+* Corporation and others. All Rights Reserved. |
+****************************************************************************** |
+* File unorm.cpp |
+* |
+* Created by: Vladimir Weinstein 12052000 |
+* |
+* Modification history : |
+* |
+* Date Name Description |
+* 02/01/01 synwee Added normalization quickcheck enum and method. |
+* 02/12/01 synwee Commented out quickcheck util api has been approved |
+* Added private method for doing FCD checks |
+* 02/23/01 synwee Modified quickcheck and checkFCE to run through |
+* string for codepoints < 0x300 for the normalization |
+* mode NFC. |
+* 05/25/01+ Markus Scherer total rewrite, implement all normalization here |
+* instead of just wrappers around normlzr.cpp, |
+* load unorm.dat, support Unicode 3.1 with |
+* supplementary code points, etc. |
+* 2009-nov..2010-jan Markus Scherer total rewrite, new Normalizer2 API & code |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_NORMALIZATION |
+ |
+#include "unicode/udata.h" |
+#include "unicode/ustring.h" |
+#include "unicode/uiter.h" |
+#include "unicode/unorm.h" |
+#include "unicode/unorm2.h" |
+#include "normalizer2impl.h" |
+#include "unormimp.h" |
+#include "uprops.h" |
+#include "ustr_imp.h" |
+ |
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |
+ |
+U_NAMESPACE_USE |
+ |
+/* quick check functions ---------------------------------------------------- */ |
+ |
+/* Quick check of src using the Normalizer2 instance that the factory returns for mode. */ |
+U_CAPI UNormalizationCheckResult U_EXPORT2 |
+unorm_quickCheck(const UChar *src, int32_t srcLength, |
+                 UNormalizationMode mode, |
+                 UErrorCode *pErrorCode) { |
+    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, *pErrorCode); |
+    return unorm2_quickCheck(reinterpret_cast<const UNormalizer2 *>(norm2), src, srcLength, pErrorCode); |
+} |
+ |
+/* Quick check of src for mode; with UNORM_UNICODE_3_2 set, the normalizer is filtered first. */ |
+U_CAPI UNormalizationCheckResult U_EXPORT2 |
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, |
+                            UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode) { |
+    const Normalizer2 *base=Normalizer2Factory::getInstance(mode, *pErrorCode); |
+    if((options&UNORM_UNICODE_3_2)==0) { |
+        /* flag not set: hand the factory instance straight to the C API */ |
+        return unorm2_quickCheck(reinterpret_cast<const UNormalizer2 *>(base), src, srcLength, pErrorCode); |
+    } |
+    /* flag set: wrap the instance with the set from uniset_getUnicode32Instance() */ |
+    FilteredNormalizer2 filtered(*base, *uniset_getUnicode32Instance(*pErrorCode)); |
+    const Normalizer2 *wrapped=&filtered; |
+    return unorm2_quickCheck(reinterpret_cast<const UNormalizer2 *>(wrapped), src, srcLength, pErrorCode); |
+} |
+ |
+/* Tests src with unorm2_isNormalized() using the Normalizer2 the factory returns for mode. */ |
+U_CAPI UBool U_EXPORT2 |
+unorm_isNormalized(const UChar *src, int32_t srcLength, |
+                   UNormalizationMode mode, UErrorCode *pErrorCode) { |
+    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, *pErrorCode); |
+    return unorm2_isNormalized(reinterpret_cast<const UNormalizer2 *>(norm2), src, srcLength, pErrorCode); |
+} |
+ |
+/* Tests src for mode; with UNORM_UNICODE_3_2 set, the normalizer is filtered first. */ |
+U_CAPI UBool U_EXPORT2 |
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, |
+                              UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode) { |
+    const Normalizer2 *base=Normalizer2Factory::getInstance(mode, *pErrorCode); |
+    if((options&UNORM_UNICODE_3_2)==0) { |
+        /* flag not set: hand the factory instance straight to the C API */ |
+        return unorm2_isNormalized(reinterpret_cast<const UNormalizer2 *>(base), src, srcLength, pErrorCode); |
+    } |
+    /* flag set: wrap the instance with the set from uniset_getUnicode32Instance() */ |
+    FilteredNormalizer2 filtered(*base, *uniset_getUnicode32Instance(*pErrorCode)); |
+    const Normalizer2 *wrapped=&filtered; |
+    return unorm2_isNormalized(reinterpret_cast<const UNormalizer2 *>(wrapped), src, srcLength, pErrorCode); |
+} |
+ |
+/* normalize() API ---------------------------------------------------------- */ |
+ |
+/** Public API for normalizing: runs unorm2_normalize() with the Normalizer2 chosen by mode. */ |
+U_CAPI int32_t U_EXPORT2 |
+unorm_normalize(const UChar *src, int32_t srcLength, |
+                UNormalizationMode mode, int32_t options, |
+                UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { |
+    const Normalizer2 *base=Normalizer2Factory::getInstance(mode, *pErrorCode); |
+    if((options&UNORM_UNICODE_3_2)==0) { |
+        /* flag not set: hand the factory instance straight to the C API */ |
+        return unorm2_normalize(reinterpret_cast<const UNormalizer2 *>(base), |
+                                src, srcLength, dest, destCapacity, pErrorCode); |
+    } |
+    /* flag set: wrap the instance with the set from uniset_getUnicode32Instance() */ |
+    FilteredNormalizer2 filtered(*base, *uniset_getUnicode32Instance(*pErrorCode)); |
+    const Normalizer2 *wrapped=&filtered; |
+    return unorm2_normalize(reinterpret_cast<const UNormalizer2 *>(wrapped), |
+                            src, srcLength, dest, destCapacity, pErrorCode); |
+} |
+ |
+ |
+/* iteration functions ------------------------------------------------------ */ |
+ |
+/* Gathers code points from src up to the nearest boundary reported by n2, then copies or |
+ * normalizes them into dest. Returns 0 if *pErrorCode already failed or an argument is bad. */ |
+static int32_t |
+iterateWith(UCharIterator *src, UBool forward, UChar *dest, int32_t destCapacity, |
+            const Normalizer2 *n2, UBool doNormalize, UBool *pNeededToNormalize, |
+            UErrorCode *pErrorCode) { |
+    if(U_FAILURE(*pErrorCode)) { |
+        return 0; |
+    } |
+    if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+    if(pNeededToNormalize!=NULL) { |
+        *pNeededToNormalize=FALSE; |
+    } |
+    if(!(forward ? src->hasNext(src) : src->hasPrevious(src))) { |
+        return u_terminateUChars(dest, destCapacity, 0, pErrorCode); |
+    } |
+    UnicodeString buffer; |
+    UChar32 c; |
+    if(forward) { |
+        /* get one character and ignore its properties */ |
+        buffer.append(uiter_next32(src)); |
+        /* get all following characters until we see a boundary */ |
+        while((c=uiter_next32(src))>=0) { |
+            if(n2->hasBoundaryBefore(c)) { |
+                /* back out the latest movement to stop at the boundary */ |
+                src->move(src, -U16_LENGTH(c), UITER_CURRENT); |
+                break; |
+            } |
+            buffer.append(c); |
+        } |
+    } else { |
+        while((c=uiter_previous32(src))>=0) { |
+            /* always write this character to the front of the buffer */ |
+            buffer.insert(0, c); |
+            /* stop if this just-copied character is a boundary */ |
+            if(n2->hasBoundaryBefore(c)) { |
+                break; |
+            } |
+        } |
+    } |
+    UnicodeString destString(dest, 0, destCapacity); |
+    if(buffer.length()>0 && doNormalize) { |
+        n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode); |
+        if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) { |
+            *pNeededToNormalize= destString!=buffer; |
+        } |
+        return destString.length(); |
+    } |
+    /* just copy the source characters */ |
+    return buffer.extract(dest, destCapacity, *pErrorCode); |
+} |
+ |
+/* Picks the Normalizer2 for mode and iterates; it is filtered only if UNORM_UNICODE_3_2 is set. */ |
+static int32_t |
+unorm_iterate(UCharIterator *src, UBool forward, UChar *dest, int32_t destCapacity, |
+              UNormalizationMode mode, int32_t options, |
+              UBool doNormalize, UBool *pNeededToNormalize, UErrorCode *pErrorCode) { |
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); |
+    if(options&UNORM_UNICODE_3_2) { |
+        const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode); |
+        if(U_FAILURE(*pErrorCode)) { |
+            return 0; |
+        } |
+        /* both lookups succeeded, so n2 and uni32 are expected to be non-NULL here */ |
+        FilteredNormalizer2 fn2(*n2, *uni32); |
+        return iterateWith(src, forward, dest, destCapacity, &fn2, doNormalize, pNeededToNormalize, pErrorCode); |
+    } |
+    return iterateWith(src, forward, dest, destCapacity, n2, doNormalize, pNeededToNormalize, pErrorCode); |
+} |
+ |
+/* Backward iteration: forwards every argument to unorm_iterate() with forward=FALSE. */ |
+U_CAPI int32_t U_EXPORT2 |
+unorm_previous(UCharIterator *src, |
+               UChar *dest, int32_t destCapacity, |
+               UNormalizationMode mode, int32_t options, |
+               UBool doNormalize, UBool *pNeededToNormalize, |
+               UErrorCode *pErrorCode) { |
+    const UBool forward=FALSE; |
+    return unorm_iterate(src, forward, dest, destCapacity, |
+                         mode, options, |
+                         doNormalize, pNeededToNormalize, pErrorCode); |
+} |
+ |
+/* Forward iteration: forwards every argument to unorm_iterate() with forward=TRUE. */ |
+U_CAPI int32_t U_EXPORT2 |
+unorm_next(UCharIterator *src, |
+           UChar *dest, int32_t destCapacity, |
+           UNormalizationMode mode, int32_t options, |
+           UBool doNormalize, UBool *pNeededToNormalize, |
+           UErrorCode *pErrorCode) { |
+    const UBool forward=TRUE; |
+    return unorm_iterate(src, forward, dest, destCapacity, |
+                         mode, options, |
+                         doNormalize, pNeededToNormalize, pErrorCode); |
+} |
+ |
+/* Concatenation of normalized strings -------------------------------------- */ |
+ |
+/* Appends right to left in dest through n2->append(). Returns 0 if *pErrorCode already |
+ * failed or an argument is invalid (including right overlapping dest); n2 is unused then. */ |
+static int32_t |
+concatenateWith(const UChar *left, int32_t leftLength, const UChar *right, int32_t rightLength, |
+                UChar *dest, int32_t destCapacity, const Normalizer2 *n2, UErrorCode *pErrorCode) { |
+    if(U_FAILURE(*pErrorCode)) { |
+        return 0; |
+    } |
+    if(destCapacity<0 || (dest==NULL && destCapacity>0) || |
+       left==NULL || leftLength<-1 || right==NULL || rightLength<-1) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+    /* check for overlapping right and destination */ |
+    if(dest!=NULL && |
+       ((right>=dest && right<(dest+destCapacity)) || |
+        (rightLength>0 && dest>=right && dest<(right+rightLength)))) { |
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
+        return 0; |
+    } |
+    /* allow left==dest */ |
+    UnicodeString destString; |
+    if(left==dest) { |
+        destString.setTo(dest, leftLength, destCapacity); |
+    } else { |
+        destString.setTo(dest, 0, destCapacity); |
+        destString.append(left, leftLength); |
+    } |
+    return n2->append(destString, UnicodeString(rightLength<0, right, rightLength), *pErrorCode). |
+        extract(dest, destCapacity, *pErrorCode); |
+} |
+ |
+/* Picks the Normalizer2 for mode; builds the filter only if UNORM_UNICODE_3_2 is set (no NULL set). */ |
+U_CAPI int32_t U_EXPORT2 |
+unorm_concatenate(const UChar *left, int32_t leftLength, |
+                  const UChar *right, int32_t rightLength, UChar *dest, int32_t destCapacity, |
+                  UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode) { |
+    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); |
+    if(options&UNORM_UNICODE_3_2) { |
+        const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode); |
+        if(U_FAILURE(*pErrorCode)) { |
+            return 0; |
+        } |
+        FilteredNormalizer2 fn2(*n2, *uni32); |
+        return concatenateWith(left, leftLength, right, rightLength, dest, destCapacity, &fn2, pErrorCode); |
+    } |
+    return concatenateWith(left, leftLength, right, rightLength, dest, destCapacity, n2, pErrorCode); |
+} |
+ |
+#endif /* #if !UCONFIG_NO_NORMALIZATION */ |
Property changes on: icu46/source/common/unorm.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |