Index: source/common/normalizer2impl.h |
diff --git a/source/common/normalizer2impl.h b/source/common/normalizer2impl.h |
index 3b85eb040ed0d6eafb9d2012b1c0c3b2954406af..eb026dbecda12ad35c44b503d209e38ceb7e0d2c 100644 |
--- a/source/common/normalizer2impl.h |
+++ b/source/common/normalizer2impl.h |
@@ -1,7 +1,7 @@ |
/* |
******************************************************************************* |
* |
-* Copyright (C) 2009-2013, International Business Machines |
+* Copyright (C) 2009-2014, International Business Machines |
* Corporation and others. All Rights Reserved. |
* |
******************************************************************************* |
@@ -22,7 +22,6 @@ |
#if !UCONFIG_NO_NORMALIZATION |
#include "unicode/normalizer2.h" |
-#include "unicode/udata.h" |
#include "unicode/unistr.h" |
#include "unicode/unorm.h" |
#include "unicode/utf16.h" |
@@ -34,15 +33,19 @@ U_NAMESPACE_BEGIN |
struct CanonIterData; |
-class Hangul { |
+class U_COMMON_API Hangul { |
public: |
/* Korean Hangul and Jamo constants */ |
enum { |
JAMO_L_BASE=0x1100, /* "lead" jamo */ |
+ JAMO_L_END=0x1112, |
JAMO_V_BASE=0x1161, /* "vowel" jamo */ |
+ JAMO_V_END=0x1175, |
JAMO_T_BASE=0x11a7, /* "trail" jamo */ |
+ JAMO_T_END=0x11c2, |
HANGUL_BASE=0xac00, |
+ HANGUL_END=0xd7a3, |
JAMO_L_COUNT=19, |
JAMO_V_COUNT=21, |
@@ -110,7 +113,7 @@ private: |
class Normalizer2Impl; |
-class ReorderingBuffer : public UMemory { |
+class U_COMMON_API ReorderingBuffer : public UMemory { |
public: |
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) : |
impl(ni), str(dest), |
@@ -213,15 +216,17 @@ private: |
UChar *codePointStart, *codePointLimit; |
}; |
-class U_COMMON_API Normalizer2Impl : public UMemory { |
+class U_COMMON_API Normalizer2Impl : public UObject { |
public: |
- Normalizer2Impl() : memory(NULL), normTrie(NULL), fCanonIterData(NULL) { |
+ Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { |
fCanonIterDataInitOnce.reset(); |
} |
- ~Normalizer2Impl(); |
+ virtual ~Normalizer2Impl(); |
- void load(const char *packageName, const char *name, UErrorCode &errorCode); |
+ void init(const int32_t *inIndexes, const UTrie2 *inTrie, |
+ const uint16_t *inExtraData, const uint8_t *inSmallFCD); |
+ void addLcccChars(UnicodeSet &set) const; |
void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const; |
void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const; |
@@ -242,6 +247,7 @@ public: |
return UNORM_NO; |
} |
} |
+ UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeYes; } |
UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; } |
UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; } |
@@ -416,6 +422,18 @@ public: |
// higher-level functionality ------------------------------------------ *** |
+ // NFD without an NFD Normalizer2 instance. |
+ UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest, |
+ UErrorCode &errorCode) const; |
+ /** |
+ * Decomposes [src, limit[ and writes the result to dest. |
+ * limit can be NULL if src is NUL-terminated. |
+ * destLengthEstimate is the initial dest buffer capacity and can be -1. |
+ */ |
+ void decompose(const UChar *src, const UChar *limit, |
+ UnicodeString &dest, int32_t destLengthEstimate, |
+ UErrorCode &errorCode) const; |
+ |
const UChar *decompose(const UChar *src, const UChar *limit, |
ReorderingBuffer *buffer, UErrorCode &errorCode) const; |
void decomposeAndAppend(const UChar *src, const UChar *limit, |
@@ -460,9 +478,6 @@ public: |
} |
UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; } |
private: |
- static UBool U_CALLCONV |
- isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); |
- |
UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } |
UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; } |
static UBool isInert(uint16_t norm16) { return norm16==0; } |
@@ -566,8 +581,7 @@ private: |
int32_t getCanonValue(UChar32 c) const; |
const UnicodeSet &getCanonStartSet(int32_t n) const; |
- UDataMemory *memory; |
- UVersionInfo dataVersion; |
+ // UVersionInfo dataVersion; |
// Code point thresholds for quick check codes. |
UChar32 minDecompNoCP; |
@@ -580,13 +594,13 @@ private: |
uint16_t limitNoNo; |
uint16_t minMaybeYes; |
- UTrie2 *normTrie; |
+ const UTrie2 *normTrie; |
const uint16_t *maybeYesCompositions; |
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters |
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 |
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F |
- public: // CanonIterData is public to allow access from C callback functions. |
+public: // CanonIterData is public to allow access from C callback functions. |
UInitOnce fCanonIterDataInitOnce; |
CanonIterData *fCanonIterData; |
}; |
@@ -602,13 +616,8 @@ private: |
*/ |
class U_COMMON_API Normalizer2Factory { |
public: |
- static const Normalizer2 *getNFCInstance(UErrorCode &errorCode); |
- static const Normalizer2 *getNFDInstance(UErrorCode &errorCode); |
static const Normalizer2 *getFCDInstance(UErrorCode &errorCode); |
static const Normalizer2 *getFCCInstance(UErrorCode &errorCode); |
- static const Normalizer2 *getNFKCInstance(UErrorCode &errorCode); |
- static const Normalizer2 *getNFKDInstance(UErrorCode &errorCode); |
- static const Normalizer2 *getNFKC_CFInstance(UErrorCode &errorCode); |
static const Normalizer2 *getNoopInstance(UErrorCode &errorCode); |
static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode); |