Index: icu46/source/i18n/unicode/colldata.h |
=================================================================== |
--- icu46/source/i18n/unicode/colldata.h (revision 0) |
+++ icu46/source/i18n/unicode/colldata.h (revision 0) |
@@ -0,0 +1,452 @@ |
+/* |
+ ****************************************************************************** |
+ * Copyright (C) 1996-2010, International Business Machines * |
+ * Corporation and others. All Rights Reserved. * |
+ ****************************************************************************** |
+ */ |
+ |
+/** |
+ * \file |
+ * \brief C++ API: Collation data used to compute minLengthInChars. |
+ * \internal |
+ */ |
+ |
+#ifndef COLL_DATA_H |
+#define COLL_DATA_H |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION |
+ |
+#include "unicode/uobject.h" |
+#include "unicode/ucol.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+/** |
+ * The size, in <code>char</code>s, of the internal buffer (<code>CollData::keyBuffer</code>) for the Collator's short description string. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+#define KEY_BUFFER_SIZE 64 |
+ |
+ /** |
+ * The number of CEs that fit in the internal CE buffer (<code>ceBuffer</code>) of a <code>CEList</code> object. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+#define CELIST_BUFFER_SIZE 4 |
+ |
+/** |
+ * \def INSTRUMENT_CELIST |
+ * Define this to enable the <code>CEList</code> objects to collect |
+ * statistics. Disabled by default: the definition below is commented out. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+//#define INSTRUMENT_CELIST |
+ |
+ /** |
+ * The size of the initial list in a <code>StringList</code> object. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+#define STRING_LIST_BUFFER_SIZE 16 |
+ |
+/** |
+ * \def INSTRUMENT_STRING_LIST |
+ * Define this to enable the <code>StringList</code> objects to |
+ * collect statistics. Disabled by default: the definition below is commented out. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+//#define INSTRUMENT_STRING_LIST |
+ |
+ /** |
+ * This object holds a list of CEs (collation elements) generated from a particular |
+ * <code>UnicodeString</code>. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+class U_I18N_API CEList : public UObject |
+{ |
+public: |
+ /** |
+ * Construct a <code>CEList</code> object. |
+ * |
+ * @param coll - the Collator used to collect the CEs. |
+ * @param string - the string for which to collect the CEs. |
+ * @param status - will be set if any errors occur. |
+ * |
+ * Note: if on return, status is set to an error code, |
+ * the only safe thing to do with this object is to call |
+ * the destructor. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); |
+ |
+ /** |
+ * The destructor. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ ~CEList(); |
+ |
+ /** |
+ * Return the number of CEs in the list. |
+ * |
+ * @return the number of CEs in the list. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ int32_t size() const; |
+ |
+ /** |
+ * Get a particular CE from the list. |
+ * |
+ * @param index - the index of the CE to return |
+ * |
+ * @return the CE, or <code>0</code> if <code>index</code> is out of range |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ uint32_t get(int32_t index) const; |
+ |
+ /** |
+ * Check if the CEs in another <code>CEList</code> match the |
+ * suffix of this list starting at a given offset. |
+ * |
+ * @param offset - the offset of the suffix |
+ * @param other - the other <code>CEList</code> |
+ * |
+ * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ UBool matchesAt(int32_t offset, const CEList *other) const; |
+ |
+ /** |
+ * The index operator. Although declared <code>const</code>, it returns a |
+ * non-const reference to the CE. |
+ * @param index - the index |
+ * |
+ * @return a reference to the given CE in the list |
+ *         NOTE(review): unlike <code>get</code>, no out-of-range behavior is documented here - confirm. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ uint32_t &operator[](int32_t index) const; |
+ |
+ /** |
+ * UObject glue: returns the class ID of this object's dynamic type. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ virtual UClassID getDynamicClassID() const; |
+ /** |
+ * UObject glue: returns the class ID of the <code>CEList</code> class. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ static UClassID getStaticClassID(); |
+ |
+private: |
+ void add(uint32_t ce, UErrorCode &status); |
+ /* ceBuffer: fixed-size inline CE storage. ces/listMax/listSize: presumably the active array, its capacity and its length - TODO confirm. */ |
+ uint32_t ceBuffer[CELIST_BUFFER_SIZE]; |
+ uint32_t *ces; |
+ int32_t listMax; |
+ int32_t listSize; |
+ /* Statistics data, compiled in only when INSTRUMENT_CELIST is defined. */ |
+#ifdef INSTRUMENT_CELIST |
+ static int32_t _active; |
+ static int32_t _histogram[10]; |
+#endif |
+}; |
+ |
+/** |
+ * StringList |
+ * |
+ * This object holds a list of <code>UnicodeString</code> objects. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+class U_I18N_API StringList : public UObject |
+{ |
+public: |
+ /** |
+ * Construct an empty <code>StringList</code> |
+ * |
+ * @param status - will be set if any errors occur. |
+ * |
+ * Note: if on return, status is set to an error code, |
+ * the only safe thing to do with this object is to call |
+ * the destructor. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ StringList(UErrorCode &status); |
+ |
+ /** |
+ * The destructor. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ ~StringList(); |
+ |
+ /** |
+ * Add a string to the list. |
+ * |
+ * @param string - the string to add |
+ * @param status - will be set if any errors occur. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ void add(const UnicodeString *string, UErrorCode &status); |
+ |
+ /** |
+ * Add a string, given as an array of UTF-16 code units (<code>UChar</code>), to the list. |
+ * |
+ * @param chars - the address of the array of code units |
+ * @param count - the number of code units in the array |
+ * @param status - will be set if any errors occur. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ void add(const UChar *chars, int32_t count, UErrorCode &status); |
+ |
+ /** |
+ * Get a particular string from the list. |
+ * |
+ * @param index - the index of the string |
+ * |
+ * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code> |
+ * if <code>index</code> is out of bounds. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ const UnicodeString *get(int32_t index) const; |
+ |
+ /** |
+ * Get the number of strings in the list. |
+ * |
+ * @return the number of strings in the list. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ int32_t size() const; |
+ |
+ /** |
+ * UObject glue: returns the class ID of this object's dynamic type. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ virtual UClassID getDynamicClassID() const; |
+ /** |
+ * UObject glue: returns the class ID of the <code>StringList</code> class. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ static UClassID getStaticClassID(); |
+ /* strings/listMax/listSize: presumably the string array, its capacity and its length - TODO confirm in the implementation. */ |
+private: |
+ UnicodeString *strings; |
+ int32_t listMax; |
+ int32_t listSize; |
+ /* Statistics data, compiled in only when INSTRUMENT_STRING_LIST is defined. */ |
+#ifdef INSTRUMENT_STRING_LIST |
+ static int32_t _lists; |
+ static int32_t _strings; |
+ static int32_t _histogram[101]; |
+#endif |
+}; |
+ |
+/* |
+ * Forward declarations of internal classes; this header only uses them through pointers and friend declarations. |
+ */ |
+class StringToCEsMap; |
+class CEToStringsMap; |
+class CollDataCache; |
+ |
+/** |
+ * CollData |
+ * |
+ * This class holds the Collator-specific data needed to |
+ * compute the length of the shortest string that can |
+ * generate a particular list of CEs. |
+ * |
+ * <code>CollData</code> objects are quite expensive to compute. Because |
+ * of this, they are cached. When you call <code>CollData::open</code> it |
+ * returns a reference counted cached object. When you call <code>CollData::close</code> |
+ * the reference count on the object is decremented but the object is not deleted. |
+ * |
+ * If you do not need to reuse any unreferenced objects in the cache, you can call |
+ * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code> |
+ * objects, you can call <code>CollData::freeCollDataCache</code>. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+class U_I18N_API CollData : public UObject |
+{ |
+public: |
+ /** |
+ * Construct a <code>CollData</code> object. |
+ * |
+ * @param collator - the collator |
+ * @param status - will be set if any errors occur. |
+ * |
+ * @return the <code>CollData</code> object. You must call |
+ * <code>close</code> when you are done using the object. |
+ * |
+ * Note: if on return, status is set to an error code, |
+ * the only safe thing to do with this object is to call |
+ * <code>CollData::close</code>. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ static CollData *open(UCollator *collator, UErrorCode &status); |
+ |
+ /** |
+ * Release a <code>CollData</code> object. |
+ * |
+ * @param collData - the object |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ static void close(CollData *collData); |
+ |
+ /** |
+ * Get the <code>UCollator</code> object used to create this object. |
+ * The object returned may not be the exact object that was used to |
+ * create this object, but it will have the same behavior. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ UCollator *getCollator() const; |
+ |
+ /** |
+ * Get a list of all the strings which generate a list |
+ * of CEs starting with a given CE. |
+ * |
+ * @param ce - the CE |
+ *        (NOTE(review): declared <code>int32_t</code> here, although <code>CEList</code> exposes CEs as <code>uint32_t</code>.) |
+ * @return a <code>StringList</code> object containing all |
+ * the strings, or <code>NULL</code> if there are |
+ * no such strings. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ const StringList *getStringList(int32_t ce) const; |
+ |
+ /** |
+ * Get a list of the CEs generated by a particular string. |
+ * |
+ * @param string - the string |
+ * |
+ * @return a <code>CEList</code> object containing the CEs. You |
+ * must call <code>freeCEList</code> when you are finished |
+ * using the <code>CEList</code>. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ const CEList *getCEList(const UnicodeString *string) const; |
+ |
+ /** |
+ * Release a <code>CEList</code> returned by <code>getCEList</code>. |
+ * |
+ * @param list - the <code>CEList</code> to free. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ void freeCEList(const CEList *list); |
+ |
+ /** |
+ * Return the length of the shortest string that will generate |
+ * the given list of CEs. |
+ * |
+ * @param ces - the CEs |
+ * @param offset - the offset of the first CE in the list to use. |
+ * |
+ * @return the length of the shortest string. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ int32_t minLengthInChars(const CEList *ces, int32_t offset) const; |
+ |
+ |
+ /** |
+ * Return the length of the shortest string that will generate |
+ * the given list of CEs. |
+ * |
+ * Note: the algorithm used to do this computation is recursive. To |
+ * limit the amount of recursion, a "history" list is used to record |
+ * the best answer starting at a particular offset in the list of CEs. |
+ * If the same offset is visited again during the recursion, the answer |
+ * in the history list is used. |
+ * |
+ * @param ces - the CEs |
+ * @param offset - the offset of the first CE in the list to use. |
+ * @param history - the history list. Must be at least as long as |
+ * the number of CEs in the <code>CEList</code>. |
+ * |
+ * @return the length of the shortest string. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const; |
+ |
+ /** |
+ * UObject glue: returns the class ID of this object's dynamic type. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ virtual UClassID getDynamicClassID() const; |
+ /** |
+ * UObject glue: returns the class ID of the <code>CollData</code> class. |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ static UClassID getStaticClassID(); |
+ |
+ /** |
+ * <code>CollData</code> objects are expensive to compute, and so |
+ * may be cached. This routine will free the cached objects and delete |
+ * the cache. |
+ * |
+ * WARNING: Don't call this until you have called <code>close</code> |
+ * for each <code>CollData</code> object that you have used. Also, |
+ * DO NOT call this if another thread may be calling <code>flushCollDataCache</code> |
+ * at the same time. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ static void freeCollDataCache(); |
+ |
+ /** |
+ * <code>CollData</code> objects are expensive to compute, and so |
+ * may be cached. This routine will remove any unused <code>CollData</code> |
+ * objects from the cache. |
+ * |
+ * @internal ICU 4.0.1 technology preview |
+ */ |
+ static void flushCollDataCache(); |
+ |
+private: |
+ friend class CollDataCache; |
+ friend class CollDataCacheEntry; |
+ /* Construction and destruction are private: callers go through open() and close(). */ |
+ CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); |
+ ~CollData(); |
+ |
+ CollData(); |
+ |
+ static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); |
+ |
+ static CollDataCache *getCollDataCache(); |
+ |
+ UCollator *coll; |
+ StringToCEsMap *charsToCEList; |
+ CEToStringsMap *ceToCharsStartingWith; |
+ /* keyBuffer is inline storage of KEY_BUFFER_SIZE chars; key presumably points at the key actually in use - TODO confirm in the implementation. */ |
+ char keyBuffer[KEY_BUFFER_SIZE]; |
+ char *key; |
+ |
+ static CollDataCache *collDataCache; |
+ |
+ uint32_t minHan; |
+ uint32_t maxHan; |
+ |
+ uint32_t jamoLimits[4]; |
+}; |
+ |
+U_NAMESPACE_END |
+ |
+#endif // #if !UCONFIG_NO_COLLATION |
+#endif // #ifndef COLL_DATA_H |
Property changes on: icu46/source/i18n/unicode/colldata.h |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |