| Index: icu46/source/i18n/unicode/colldata.h
|
| ===================================================================
|
| --- icu46/source/i18n/unicode/colldata.h (revision 0)
|
| +++ icu46/source/i18n/unicode/colldata.h (revision 0)
|
| @@ -0,0 +1,452 @@
|
| +/*
|
| + ******************************************************************************
|
| + * Copyright (C) 1996-2010, International Business Machines *
|
| + * Corporation and others. All Rights Reserved. *
|
| + ******************************************************************************
|
| + */
|
| +
|
| +/**
|
| + * \file
|
| + * \brief C++ API: Collation data used to compute minLengthInChars.
|
| + * \internal
|
| + */
|
| +
|
| +#ifndef COLL_DATA_H
|
| +#define COLL_DATA_H
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION
|
| +
|
| +#include "unicode/uobject.h"
|
| +#include "unicode/ucol.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +/**
|
| + * The size of the internal buffer for the Collator's short description string.
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| +#define KEY_BUFFER_SIZE 64
|
| +
|
| + /**
|
| + * The size of the internal CE buffer in a <code>CEList</code> object
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| +#define CELIST_BUFFER_SIZE 4
|
| +
|
| +/**
|
| + * \def INSTRUMENT_CELIST
|
| + * Define this to enable the <code>CEList</code> objects to collect
|
| + * statistics.
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| +//#define INSTRUMENT_CELIST
|
| +
|
| + /**
|
| + * The size of the initial list in a <code>StringList</code> object.
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| +#define STRING_LIST_BUFFER_SIZE 16
|
| +
|
| +/**
|
| + * \def INSTRUMENT_STRING_LIST
|
| + * Define this to enable the <code>StringList</code> objects to
|
| + * collect statistics.
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| +//#define INSTRUMENT_STRING_LIST
|
| +
|
| + /**
|
| + * This object holds a list of CEs generated from a particular
|
| + * <code>UnicodeString</code>
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| +class U_I18N_API CEList : public UObject
|
| +{
|
| +public:
|
| + /**
|
| + * Construct a <code>CEList</code> object.
|
| + *
|
| + * @param coll - the Collator used to collect the CEs.
|
| + * @param string - the string for which to collect the CEs.
|
| + * @param status - will be set if any errors occur.
|
| + *
|
| + * Note: if on return, status is set to an error code,
|
| + * the only safe thing to do with this object is to call
|
| + * the destructor.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
|
| +
|
| + /**
|
| + * The destructor.
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + ~CEList();
|
| +
|
| + /**
|
| + * Return the number of CEs in the list.
|
| + *
|
| + * @return the number of CEs in the list.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + int32_t size() const;
|
| +
|
| + /**
|
| + * Get a particular CE from the list.
|
| + *
|
| + * @param index - the index of the CE to return
|
| + *
|
| + * @return the CE, or <code>0</code> if <code>index</code> is out of range
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + uint32_t get(int32_t index) const;
|
| +
|
| + /**
|
| + * Check if the CEs in another <code>CEList</code> match the
|
| + * suffix of this list starting at a give offset.
|
| + *
|
| + * @param offset - the offset of the suffix
|
| + * @param other - the other <code>CEList</code>
|
| + *
|
| + * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + UBool matchesAt(int32_t offset, const CEList *other) const;
|
| +
|
| + /**
|
| + * The index operator.
|
| + *
|
| + * @param index - the index
|
| + *
|
| + * @return a reference to the given CE in the list
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + uint32_t &operator[](int32_t index) const;
|
| +
|
| + /**
|
| + * UObject glue...
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + virtual UClassID getDynamicClassID() const;
|
| + /**
|
| + * UObject glue...
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + static UClassID getStaticClassID();
|
| +
|
| +private:
|
| + void add(uint32_t ce, UErrorCode &status);
|
| +
|
| + uint32_t ceBuffer[CELIST_BUFFER_SIZE];
|
| + uint32_t *ces;
|
| + int32_t listMax;
|
| + int32_t listSize;
|
| +
|
| +#ifdef INSTRUMENT_CELIST
|
| + static int32_t _active;
|
| + static int32_t _histogram[10];
|
| +#endif
|
| +};
|
| +
|
| +/**
|
| + * StringList
|
| + *
|
| + * This object holds a list of <code>UnicodeString</code> objects.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| +class U_I18N_API StringList : public UObject
|
| +{
|
| +public:
|
| + /**
|
| + * Construct an empty <code>StringList</code>
|
| + *
|
| + * @param status - will be set if any errors occur.
|
| + *
|
| + * Note: if on return, status is set to an error code,
|
| + * the only safe thing to do with this object is to call
|
| + * the destructor.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + StringList(UErrorCode &status);
|
| +
|
| + /**
|
| + * The destructor.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + ~StringList();
|
| +
|
| + /**
|
| + * Add a string to the list.
|
| + *
|
| + * @param string - the string to add
|
| + * @param status - will be set if any errors occur.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + void add(const UnicodeString *string, UErrorCode &status);
|
| +
|
| + /**
|
| + * Add an array of Unicode code points to the list.
|
| + *
|
| + * @param chars - the address of the array of code points
|
| + * @param count - the number of code points in the array
|
| + * @param status - will be set if any errors occur.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + void add(const UChar *chars, int32_t count, UErrorCode &status);
|
| +
|
| + /**
|
| + * Get a particular string from the list.
|
| + *
|
| + * @param index - the index of the string
|
| + *
|
| + * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
|
| + * if <code>index</code> is out of bounds.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + const UnicodeString *get(int32_t index) const;
|
| +
|
| + /**
|
| + * Get the number of stings in the list.
|
| + *
|
| + * @return the number of strings in the list.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + int32_t size() const;
|
| +
|
| + /**
|
| + * the UObject glue...
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + virtual UClassID getDynamicClassID() const;
|
| + /**
|
| + * the UObject glue...
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + static UClassID getStaticClassID();
|
| +
|
| +private:
|
| + UnicodeString *strings;
|
| + int32_t listMax;
|
| + int32_t listSize;
|
| +
|
| +#ifdef INSTRUMENT_STRING_LIST
|
| + static int32_t _lists;
|
| + static int32_t _strings;
|
| + static int32_t _histogram[101];
|
| +#endif
|
| +};
|
| +
|
| +/*
|
| + * Forward references to internal classes.
|
| + */
|
| +class StringToCEsMap;
|
| +class CEToStringsMap;
|
| +class CollDataCache;
|
| +
|
| +/**
|
| + * CollData
|
| + *
|
| + * This class holds the Collator-specific data needed to
|
| + * compute the length of the shortest string that can
|
| + * generate a partcular list of CEs.
|
| + *
|
| + * <code>CollData</code> objects are quite expensive to compute. Because
|
| + * of this, they are cached. When you call <code>CollData::open</code> it
|
| + * returns a reference counted cached object. When you call <code>CollData::close</code>
|
| + * the reference count on the object is decremented but the object is not deleted.
|
| + *
|
| + * If you do not need to reuse any unreferenced objects in the cache, you can call
|
| + * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
|
| + * objects, you can call <code>CollData::freeCollDataCache</code>
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| +class U_I18N_API CollData : public UObject
|
| +{
|
| +public:
|
| + /**
|
| + * Construct a <code>CollData</code> object.
|
| + *
|
| + * @param collator - the collator
|
| + * @param status - will be set if any errors occur.
|
| + *
|
| + * @return the <code>CollData</code> object. You must call
|
| + * <code>close</code> when you are done using the object.
|
| + *
|
| + * Note: if on return, status is set to an error code,
|
| + * the only safe thing to do with this object is to call
|
| + * <code>CollData::close</code>.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + static CollData *open(UCollator *collator, UErrorCode &status);
|
| +
|
| + /**
|
| + * Release a <code>CollData</code> object.
|
| + *
|
| + * @param collData - the object
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + static void close(CollData *collData);
|
| +
|
| + /**
|
| + * Get the <code>UCollator</code> object used to create this object.
|
| + * The object returned may not be the exact object that was used to
|
| + * create this object, but it will have the same behavior.
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + UCollator *getCollator() const;
|
| +
|
| + /**
|
| + * Get a list of all the strings which generate a list
|
| + * of CEs starting with a given CE.
|
| + *
|
| + * @param ce - the CE
|
| + *
|
| + * return a <code>StringList</code> object containing all
|
| + * the stirngs, or <code>NULL</code> if there are
|
| + * no such strings.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview.
|
| + */
|
| + const StringList *getStringList(int32_t ce) const;
|
| +
|
| + /**
|
| + * Get a list of the CEs generated by a partcular stirng.
|
| + *
|
| + * @param string - the string
|
| + *
|
| + * @return a <code>CEList</code> object containt the CEs. You
|
| + * must call <code>freeCEList</code> when you are finished
|
| + * using the <code>CEList</code>/
|
| + *
|
| + * @internal ICU 4.0.1 technology preview.
|
| + */
|
| + const CEList *getCEList(const UnicodeString *string) const;
|
| +
|
| + /**
|
| + * Release a <code>CEList</code> returned by <code>getCEList</code>.
|
| + *
|
| + * @param list - the <code>CEList</code> to free.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + void freeCEList(const CEList *list);
|
| +
|
| + /**
|
| + * Return the length of the shortest string that will generate
|
| + * the given list of CEs.
|
| + *
|
| + * @param ces - the CEs
|
| + * @param offset - the offset of the first CE in the list to use.
|
| + *
|
| + * @return the length of the shortest string.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
|
| +
|
| +
|
| + /**
|
| + * Return the length of the shortest string that will generate
|
| + * the given list of CEs.
|
| + *
|
| + * Note: the algorithm used to do this computation is recursive. To
|
| + * limit the amount of recursion, a "history" list is used to record
|
| + * the best answer starting at a particular offset in the list of CEs.
|
| + * If the same offset is visited again during the recursion, the answer
|
| + * in the history list is used.
|
| + *
|
| + * @param ces - the CEs
|
| + * @param offset - the offset of the first CE in the list to use.
|
| + * @param history - the history list. Must be at least as long as
|
| + * the number of cEs in the <code>CEList</code>
|
| + *
|
| + * @return the length of the shortest string.
|
| + *
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
|
| +
|
| + /**
|
| + * UObject glue...
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + virtual UClassID getDynamicClassID() const;
|
| + /**
|
| + * UObject glue...
|
| + * @internal ICU 4.0.1 technology preview
|
| + */
|
| + static UClassID getStaticClassID();
|
| +
|
| + /**
|
| + * <code>CollData</code> objects are expensive to compute, and so
|
| + * may be cached. This routine will free the cached objects and delete
|
| + * the cache.
|
| + *
|
| + * WARNING: Don't call this until you are have called <code>close</code>
|
| + * for each <code>CollData</code> object that you have used. also,
|
| + * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
|
| + * at the same time.
|
| + *
|
| + * @internal 4.0.1 technology preview
|
| + */
|
| + static void freeCollDataCache();
|
| +
|
| + /**
|
| + * <code>CollData</code> objects are expensive to compute, and so
|
| + * may be cached. This routine will remove any unused <code>CollData</code>
|
| + * objects from the cache.
|
| + *
|
| + * @internal 4.0.1 technology preview
|
| + */
|
| + static void flushCollDataCache();
|
| +
|
| +private:
|
| + friend class CollDataCache;
|
| + friend class CollDataCacheEntry;
|
| +
|
| + CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
|
| + ~CollData();
|
| +
|
| + CollData();
|
| +
|
| + static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
|
| +
|
| + static CollDataCache *getCollDataCache();
|
| +
|
| + UCollator *coll;
|
| + StringToCEsMap *charsToCEList;
|
| + CEToStringsMap *ceToCharsStartingWith;
|
| +
|
| + char keyBuffer[KEY_BUFFER_SIZE];
|
| + char *key;
|
| +
|
| + static CollDataCache *collDataCache;
|
| +
|
| + uint32_t minHan;
|
| + uint32_t maxHan;
|
| +
|
| + uint32_t jamoLimits[4];
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif // #if !UCONFIG_NO_COLLATION
|
| +#endif // #ifndef COLL_DATA_H
|
|
|
| Property changes on: icu46/source/i18n/unicode/colldata.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|