| Index: icu46/source/common/triedict.h
|
| ===================================================================
|
| --- icu46/source/common/triedict.h (revision 0)
|
| +++ icu46/source/common/triedict.h (revision 0)
|
| @@ -0,0 +1,346 @@
|
| +/**
|
| + *******************************************************************************
|
| + * Copyright (C) 2006, International Business Machines Corporation and others. *
|
| + * All Rights Reserved. *
|
| + *******************************************************************************
|
| + */
|
| +
|
| +#ifndef TRIEDICT_H
|
| +#define TRIEDICT_H
|
| +
|
| +#include "unicode/utypes.h"
|
| +#include "unicode/uobject.h"
|
| +#include "unicode/utext.h"
|
| +
|
| +struct UEnumeration;
|
| +struct UDataSwapper;
|
| +struct UDataMemory;
|
| +
|
| + /**
|
| + * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
|
| + *
|
| + * @param ds Pointer to UDataSwapper containing global data about the
|
| + * transformation and function pointers for handling primitive
|
| + * types.
|
| + * @param inData Pointer to the input data to be transformed or examined.
|
| + * @param length Length of the data, counting bytes. May be -1 for preflighting.
|
| + * If length>=0, then transform the data.
|
| + * If length==-1, then only determine the length of the data.
|
| + * The length cannot be determined from the data itself for all
|
| + * types of data (e.g., not for simple arrays of integers).
|
| + * @param outData Pointer to the output data buffer.
|
| + * If length>=0 (transformation), then the output buffer must
|
| + * have a capacity of at least length bytes.
|
| + * If length==-1, then outData will not be used and can be NULL.
|
| + * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
|
| + * fulfill U_SUCCESS on input.
|
| + * @return The actual length of the data, counting bytes.
|
| + *
|
| + * @see UDataSwapper
|
| + */
|
| +
|
| +U_CAPI int32_t U_EXPORT2
|
| +triedict_swap(const UDataSwapper *ds,
|
| + const void *inData, int32_t length, void *outData,
|
| + UErrorCode *pErrorCode);
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +class StringEnumeration;
|
| +struct CompactTrieHeader;
|
| +
|
| +/*******************************************************************
|
| + * TrieWordDictionary
|
| + */
|
| +
|
| +/**
|
| + * <p>TrieWordDictionary is an abstract class that represents a word
|
| + * dictionary based on a trie. The base protocol is read-only.
|
| + * Subclasses may allow writing.</p>
|
| + */
|
| +class U_COMMON_API TrieWordDictionary : public UMemory {
|
| + public:
|
| +
|
| + /**
|
| + * <p>Default constructor.</p>
|
| + *
|
| + */
|
| + TrieWordDictionary();
|
| +
|
| + /**
|
| + * <p>Virtual destructor.</p>
|
| + */
|
| + virtual ~TrieWordDictionary();
|
| +
|
| + /**
|
| + * <p>Find dictionary words that match the text.</p>
|
| + *
|
| + * @param text A UText representing the text. The iterator is
|
| + * left after the longest prefix match in the
|
| + * dictionary.
|
| + * @param maxLength The maximum number of code units to match.
|
| + * @param lengths An array that is filled with the lengths of words that matched.
|
| + * @param count Filled with the number of elements output in lengths.
|
| + * @param limit The size of the lengths array; this limits the number of words output.
|
| + * @return The number of characters in text that were matched.
|
| + */
|
| + virtual int32_t matches( UText *text,
|
| + int32_t maxLength,
|
| + int32_t *lengths,
|
| + int &count,
|
| + int limit ) const = 0;
|
| +
|
| + /**
|
| + * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
|
| + *
|
| + * @param status A status code recording the success of the call.
|
| + * @return A StringEnumeration that will iterate through the whole dictionary.
|
| + * The caller is responsible for closing it. The order is unspecified.
|
| + */
|
| + virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
|
| +
|
| +};
|
| +
|
| +/*******************************************************************
|
| + * MutableTrieDictionary
|
| + */
|
| +
|
| +/**
|
| + * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
|
| + * added.</p>
|
| + */
|
| +
|
| +struct TernaryNode; // Forward declaration
|
| +
|
| +class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
|
| + private:
|
| + /**
|
| + * The root node of the trie.
|
| + * @internal
|
| + */
|
| +
|
| + TernaryNode *fTrie;
|
| +
|
| + /**
|
| + * A UText for internal use.
|
| + * @internal
|
| + */
|
| +
|
| + UText *fIter;
|
| +
|
| + friend class CompactTrieDictionary; // For fast conversion
|
| +
|
| + public:
|
| +
|
| + /**
|
| + * <p>Constructor.</p>
|
| + *
|
| + * @param median A UChar around which to balance the trie. Ideally, it should
|
| + * begin at least one word that is near the median of the set in the dictionary.
|
| + * @param status A status code recording the success of the call.
|
| + */
|
| + MutableTrieDictionary( UChar median, UErrorCode &status );
|
| +
|
| + /**
|
| + * <p>Virtual destructor.</p>
|
| + */
|
| + virtual ~MutableTrieDictionary();
|
| +
|
| + /**
|
| + * <p>Find dictionary words that match the text.</p>
|
| + *
|
| + * @param text A UText representing the text. The
|
| + * iterator is left after the longest prefix match in the dictionary.
|
| + * @param maxLength The maximum number of code units to match.
|
| + * @param lengths An array that is filled with the lengths of words that matched.
|
| + * @param count Filled with the number of elements output in lengths.
|
| + * @param limit The size of the lengths array; this limits the number of words output.
|
| + * @return The number of characters in text that were matched.
|
| + */
|
| + virtual int32_t matches( UText *text,
|
| + int32_t maxLength,
|
| + int32_t *lengths,
|
| + int &count,
|
| + int limit ) const;
|
| +
|
| + /**
|
| + * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
|
| + *
|
| + * @param status A status code recording the success of the call.
|
| + * @return A StringEnumeration that will iterate through the whole dictionary.
|
| + * The caller is responsible for closing it. The order is unspecified.
|
| + */
|
| + virtual StringEnumeration *openWords( UErrorCode &status ) const;
|
| +
|
| + /**
|
| + * <p>Add one word to the dictionary.</p>
|
| + *
|
| + * @param word A UChar buffer containing the word.
|
| + * @param length The length of the word.
|
| + * @param status The resultant status.
|
| + */
|
| + virtual void addWord( const UChar *word,
|
| + int32_t length,
|
| + UErrorCode &status);
|
| +
|
| +#if 0
|
| + /**
|
| + * <p>Add all strings from a UEnumeration to the dictionary.</p>
|
| + *
|
| + * @param words A UEnumeration that will return the desired words.
|
| + * @param status The resultant status.
|
| + */
|
| + virtual void addWords( UEnumeration *words, UErrorCode &status );
|
| +#endif
|
| +
|
| +protected:
|
| + /**
|
| + * <p>Search the dictionary for matches.</p>
|
| + *
|
| + * @param text A UText representing the text. The
|
| + * iterator is left after the longest prefix match in the dictionary.
|
| + * @param maxLength The maximum number of code units to match.
|
| + * @param lengths An array that is filled with the lengths of words that matched.
|
| + * @param count Filled with the number of elements output in lengths.
|
| + * @param limit The size of the lengths array; this limits the number of words output.
|
| + * @param parent The parent of the current node.
|
| + * @param pMatched The returned parent node matched the input.
|
| + * @return The number of characters in text that were matched.
|
| + */
|
| + virtual int32_t search( UText *text,
|
| + int32_t maxLength,
|
| + int32_t *lengths,
|
| + int &count,
|
| + int limit,
|
| + TernaryNode *&parent,
|
| + UBool &pMatched ) const;
|
| +
|
| +private:
|
| + /**
|
| + * <p>Private constructor. The root node is not allocated.</p>
|
| + *
|
| + * @param status A status code recording the success of the call.
|
| + */
|
| + MutableTrieDictionary( UErrorCode &status );
|
| +};
|
| +
|
| +/*******************************************************************
|
| + * CompactTrieDictionary
|
| + */
|
| +
|
| +/**
|
| + * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
|
| + * to save space.</p>
|
| + */
|
| +class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
|
| + private:
|
| + /**
|
| + * Pointer to the CompactTrieHeader of the compact trie data.
|
| + */
|
| +
|
| + const CompactTrieHeader *fData;
|
| +
|
| + /**
|
| + * A UBool indicating whether or not we own the fData.
|
| + */
|
| +
|
| + UBool fOwnData;
|
| +
|
| + UDataMemory *fUData; // NOTE(review): presumably the UDataMemory adopted by the constructor - confirm
|
| + public:
|
| + /**
|
| + * <p>Construct a dictionary from a UDataMemory.</p>
|
| + *
|
| + * @param dataObj A pointer to a UDataMemory, which is adopted.
|
| + * @param status A status code giving the result of the constructor.
|
| + */
|
| + CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
|
| +
|
| + /**
|
| + * <p>Construct a dictionary from raw saved data.</p>
|
| + *
|
| + * @param dataObj A pointer to the raw data, which is still owned by the caller.
|
| + * @param status A status code giving the result of the constructor.
|
| + */
|
| + CompactTrieDictionary(const void *dataObj, UErrorCode &status);
|
| +
|
| + /**
|
| + * <p>Construct a dictionary from a MutableTrieDictionary.</p>
|
| + *
|
| + * @param dict The dictionary to use as input.
|
| + * @param status A status code recording the success of the call.
|
| + */
|
| + CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
|
| +
|
| + /**
|
| + * <p>Virtual destructor.</p>
|
| + */
|
| + virtual ~CompactTrieDictionary();
|
| +
|
| + /**
|
| + * <p>Find dictionary words that match the text.</p>
|
| + *
|
| + * @param text A UText representing the text. The
|
| + * iterator is left after the longest prefix match in the dictionary.
|
| + * @param rangeEnd The maximum number of code units to match (named maxLength in TrieWordDictionary).
|
| + * @param lengths An array that is filled with the lengths of words that matched.
|
| + * @param count Filled with the number of elements output in lengths.
|
| + * @param limit The size of the lengths array; this limits the number of words output.
|
| + * @return The number of characters in text that were matched.
|
| + */
|
| + virtual int32_t matches( UText *text,
|
| + int32_t rangeEnd,
|
| + int32_t *lengths,
|
| + int &count,
|
| + int limit ) const;
|
| +
|
| + /**
|
| + * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
|
| + *
|
| + * @param status A status code recording the success of the call.
|
| + * @return A StringEnumeration that will iterate through the whole dictionary.
|
| + * The caller is responsible for closing it. The order is unspecified.
|
| + */
|
| + virtual StringEnumeration *openWords( UErrorCode &status ) const;
|
| +
|
| + /**
|
| + * <p>Return the size of the compact data.</p>
|
| + *
|
| + * @return The size of the dictionary's compact data.
|
| + */
|
| + virtual uint32_t dataSize() const;
|
| +
|
| + /**
|
| + * <p>Return a void * pointer to the compact data, platform-endian.</p>
|
| + *
|
| + * @return The data for the compact dictionary, suitable for passing to the
|
| + * constructor.
|
| + */
|
| + virtual const void *data() const;
|
| +
|
| + /**
|
| + * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
|
| + *
|
| + * @param status A status code recording the success of the call.
|
| + * @return A MutableTrieDictionary with the same data as this dictionary.
|
| + */
|
| + virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
|
| +
|
| + private:
|
| +
|
| + /**
|
| + * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
|
| + *
|
| + * @param dict The dictionary to convert.
|
| + * @param status A status code recording the success of the call.
|
| + * @return A single data blob starting with a CompactTrieHeader.
|
| + */
|
| + static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
|
| + UErrorCode &status );
|
| +
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| + /* TRIEDICT_H */
|
| +#endif
|
|
|
| Property changes on: icu46/source/common/triedict.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|