| Index: icu46/source/common/dictbe.h
|
| ===================================================================
|
| --- icu46/source/common/dictbe.h (revision 68397)
|
| +++ icu46/source/common/dictbe.h (working copy)
|
| @@ -1,8 +1,8 @@
|
| /**
|
| - *******************************************************************************
|
| - * Copyright (C) 2006, International Business Machines Corporation and others. *
|
| - * All Rights Reserved. *
|
| - *******************************************************************************
|
| + **********************************************************************************
|
| + * Copyright (C) 2006-2010, International Business Machines Corporation and others.
|
| + * All Rights Reserved.
|
| + **********************************************************************************
|
| */
|
|
|
| #ifndef DICTBE_H
|
| @@ -65,31 +65,31 @@
|
| */
|
| virtual ~DictionaryBreakEngine();
|
|
|
| - /**
|
| - * <p>Indicate whether this engine handles a particular character for
|
| - * a particular kind of break.</p>
|
| - *
|
| - * @param c A character which begins a run that the engine might handle
|
| - * @param breakType The type of text break which the caller wants to determine
|
| - * @return TRUE if this engine handles the particular character and break
|
| - * type.
|
| - */
|
| + /**
|
| + * <p>Indicate whether this engine handles a particular character for
|
| + * a particular kind of break.</p>
|
| + *
|
| + * @param c A character which begins a run that the engine might handle
|
| + * @param breakType The type of text break which the caller wants to determine
|
| + * @return TRUE if this engine handles the particular character and break
|
| + * type.
|
| + */
|
| virtual UBool handles( UChar32 c, int32_t breakType ) const;
|
|
|
| - /**
|
| - * <p>Find any breaks within a run in the supplied text.</p>
|
| - *
|
| - * @param text A UText representing the text. The
|
| - * iterator is left at the end of the run of characters which the engine
|
| - * is capable of handling.
|
| - * @param startPos The start of the run within the supplied text.
|
| - * @param endPos The end of the run within the supplied text.
|
| - * @param reverse Whether the caller is looking for breaks in a reverse
|
| - * direction.
|
| - * @param breakType The type of break desired, or -1.
|
| - * @param foundBreaks An allocated C array of the breaks found, if any
|
| - * @return The number of breaks found.
|
| - */
|
| + /**
|
| + * <p>Find any breaks within a run in the supplied text.</p>
|
| + *
|
| + * @param text A UText representing the text. The iterator is left at
|
| + * the end of the run of characters which the engine is capable of handling
|
| + * that starts from the first (or last) character in the range.
|
| + * @param startPos The start of the run within the supplied text.
|
| + * @param endPos The end of the run within the supplied text.
|
| + * @param reverse Whether the caller is looking for breaks in a reverse
|
| + * direction.
|
| + * @param breakType The type of break desired, or -1.
|
| + * @param foundBreaks An allocated C array of the breaks found, if any
|
| + * @return The number of breaks found.
|
| + */
|
| virtual int32_t findBreaks( UText *text,
|
| int32_t startPos,
|
| int32_t endPos,
|
| @@ -114,7 +114,7 @@
|
| // virtual void setBreakTypes( uint32_t breakTypes );
|
|
|
| /**
|
| - * <p>Divide up a range of known dictionary characters.</p>
|
| + * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
|
| *
|
| * @param text A UText representing the text
|
| * @param rangeStart The start of the range of dictionary characters
|
| @@ -171,7 +171,7 @@
|
|
|
| protected:
|
| /**
|
| - * <p>Divide up a range of known dictionary characters.</p>
|
| + * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
|
| *
|
| * @param text A UText representing the text
|
| * @param rangeStart The start of the range of dictionary characters
|
| @@ -186,7 +186,67 @@
|
|
|
| };
|
|
|
| +/*******************************************************************
|
| + * CjkBreakEngine
|
| + */
|
|
|
| +// Indicates the language/script that a CjkBreakEngine will handle.
|
| +enum LanguageType {
|
| + kKorean,
|
| + kChineseJapanese
|
| +};
|
| +
|
| +/**
|
| + * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
|
| + * TrieWordDictionary with costs associated with each word and
|
| + * Viterbi decoding to determine CJK-specific breaks.</p>
|
| + */
|
| +class CjkBreakEngine : public DictionaryBreakEngine {
|
| + protected:
|
| + /**
|
| + * The sets of characters handled by this engine
|
| + * @internal
|
| + */
|
| + UnicodeSet fHangulWordSet;
|
| + UnicodeSet fHanWordSet;
|
| + UnicodeSet fKatakanaWordSet;
|
| + UnicodeSet fHiraganaWordSet;
|
| +
|
| + const TrieWordDictionary *fDictionary;
|
| +
|
| + public:
|
| +
|
| + /**
|
| + * <p>Constructor.</p>
|
| + *
|
| + * @param adoptDictionary A TrieWordDictionary to adopt; deleted when the engine is deleted. It must contain costs for each word in order for the dictionary to work properly.
|
| + * @param type The language/script (a LanguageType value) that this engine will handle.
|
| + * @param status Error code reference supplied by the caller (presumably set on failure; confirm against the implementation).
|
| + */
|
| + CjkBreakEngine(const TrieWordDictionary *adoptDictionary, LanguageType type, UErrorCode &status);
|
| +
|
| + /**
|
| + * <p>Virtual destructor.</p>
|
| + */
|
| + virtual ~CjkBreakEngine();
|
| +
|
| + protected:
|
| + /**
|
| + * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
|
| + *
|
| + * @param text A UText representing the text
|
| + * @param rangeStart The start of the range of dictionary characters
|
| + * @param rangeEnd The end of the range of dictionary characters
|
| + * @param foundBreaks Output UStack that receives the int32_t break positions found
|
| + * @return The number of breaks found
|
| + */
|
| + virtual int32_t divideUpDictionaryRange( UText *text,
|
| + int32_t rangeStart,
|
| + int32_t rangeEnd,
|
| + UStack &foundBreaks ) const;
|
| +
|
| +};
|
| +
|
| U_NAMESPACE_END
|
|
|
| /* DICTBE_H */
|
|
|