Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Unified Diff: icu46/source/common/dictbe.h

Issue 6370014: CJK segmentation patch for ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 9 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « icu46/source/common/brkeng.cpp ('k') | icu46/source/common/dictbe.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: icu46/source/common/dictbe.h
===================================================================
--- icu46/source/common/dictbe.h (revision 68397)
+++ icu46/source/common/dictbe.h (working copy)
@@ -1,8 +1,8 @@
/**
- *******************************************************************************
- * Copyright (C) 2006, International Business Machines Corporation and others. *
- * All Rights Reserved. *
- *******************************************************************************
+ **********************************************************************************
+ * Copyright (C) 2006-2010, International Business Machines Corporation and others.
+ * All Rights Reserved.
+ **********************************************************************************
*/
#ifndef DICTBE_H
@@ -65,31 +65,31 @@
*/
virtual ~DictionaryBreakEngine();
- /**
- * <p>Indicate whether this engine handles a particular character for
- * a particular kind of break.</p>
- *
- * @param c A character which begins a run that the engine might handle
- * @param breakType The type of text break which the caller wants to determine
- * @return TRUE if this engine handles the particular character and break
- * type.
- */
+ /**
+ * <p>Indicate whether this engine handles a particular character for
+ * a particular kind of break.</p>
+ *
+ * @param c A character which begins a run that the engine might handle
+ * @param breakType The type of text break which the caller wants to determine
+ * @return TRUE if this engine handles the particular character and break
+ * type.
+ */
virtual UBool handles( UChar32 c, int32_t breakType ) const;
- /**
- * <p>Find any breaks within a run in the supplied text.</p>
- *
- * @param text A UText representing the text. The
- * iterator is left at the end of the run of characters which the engine
- * is capable of handling.
- * @param startPos The start of the run within the supplied text.
- * @param endPos The end of the run within the supplied text.
- * @param reverse Whether the caller is looking for breaks in a reverse
- * direction.
- * @param breakType The type of break desired, or -1.
- * @param foundBreaks An allocated C array of the breaks found, if any
- * @return The number of breaks found.
- */
+ /**
+ * <p>Find any breaks within a run in the supplied text.</p>
+ *
+ * @param text A UText representing the text. The iterator is left at
+ * the end of the run of characters which the engine is capable of handling
+ * that starts from the first (or last) character in the range.
+ * @param startPos The start of the run within the supplied text.
+ * @param endPos The end of the run within the supplied text.
+ * @param reverse Whether the caller is looking for breaks in a reverse
+ * direction.
+ * @param breakType The type of break desired, or -1.
+ * @param foundBreaks An allocated C array of the breaks found, if any
+ * @return The number of breaks found.
+ */
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
@@ -114,7 +114,7 @@
// virtual void setBreakTypes( uint32_t breakTypes );
/**
- * <p>Divide up a range of known dictionary characters.</p>
+ * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
@@ -171,7 +171,7 @@
protected:
/**
- * <p>Divide up a range of known dictionary characters.</p>
+ * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
@@ -186,7 +186,67 @@
};
+/*******************************************************************
+ * CjkBreakEngine
+ */
+// Language/script that a CjkBreakEngine will handle; passed as the constructor's 'type' argument.
+enum LanguageType {
+ kKorean,
+ kChineseJapanese
+};
+
+/**
+ * <p>CjkBreakEngine is a DictionaryBreakEngine for CJK text. It uses a
+ * TrieWordDictionary that has a cost associated with each word, together
+ * with Viterbi decoding, to determine CJK-specific breaks.</p>
+ */
+class CjkBreakEngine : public DictionaryBreakEngine {
+ protected:
+ /**
+ * The sets of characters (Hangul, Han, Katakana, Hiragana) handled by this engine
+ * @internal
+ */
+ UnicodeSet fHangulWordSet;
+ UnicodeSet fHanWordSet;
+ UnicodeSet fKatakanaWordSet;
+ UnicodeSet fHiraganaWordSet;
+
+ const TrieWordDictionary *fDictionary; // presumably the dictionary adopted by the constructor - confirm in dictbe.cpp
+
+ public:
+
+ /**
+ * <p>Constructor.</p>
+ * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the
+ * engine is deleted. It must contain costs for each word to work properly.
+ * @param type The language/script that this engine will handle.
+ * @param status ICU error code (in/out); presumably set on failure - confirm in dictbe.cpp.
+ */
+ CjkBreakEngine(const TrieWordDictionary *adoptDictionary, LanguageType type, UErrorCode &status);
+
+ /**
+ * <p>Virtual destructor.</p>
+ */
+ virtual ~CjkBreakEngine();
+
+ protected:
+ /**
+ * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+ *
+ * @param text A UText representing the text
+ * @param rangeStart The start of the range of dictionary characters
+ * @param rangeEnd The end of the range of dictionary characters
+ * @param foundBreaks Output UStack that receives the break positions found
+ * @return The number of breaks found
+ */
+ virtual int32_t divideUpDictionaryRange( UText *text,
+ int32_t rangeStart,
+ int32_t rangeEnd,
+ UStack &foundBreaks ) const;
+
+};
+
U_NAMESPACE_END
/* DICTBE_H */
« no previous file with comments | « icu46/source/common/brkeng.cpp ('k') | icu46/source/common/dictbe.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698