Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(59)

Side by Side Diff: icu46/source/common/dictbe.h

Issue 6370014: CJK segmentation patch for ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « icu46/source/common/brkeng.cpp ('k') | icu46/source/common/dictbe.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /** 1 /**
2 ******************************************************************************* 2 **********************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. * 3 * Copyright (C) 2006-2010, International Business Machines Corporation and others.
4 * All Rights Reserved. * 4 * All Rights Reserved.
5 ******************************************************************************* 5 **********************************************************************************
6 */ 6 */
7 7
8 #ifndef DICTBE_H 8 #ifndef DICTBE_H
9 #define DICTBE_H 9 #define DICTBE_H
10 10
11 #include "unicode/utypes.h" 11 #include "unicode/utypes.h"
12 #include "unicode/uniset.h" 12 #include "unicode/uniset.h"
13 #include "unicode/utext.h" 13 #include "unicode/utext.h"
14 14
15 #include "brkeng.h" 15 #include "brkeng.h"
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
58 * 58 *
59 * @param breakTypes A bitmap of types handled by the engine. 59 * @param breakTypes A bitmap of types handled by the engine.
60 */ 60 */
61 DictionaryBreakEngine( uint32_t breakTypes ); 61 DictionaryBreakEngine( uint32_t breakTypes );
62 62
63 /** 63 /**
64 * <p>Virtual destructor.</p> 64 * <p>Virtual destructor.</p>
65 */ 65 */
66 virtual ~DictionaryBreakEngine(); 66 virtual ~DictionaryBreakEngine();
67 67
68 /** 68 /**
69 * <p>Indicate whether this engine handles a particular character for 69 * <p>Indicate whether this engine handles a particular character for
70 * a particular kind of break.</p> 70 * a particular kind of break.</p>
71 * 71 *
72 * @param c A character which begins a run that the engine might handle 72 * @param c A character which begins a run that the engine might handle
73 * @param breakType The type of text break which the caller wants to determine 73 * @param breakType The type of text break which the caller wants to determine
74 * @return TRUE if this engine handles the particular character and break 74 * @return TRUE if this engine handles the particular character and break
75 * type. 75 * type.
76 */ 76 */
77 virtual UBool handles( UChar32 c, int32_t breakType ) const; 77 virtual UBool handles( UChar32 c, int32_t breakType ) const;
78 78
79 /** 79 /**
80 * <p>Find any breaks within a run in the supplied text.</p> 80 * <p>Find any breaks within a run in the supplied text.</p>
81 * 81 *
82 * @param text A UText representing the text. The 82 * @param text A UText representing the text. The iterator is left at
83 * iterator is left at the end of the run of characters which the engine 83 * the end of the run of characters which the engine is capable of handling
84 * is capable of handling. 84 * that starts from the first (or last) character in the range.
85 * @param startPos The start of the run within the supplied text. 85 * @param startPos The start of the run within the supplied text.
86 * @param endPos The end of the run within the supplied text. 86 * @param endPos The end of the run within the supplied text.
87 * @param reverse Whether the caller is looking for breaks in a reverse 87 * @param reverse Whether the caller is looking for breaks in a reverse
88 * direction. 88 * direction.
89 * @param breakType The type of break desired, or -1. 89 * @param breakType The type of break desired, or -1.
90 * @param foundBreaks An allocated C array of the breaks found, if any 90 * @param foundBreaks An allocated C array of the breaks found, if any
91 * @return The number of breaks found. 91 * @return The number of breaks found.
92 */ 92 */
93 virtual int32_t findBreaks( UText *text, 93 virtual int32_t findBreaks( UText *text,
94 int32_t startPos, 94 int32_t startPos,
95 int32_t endPos, 95 int32_t endPos,
96 UBool reverse, 96 UBool reverse,
97 int32_t breakType, 97 int32_t breakType,
98 UStack &foundBreaks ) const; 98 UStack &foundBreaks ) const;
99 99
100 protected: 100 protected:
101 101
102 /** 102 /**
103 * <p>Set the character set handled by this engine.</p> 103 * <p>Set the character set handled by this engine.</p>
104 * 104 *
105 * @param set A UnicodeSet of the set of characters handled by the engine 105 * @param set A UnicodeSet of the set of characters handled by the engine
106 */ 106 */
107 virtual void setCharacters( const UnicodeSet &set ); 107 virtual void setCharacters( const UnicodeSet &set );
108 108
109 /** 109 /**
110 * <p>Set the break types handled by this engine.</p> 110 * <p>Set the break types handled by this engine.</p>
111 * 111 *
112 * @param breakTypes A bitmap of types handled by the engine. 112 * @param breakTypes A bitmap of types handled by the engine.
113 */ 113 */
114 // virtual void setBreakTypes( uint32_t breakTypes ); 114 // virtual void setBreakTypes( uint32_t breakTypes );
115 115
116 /** 116 /**
117 * <p>Divide up a range of known dictionary characters.</p> 117 * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
118 * 118 *
119 * @param text A UText representing the text 119 * @param text A UText representing the text
120 * @param rangeStart The start of the range of dictionary characters 120 * @param rangeStart The start of the range of dictionary characters
121 * @param rangeEnd The end of the range of dictionary characters 121 * @param rangeEnd The end of the range of dictionary characters
122 * @param foundBreaks Output of C array of int32_t break positions, or 0 122 * @param foundBreaks Output of C array of int32_t break positions, or 0
123 * @return The number of breaks found 123 * @return The number of breaks found
124 */ 124 */
125 virtual int32_t divideUpDictionaryRange( UText *text, 125 virtual int32_t divideUpDictionaryRange( UText *text,
126 int32_t rangeStart, 126 int32_t rangeStart,
127 int32_t rangeEnd, 127 int32_t rangeEnd,
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
164 */ 164 */
165 ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status); 165 ThaiBreakEngine(const TrieWordDictionary *adoptDictionary, UErrorCode &status);
166 166
167 /** 167 /**
168 * <p>Virtual destructor.</p> 168 * <p>Virtual destructor.</p>
169 */ 169 */
170 virtual ~ThaiBreakEngine(); 170 virtual ~ThaiBreakEngine();
171 171
172 protected: 172 protected:
173 /** 173 /**
174 * <p>Divide up a range of known dictionary characters.</p> 174 * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
175 * 175 *
176 * @param text A UText representing the text 176 * @param text A UText representing the text
177 * @param rangeStart The start of the range of dictionary characters 177 * @param rangeStart The start of the range of dictionary characters
178 * @param rangeEnd The end of the range of dictionary characters 178 * @param rangeEnd The end of the range of dictionary characters
179 * @param foundBreaks Output of C array of int32_t break positions, or 0 179 * @param foundBreaks Output of C array of int32_t break positions, or 0
180 * @return The number of breaks found 180 * @return The number of breaks found
181 */ 181 */
182 virtual int32_t divideUpDictionaryRange( UText *text, 182 virtual int32_t divideUpDictionaryRange( UText *text,
183 int32_t rangeStart, 183 int32_t rangeStart,
184 int32_t rangeEnd, 184 int32_t rangeEnd,
185 UStack &foundBreaks ) const; 185 UStack &foundBreaks ) const;
186 186
187 }; 187 };
188 188
189 /*******************************************************************
190 * CjkBreakEngine
191 */
192
193 //indicates language/script that the CjkBreakEngine will handle
194 enum LanguageType {
195 kKorean,
196 kChineseJapanese
197 };
198
199 /**
200 * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
201 * TrieWordDictionary with costs associated with each word and
202 * Viterbi decoding to determine CJK-specific breaks.</p>
203 */
204 class CjkBreakEngine : public DictionaryBreakEngine {
205 protected:
206 /**
207 * The set of characters handled by this engine
208 * @internal
209 */
210 UnicodeSet fHangulWordSet;
211 UnicodeSet fHanWordSet;
212 UnicodeSet fKatakanaWordSet;
213 UnicodeSet fHiraganaWordSet;
214
215 const TrieWordDictionary *fDictionary;
216
217 public:
218
219 /**
220 * <p>Default constructor.</p>
221 *
222 * @param adoptDictionary A TrieWordDictionary to adopt. Deleted when the
223 * engine is deleted. The TrieWordDictionary must contain costs for each word
224 * in order for the dictionary to work properly.
225 */
226 CjkBreakEngine(const TrieWordDictionary *adoptDictionary, LanguageType type, UErrorCode &status);
227
228 /**
229 * <p>Virtual destructor.</p>
230 */
231 virtual ~CjkBreakEngine();
232
233 protected:
234 /**
235 * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
236 *
237 * @param text A UText representing the text
238 * @param rangeStart The start of the range of dictionary characters
239 * @param rangeEnd The end of the range of dictionary characters
240 * @param foundBreaks Output of C array of int32_t break positions, or 0
241 * @return The number of breaks found
242 */
243 virtual int32_t divideUpDictionaryRange( UText *text,
244 int32_t rangeStart,
245 int32_t rangeEnd,
246 UStack &foundBreaks ) const;
247
248 };
189 249
190 U_NAMESPACE_END 250 U_NAMESPACE_END
191 251
192 /* DICTBE_H */ 252 /* DICTBE_H */
193 #endif 253 #endif
OLDNEW
« no previous file with comments | « icu46/source/common/brkeng.cpp ('k') | icu46/source/common/dictbe.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698