Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(348)

Side by Side Diff: chrome/browser/spellcheck_worditerator.h

Issue 395007: Move Mac to using renderer spellchecker. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: ui test fix Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/spellcheck_unittest.cc ('k') | chrome/browser/spellcheck_worditerator.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_
6 #define CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_
7
8 #include <map>
9 #include <string>
10
11 #include "base/basictypes.h"
12 #include "base/string16.h"
13
14 #include "unicode/uscript.h"
15
16 // A class which handles character attributes dependent on a spellchecker and
17 // its dictionary.
18 // This class is used by the SpellcheckWordIterator class to determine whether
19 // or not a character is one used by the spellchecker and its dictionary.
20 class SpellcheckCharAttribute {
21 public:
22 SpellcheckCharAttribute();
23
24 ~SpellcheckCharAttribute();
25
26 // Sets the default language of the spell checker. This controls which
27 // characters are considered parts of words of the given language.
28 void SetDefaultLanguage(const std::string& language);
29
30 // Returns whether or not the given character is a character used by the
31 // selected dictionary.
32 // Parameters
33 // * character [in] (UChar32)
34 // Represents a Unicode character to be checked.
35 // Return values
36 // * true
37 // The given character is a word character.
38 // * false
39 // The given character is not a word character.
40 bool IsWordChar(UChar32 character) const;
41
42 // Returns whether or not the given character is a character used by
43 // contractions.
44 // Parameters
45 // * character [in] (UChar32)
46 // Represents a Unicode character to be checked.
47 // Return values
48 // * true
49 // The given character is a character used by contractions.
50 // * false
51 // The given character is not a character used by contractions.
52 bool IsContractionChar(UChar32 character) const;
53
54 private:
55 // Initializes the mapping table from script codes to in-use flags.
56 void InitializeScriptTable();
57
58 // Retrieves the ICU script code (UScriptCode) of the given character.
59 UScriptCode GetScriptCode(UChar32 character) const;
60
61 // Updates the mapping-table entry for |script_code| (presumably to |in_use|).
62 void SetWordScript(const int script_code, bool in_use);
63
64 // Returns whether or not the given script is used by the selected
65 // dictionary.
66 bool IsWordScript(const UScriptCode script_code) const;
67
68 private:
69 // Represents a mapping table from an ICU script code to a boolean value
70 // representing whether or not the script is used by the selected dictionary.
71 bool script_attributes_[USCRIPT_CODE_LIMIT];
72
73 // Represents a table of characters used by contractions, keyed by character.
74 std::map<UChar32, bool> middle_letters_;
75
76 DISALLOW_COPY_AND_ASSIGN(SpellcheckCharAttribute);
77 };
78
79 // A class which implements methods for finding the location of word boundaries
80 // used by the Spellchecker class.
81 // This class is implemented on the following assumptions:
82 // * An input string is encoded in UTF-16 (i.e. it may contain surrogate
83 // pairs), and;
84 // * The length of a string is the number of UTF-16 characters in the string
85 // (i.e. the length of a non-BMP character becomes two).
86 class SpellcheckWordIterator {
87 public:
88 SpellcheckWordIterator();
89
90 ~SpellcheckWordIterator();
91
92 // Initializes a word-iterator object.
93 // Parameters
94 // * attribute [in] (const SpellcheckCharAttribute*)
95 // Represents a set of character attributes used for filtering out
96 // non-word characters.
97 // * word [in] (const char16*)
98 // Represents a string from which this object extracts words.
99 // (This string does not have to be NUL-terminated.)
100 // * length [in] (size_t)
101 // Represents the length of the given string, in UTF-16 characters.
102 // This value should not include terminating NUL characters.
103 // * allow_contraction [in] (bool)
104 // Represents a flag to control whether or not this object should split a
105 // possible contraction (e.g. "isn't", "in'n'out", etc.)
106 // Return values
107 // * none
108 // This method returns void and therefore does not report
109 // initialization failures to the caller.
110 // NOTE(review): |attribute| and |word| look retained by pointer; confirm.
111 void Initialize(const SpellcheckCharAttribute* attribute,
112 const char16* word,
113 size_t length,
114 bool allow_contraction);
115
116 // Retrieves a word (or a contraction).
117 // Parameters
118 // * word_string [out] (string16*)
119 // Receives a word (or a contraction) whose spelling is to be checked.
120 // This |word_string| has been already normalized to its canonical form
121 // (i.e. decomposed ligatures, replaced full-width latin characters with
122 // their ASCII alternatives, etc.) so that a SpellChecker object can check
123 // its spelling without any additional operations.
124 // On the other hand, a substring of the input string
125 // string16 str(&word[word_start], word_length);
126 // represents the non-normalized version of this extracted word.
127 // * word_start [out] (int*)
128 // Represents the offset of this word from the beginning of the input
129 // string, in UTF-16 characters.
130 // * word_length [out] (int*)
131 // Represents the length of an extracted word before normalization, in
132 // UTF-16 characters.
133 // When the input string contains ligatures, this value may not be equal
134 // to the length of the |word_string|.
135 // Return values
136 // * true
137 // Found a word (or a contraction) whose spelling is to be checked.
138 // * false
139 // No more words or contractions to be checked were found.
140 bool GetNextWord(string16* word_string,
141 int* word_start,
142 int* word_length);
143
144 private:
145 // Retrieves a segment consisting of word characters (and contraction
146 // characters if the |allow_contraction| value is true).
147 void GetSegment(int* segment_start,
148 int* segment_end);
149
150 // Discards non-word characters at the beginning and the end of the given
151 // segment.
152 void TrimSegment(int segment_start,
153 int segment_end,
154 int* word_start,
155 int* word_length) const;
156
157 // Normalizes the given segment of the |word_| variable and writes its
158 // canonical form to the |output_string|.
159 bool Normalize(int input_start,
160 int input_length,
161 string16* output_string) const;
162
163 private:
164 // The pointer to the input string from which we are extracting words.
165 const char16* word_;
166
167 // The length of the original string.
168 int length_;
169
170 // The current position in the original string.
171 int position_;
172
173 // The flag to control whether or not this object should extract possible
174 // contractions.
175 bool allow_contraction_;
176
177 // The character attributes used for filtering out non-word characters.
178 const SpellcheckCharAttribute* attribute_;
179
180 DISALLOW_COPY_AND_ASSIGN(SpellcheckWordIterator);
181 };
182
183 #endif // CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_
OLDNEW
« no previous file with comments | « chrome/browser/spellcheck_unittest.cc ('k') | chrome/browser/spellcheck_worditerator.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698