chrome/browser/spellcheck_worditerator.h - Issue 395007: Move Mac to using renderer spellchecker.

Side by Side Diff: chrome/browser/spellcheck_worditerator.h

Issue 395007: Move Mac to using renderer spellchecker. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: ui test fix Created 11 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #ifndef CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_

6 #define CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_

7

8 #include <map>

9 #include <string>

10

11 #include "base/basictypes.h"

12 #include "base/string16.h"

13

14 #include "unicode/uscript.h"

15

16 // A class which handles character attributes dependent on a spellchecker and

17 // its dictionary.

18 // This class is used by the SpellcheckWordIterator class to determine whether

19 // or not a character is one used by the spellchecker and its dictionary.

20 class SpellcheckCharAttribute {

21 public:

22 SpellcheckCharAttribute();

23

24 ~SpellcheckCharAttribute();

25

26 // Sets the default language of the spell checker. This controls which

27 // characters are considered parts of words of the given language.

28 void SetDefaultLanguage(const std::string& language);

29

30 // Returns whether or not the given character is a character used by the

31 // selected dictionary.

32 // Parameters

33 // * character [in] (UChar32)

34 // Represents a Unicode character to be checked.

35 // Return values

36 // * true

37 // The given character is a word character.

38 // * false

39 // The given character is not a word character.

40 bool IsWordChar(UChar32 character) const;

41

42 // Returns whether or not the given character is a character used by

43 // contractions.

44 // Parameters

45 // * character [in] (UChar32)

46 // Represents a Unicode character to be checked.

47 // Return values

48 // * true

49 // The given character is a character used by contractions.

50 // * false

51 // The given character is not a character used by contractions.

52 bool IsContractionChar(UChar32 character) const;

53

54 private:

55 // Initializes the mapping table.

56 void InitializeScriptTable();

57

58 // Retrieves the ICU script code.

59 UScriptCode GetScriptCode(UChar32 character) const;

60

61 // Updates an entry in the mapping table.

62 void SetWordScript(const int script_code, bool in_use);

63

64 // Returns whether or not the given script is used by the selected

65 // dictionary.

66 bool IsWordScript(const UScriptCode script_code) const;

67

68 private:

69 // Represents a mapping table from a script code to a boolean value

70 // representing whether or not the script is used by the selected dictionary.

71 bool script_attributes_[USCRIPT_CODE_LIMIT];

72

73 // Represents a table of characters used by contractions.

74 std::map<UChar32, bool> middle_letters_;

75

76 DISALLOW_COPY_AND_ASSIGN(SpellcheckCharAttribute);

77 };

78

79 // A class which implements methods for finding the location of word boundaries

80 // used by the Spellchecker class.

81 // This class is implemented on the following assumptions:

82 // * An input string is encoded in UTF-16 (i.e. it may contain surrogate

83 // pairs), and;

84 // * The length of a string is the number of UTF-16 characters in the string

85 // (i.e. the length of a non-BMP character becomes two).

86 class SpellcheckWordIterator {

87 public:

88 SpellcheckWordIterator();

89

90 ~SpellcheckWordIterator();

91

92 // Initializes a word-iterator object.

93 // Parameters

94 // * attribute [in] (const SpellcheckCharAttribute*)

95 // Represents a set of character attributes used for filtering out

96 // non-word characters.

97 // * word [in] (const char16*)

98 // Represents a string from which this object extracts words.

99 // (This string does not have to be NUL-terminated.)

100 // * length [in] (size_t)

101 // Represents the length of the given string, in UTF-16 characters.

102 // This value should not include terminating NUL characters.

103 // * allow_contraction [in] (bool)

104 // Represents a flag to control whether or not this object should split a

105 // possible contraction (e.g. "isn't", "in'n'out", etc.)

106 // Return values (note: the declaration below returns void)

107 // * true

108 // This word-iterator object is initialized successfully.

109 // * false

110 // An error occurred while initializing this object.

111 void Initialize(const SpellcheckCharAttribute* attribute,

112 const char16* word,

113 size_t length,

114 bool allow_contraction);

115

116 // Retrieves a word (or a contraction).

117 // Parameters

118 // * word_string [out] (string16*)

119 // Represents a word (or a contraction) whose spelling is to be checked.

120 // This |word_string| has already been normalized to its canonical form

121 // (i.e. decomposed ligatures, replaced full-width Latin characters with

122 // their ASCII alternatives, etc.) so that a SpellChecker object can check

123 // its spelling without any additional operations.

124 // On the other hand, a substring of the input string

125 // string16 str(&word[word_start], word_length);

126 // represents the non-normalized version of this extracted word.

127 // * word_start [out] (int*)

128 // Represents the offset of this word from the beginning of the input

129 // string, in UTF-16 characters.

130 // * word_length [out] (int*)

131 // Represents the length of an extracted word before normalization, in

132 // UTF-16 characters.

133 // When the input string contains ligatures, this value may not be equal

134 // to the length of the |word_string|.

135 // Return values

136 // * true

137 // Found a word (or a contraction) whose spelling is to be checked.

138 // * false

139 // No more words or contractions to be spell-checked were found.

140 bool GetNextWord(string16* word_string,

141 int* word_start,

142 int* word_length);

143

144 private:

145 // Retrieves a segment consisting of word characters (and contraction

146 // characters if the |allow_contraction| value is true).

147 void GetSegment(int* segment_start,

148 int* segment_end);

149

150 // Discards non-word characters at the beginning and the end of the given

151 // segment.

152 void TrimSegment(int segment_start,

153 int segment_end,

154 int* word_start,

155 int* word_length) const;

156

157 // Normalizes the given segment of the |word_| variable and writes its

158 // canonical form to the |output_string|.

159 bool Normalize(int input_start,

160 int input_length,

161 string16* output_string) const;

162

163 private:

164 // The pointer to the input string from which we are extracting words.

165 const char16* word_;

166

167 // The length of the original string.

168 int length_;

169

170 // The current position in the original string.

171 int position_;

172

173 // The flag to control whether or not this object should extract possible

174 // contractions.

175 bool allow_contraction_;

176

177 // The character attributes used for filtering out non-word characters.

178 const SpellcheckCharAttribute* attribute_;

179

180 DISALLOW_COPY_AND_ASSIGN(SpellcheckWordIterator);

181 };

182

183 #endif // CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_

OLD	NEW

« no previous file with comments | « chrome/browser/spellcheck_unittest.cc ('k') | chrome/browser/spellcheck_worditerator.cc » ('j') | no next file with comments »