Index: chrome/browser/spellcheck_worditerator.h |
=================================================================== |
--- chrome/browser/spellcheck_worditerator.h (revision 32394) |
+++ chrome/browser/spellcheck_worditerator.h (working copy) |
@@ -1,183 +0,0 @@ |
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#ifndef CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_ |
-#define CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_ |
- |
-#include <map> |
-#include <string> |
- |
-#include "base/basictypes.h" |
-#include "base/string16.h" |
- |
-#include "unicode/uscript.h" |
- |
-// A class which handles character attributes dependent on a spellchecker and |
-// its dictionary. |
-// This class is used by the SpellcheckWordIterator class to determine whether |
-// or not a character is one used by the spellchecker and its dictionary. |
-class SpellcheckCharAttribute { |
- public: |
- SpellcheckCharAttribute(); |
- |
- ~SpellcheckCharAttribute(); |
- |
- // Sets the default language of the spell checker. This controls which |
- // characters are considered parts of words of the given language. |
- void SetDefaultLanguage(const std::string& language); |
- |
- // Returns whether or not the given character is a character used by the |
- // selected dictionary. |
- // Parameters |
- // * character [in] (UChar32) |
- // Represents a Unicode character to be checked. |
- // Return values |
- // * true |
- // The given character is a word character. |
- // * false |
- // The given character is not a word character. |
- bool IsWordChar(UChar32 character) const; |
- |
- // Returns whether or not the given character is a character used by |
- // contractions. |
- // Parameters |
- // * character [in] (UChar32) |
- // Represents a Unicode character to be checked. |
- // Return values |
- // * true |
- // The given character is a character used by contractions. |
- // * false |
- // The given character is not a character used by contractions. |
- bool IsContractionChar(UChar32 character) const; |
- |
- private: |
- // Initializes the mapping table. |
- void InitializeScriptTable(); |
- |
- // Retrieves the ICU script code of the given character. |
- UScriptCode GetScriptCode(UChar32 character) const; |
- |
- // Updates an entry in the mapping table. |
- void SetWordScript(const int script_code, bool in_use); |
- |
- // Returns whether or not the given script is used by the selected |
- // dictionary. |
- bool IsWordScript(const UScriptCode script_code) const; |
- |
- private: |
- // Represents a mapping table from a script code to a boolean value |
- // representing whether or not the script is used by the selected dictionary. |
- bool script_attributes_[USCRIPT_CODE_LIMIT]; |
- |
- // Represents a table of characters used by contractions. |
- std::map<UChar32, bool> middle_letters_; |
- |
- DISALLOW_COPY_AND_ASSIGN(SpellcheckCharAttribute); |
-}; |
- |
-// A class which implements methods for finding the location of word boundaries |
-// used by the Spellchecker class. |
-// This class is implemented on the following assumptions: |
-// * An input string is encoded in UTF-16 (i.e. it may contain surrogate |
-// pairs), and; |
-// * The length of a string is the number of UTF-16 characters in the string |
-// (i.e. the length of a non-BMP character becomes two). |
-class SpellcheckWordIterator { |
- public: |
- SpellcheckWordIterator(); |
- |
- ~SpellcheckWordIterator(); |
- |
- // Initializes a word-iterator object. |
- // Parameters |
- // * attribute [in] (const SpellcheckCharAttribute*) |
- // Represents a set of character attributes used for filtering out |
- // non-word characters. |
- // * word [in] (const char16*) |
- // Represents a string from which this object extracts words. |
- // (This string does not have to be NUL-terminated.) |
- // * length [in] (size_t) |
- // Represents the length of the given string, in UTF-16 characters. |
- // This value should not include terminating NUL characters. |
- // * allow_contraction [in] (bool) |
- // Represents a flag to control whether or not this object should split a |
- // possible contraction (e.g. "isn't", "in'n'out", etc.) |
- // Return values |
- // * None |
- // This method is declared as void and does not report to its caller |
- // whether or not this word-iterator object has been initialized |
- // successfully. |
- void Initialize(const SpellcheckCharAttribute* attribute, |
- const char16* word, |
- size_t length, |
- bool allow_contraction); |
- |
- // Retrieves a word (or a contraction). |
- // Parameters |
- // * word_string [out] (string16*) |
- // Represents a word (or a contraction) whose spelling is to be checked. |
- // This |word_string| has been already normalized to its canonical form |
- // (i.e. decomposed ligatures, replaced full-width Latin characters with |
- // their ASCII alternatives, etc.) so that a SpellChecker object can check |
- // its spelling without any additional operations. |
- // On the other hand, a substring of the input string |
- // string16 str(&word[word_start], word_length); |
- // represents the non-normalized version of this extracted word. |
- // * word_start [out] (int*) |
- // Represents the offset of this word from the beginning of the input |
- // string, in UTF-16 characters. |
- // * word_length [out] (int*) |
- // Represents the length of an extracted word before normalization, in |
- // UTF-16 characters. |
- // When the input string contains ligatures, this value may not be equal |
- // to the length of the |word_string|. |
- // Return values |
- // * true |
- // Found a word (or a contraction) whose spelling is to be checked. |
- // * false |
- // No more words or contractions to be checked were found. |
- bool GetNextWord(string16* word_string, |
- int* word_start, |
- int* word_length); |
- |
- private: |
- // Retrieves a segment consisting of word characters (and contraction |
- // characters if the |allow_contraction| value is true). |
- void GetSegment(int* segment_start, |
- int* segment_end); |
- |
- // Discards non-word characters at the beginning and the end of the given |
- // segment. |
- void TrimSegment(int segment_start, |
- int segment_end, |
- int* word_start, |
- int* word_length) const; |
- |
- // Normalizes the given segment of the |word_| variable and writes its |
- // canonical form to the |output_string|. |
- bool Normalize(int input_start, |
- int input_length, |
- string16* output_string) const; |
- |
- private: |
- // The pointer to the input string from which we are extracting words. |
- const char16* word_; |
- |
- // The length of the original string. |
- int length_; |
- |
- // The current position in the original string. |
- int position_; |
- |
- // The flag to control whether or not this object should extract possible |
- // contractions. |
- bool allow_contraction_; |
- |
- // The character attributes used for filtering out non-word characters. |
- const SpellcheckCharAttribute* attribute_; |
- |
- DISALLOW_COPY_AND_ASSIGN(SpellcheckWordIterator); |
-}; |
- |
-#endif // CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_ |