| Index: chrome/browser/spellcheck_worditerator.h
|
| ===================================================================
|
| --- chrome/browser/spellcheck_worditerator.h (revision 32394)
|
| +++ chrome/browser/spellcheck_worditerator.h (working copy)
|
| @@ -1,183 +0,0 @@
|
| -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#ifndef CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_
|
| -#define CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_
|
| -
|
| -#include <map>
|
| -#include <string>
|
| -
|
| -#include "base/basictypes.h"
|
| -#include "base/string16.h"
|
| -
|
| -#include "unicode/uscript.h"
|
| -
|
| -// A class which handles character attributes dependent on a spellchecker and
|
| -// its dictionary.
|
| -// This class is used by the SpellcheckWordIterator class to determine whether
|
| -// or not a character is one used by the spellchecker and its dictinary.
|
| -class SpellcheckCharAttribute {
|
| - public:
|
| - SpellcheckCharAttribute();
|
| -
|
| - ~SpellcheckCharAttribute();
|
| -
|
| - // Sets the default language of the spell checker. This controls which
|
| - // characters are considered parts of words of the given language.
|
| - void SetDefaultLanguage(const std::string& language);
|
| -
|
| - // Returns whether or not the given character is a character used by the
|
| - // selected dictionary.
|
| - // Parameters
|
| - // * character [in] (UChar32)
|
| - // Represents a Unicode character to be checked.
|
| - // Return values
|
| - // * true
|
| - // The given character is a word character.
|
| - // * false
|
| - // The given character is not a word character.
|
| - bool IsWordChar(UChar32 character) const;
|
| -
|
| - // Returns whether or not the given character is a character used by
|
| - // contractions.
|
| - // Parameters
|
| - // * character [in] (UChar32)
|
| - // Represents a Unicode character to be checked.
|
| - // Return values
|
| - // * true
|
| - // The given character is a character used by contractions.
|
| - // * false
|
| - // The given character is not a character used by contractions.
|
| - bool IsContractionChar(UChar32 character) const;
|
| -
|
| - private:
|
| - // Initializes the mapping table.
|
| - void InitializeScriptTable();
|
| -
|
| - // Retrieves the ICU script code.
|
| - UScriptCode GetScriptCode(UChar32 character) const;
|
| -
|
| - // Updates an entry in the mapping table.
|
| - void SetWordScript(const int script_code, bool in_use);
|
| -
|
| - // Returns whether or not the given script is used by the selected
|
| - // dictionary.
|
| - bool IsWordScript(const UScriptCode script_code) const;
|
| -
|
| - private:
|
| - // Represents a mapping table from a script code to a boolean value
|
| - // representing whether or not the script is used by the selected dictionary.
|
| - bool script_attributes_[USCRIPT_CODE_LIMIT];
|
| -
|
| - // Represents a table of characters used by contractions.
|
| - std::map<UChar32, bool> middle_letters_;
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(SpellcheckCharAttribute);
|
| -};
|
| -
|
| -// A class which implements methods for finding the location of word boundaries
|
| -// used by the Spellchecker class.
|
| -// This class is implemented on the following assumptions:
|
| -// * An input string is encoded in UTF-16 (i.e. it may contain surrogate
|
| -// pairs), and;
|
| -// * The length of a string is the number of UTF-16 characters in the string
|
| -// (i.e. the length of a non-BMP character becomes two).
|
| -class SpellcheckWordIterator {
|
| - public:
|
| - SpellcheckWordIterator();
|
| -
|
| - ~SpellcheckWordIterator();
|
| -
|
| - // Initializes a word-iterator object.
|
| - // Parameters
|
| - // * attribute [in] (const SpellcheckCharAttribute*)
|
| - // Represents a set of character attributes used for filtering out
|
| - // non-word characters.
|
| - // * word [in] (const char16*)
|
| - // Represents a string from which this object extracts words.
|
| - // (This string does not have to be NUL-terminated.)
|
| - // * length [in] (size_t)
|
| - // Represents the length of the given string, in UTF-16 characters.
|
| - // This value should not include terminating NUL characters.
|
| - // * allow_contraction [in] (bool)
|
| - // Represents a flag to control whether or not this object should split a
|
| - // possible contraction (e.g. "isn't", "in'n'out", etc.)
|
| - // Return values
|
| - // * true
|
| - // This word-iterator object is initialized successfully.
|
| - // * false
|
| - // An error occured while initializing this object.
|
| - void Initialize(const SpellcheckCharAttribute* attribute,
|
| - const char16* word,
|
| - size_t length,
|
| - bool allow_contraction);
|
| -
|
| - // Retrieves a word (or a contraction).
|
| - // Parameters
|
| - // * word_string [out] (string16*)
|
| - // Represents a word (or a contraction) to be checked its spelling.
|
| - // This |word_string| has been already normalized to its canonical form
|
| - // (i.e. decomposed ligatures, replaced full-width latin characters to
|
| - // its ASCII alternatives, etc.) so that a SpellChecker object can check
|
| - // its spelling without any additional operations.
|
| - // On the other hand, a substring of the input string
|
| - // string16 str(&word[word_start], word_length);
|
| - // represents the non-normalized version of this extracted word.
|
| - // * word_start [out] (int*)
|
| - // Represents the offset of this word from the beginning of the input
|
| - // string, in UTF-16 characters.
|
| - // * word_length [out] (int*)
|
| - // Represents the length of an extracted word before normalization, in
|
| - // UTF-16 characters.
|
| - // When the input string contains ligatures, this value may not be equal
|
| - // to the length of the |word_string|.
|
| - // Return values
|
| - // * true
|
| - // Found a word (or a contraction) to be checked its spelling.
|
| - // * false
|
| - // Not found any more words or contractions to be checked their spellings.
|
| - bool GetNextWord(string16* word_string,
|
| - int* word_start,
|
| - int* word_length);
|
| -
|
| - private:
|
| - // Retrieves a segment consisting of word characters (and contraction
|
| - // characters if the |allow_contraction| value is true).
|
| - void GetSegment(int* segment_start,
|
| - int* segment_end);
|
| -
|
| - // Discards non-word characters at the beginning and the end of the given
|
| - // segment.
|
| - void TrimSegment(int segment_start,
|
| - int segment_end,
|
| - int* word_start,
|
| - int* word_length) const;
|
| -
|
| - // Normalizes the given segment of the |word_| variable and write its
|
| - // canonical form to the |output_string|.
|
| - bool Normalize(int input_start,
|
| - int input_length,
|
| - string16* output_string) const;
|
| -
|
| - private:
|
| - // The pointer to the input string from which we are extracting words.
|
| - const char16* word_;
|
| -
|
| - // The length of the original string.
|
| - int length_;
|
| -
|
| - // The current position in the original string.
|
| - int position_;
|
| -
|
| - // The flag to control whether or not this object should extract possible
|
| - // contractions.
|
| - bool allow_contraction_;
|
| -
|
| - // The character attributes used for filtering out non-word characters.
|
| - const SpellcheckCharAttribute* attribute_;
|
| -
|
| - DISALLOW_COPY_AND_ASSIGN(SpellcheckWordIterator);
|
| -};
|
| -
|
| -#endif // CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H_
|
|
|