Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(219)

Side by Side Diff: third_party/hunspell/src/parsers/textparser.hxx

Issue 2544793003: [spellcheck] Updated Hunspell to 1.5.4 (Closed)
Patch Set: Test (created 4 years ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * parser classes for MySpell 2 * parser classes for MySpell
3 * 3 *
4 * implemented: text, HTML, TeX 4 * implemented: text, HTML, TeX
5 * 5 *
6 * Copyright (C) 2002, Laszlo Nemeth 6 * Copyright (C) 2002, Laszlo Nemeth
7 * 7 *
8 */ 8 */
9 /* ***** BEGIN LICENSE BLOCK *****
10 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
11 *
12 * The contents of this file are subject to the Mozilla Public License Version
13 * 1.1 (the "License"); you may not use this file except in compliance with
14 * the License. You may obtain a copy of the License at
15 * http://www.mozilla.org/MPL/
16 *
17 * Software distributed under the License is distributed on an "AS IS" basis,
18 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
19 * for the specific language governing rights and limitations under the
20 * License.
21 *
22 * The Original Code is Hunspell, based on MySpell.
23 *
24 * The Initial Developers of the Original Code are
25 * Kevin Hendricks (MySpell) and Németh László (Hunspell).
26 * Portions created by the Initial Developers are Copyright (C) 2002-2005
27 * the Initial Developers. All Rights Reserved.
28 *
29 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
30 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
31 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
32 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
33 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
34 *
35 * Alternatively, the contents of this file may be used under the terms of
36 * either the GNU General Public License Version 2 or later (the "GPL"), or
37 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
38 * in which case the provisions of the GPL or the LGPL are applicable instead
39 * of those above. If you wish to allow use of your version of this file only
40 * under the terms of either the GPL or the LGPL, and not to allow others to
41 * use your version of this file under the terms of the MPL, indicate your
42 * decision by deleting the provisions above and replace them with the notice
43 * and other provisions required by the GPL or the LGPL. If you do not delete
44 * the provisions above, a recipient may use your version of this file under
45 * the terms of any one of the MPL, the GPL or the LGPL.
46 *
47 * ***** END LICENSE BLOCK ***** */
9 48
10 #ifndef _TEXTPARSER_HXX_ 49 #ifndef TEXTPARSER_HXX_
11 #define _TEXTPARSER_HXX_ 50 #define TEXTPARSER_HXX_
12 51
13 // set sum of actual and previous lines 52 // set sum of actual and previous lines
14 #define MAXPREVLINE 4 53 #define MAXPREVLINE 4
15 54
16 #ifndef MAXLNLEN 55 #ifndef MAXLNLEN
17 #define MAXLNLEN 8192 56 #define MAXLNLEN 8192
18 #endif 57 #endif
19 58
59 #include "../hunspell/w_char.hxx"
60
61 #include <vector>
62
20 /* 63 /*
21 * Base Text Parser 64 * Base Text Parser
22 * 65 *
23 */ 66 */
24 67
25 class TextParser 68 class TextParser {
26 { 69 protected:
70 int wordcharacters[256]; // for detection of the word boundaries
71 std::string line[MAXPREVLINE]; // parsed and previous lines
72 std::vector<bool> urlline; // mask for url detection
73 int checkurl;
74 int actual; // actual line
75 size_t head; // head position
76 size_t token;// begin of token
77 int state; // state of automata
78 int utf8; // UTF-8 character encoding
79 int next_char(const char* line, size_t* pos);
80 const w_char* wordchars_utf16;
81 int wclen;
27 82
28 protected: 83 public:
29 void init(const char *); 84 TextParser(const w_char* wordchars, int len);
30 void init(unsigned short * wordchars, int len); 85 explicit TextParser(const char* wc);
31 int wordcharacters[256]; // for detection of the word boundaries
32 char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines
33 char urlline[MAXLNLEN]; // mask for url detection
34 int checkurl;
35 int actual; // actual line
36 int head; // head position
37 int token; // begin of token
38 int state; // state of automata
39 int utf8; // UTF-8 character encoding
40 int next_char(char * line, int * pos);
41 unsigned short * wordchars_utf16;
42 int wclen;
43
44 public:
45
46 TextParser();
47 TextParser(unsigned short * wordchars, int len);
48 TextParser(const char * wc);
49 virtual ~TextParser(); 86 virtual ~TextParser();
50 87
51 void put_line(char * line); 88 void put_line(const char* line);
52 char * get_line(); 89 std::string get_line() const;
53 char * get_prevline(int n); 90 std::string get_prevline(int n) const;
54 virtual char * next_token(); 91 virtual bool next_token(std::string&);
55 int change_token(const char * word); 92 virtual int change_token(const char* word);
56 void set_url_checking(int check); 93 void set_url_checking(int check);
57 94
58 int get_tokenpos(); 95 size_t get_tokenpos();
59 int is_wordchar(char * w); 96 int is_wordchar(const char* w);
60 const char * get_latin1(char * s); 97 inline int is_utf8() { return utf8; }
61 char * next_char(); 98 const char* get_latin1(const char* s);
62 int tokenize_urls(); 99 char* next_char();
63 void check_urls(); 100 int tokenize_urls();
64 int get_url(int token_pos, int * head); 101 void check_urls();
65 char * alloc_token(int token, int * head); 102 int get_url(size_t token_pos, size_t* head);
103 bool alloc_token(size_t token, size_t* head, std::string& out);
104 private:
105 void init(const char*);
106 void init(const w_char* wordchars, int len);
66 }; 107 };
67 108
68 #endif 109 #endif
69
OLD | NEW
« no previous file with comments | « third_party/hunspell/src/parsers/testparser.cxx ('k') | third_party/hunspell/src/parsers/textparser.cxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698