Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(395)

Side by Side Diff: third_party/hunspell/src/parsers/latexparser.cxx

Issue 2544793003: [spellcheck] Updated Hunspell to 1.5.4 (Closed)
Patch Set: Test Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Original Code is Hunspell, based on MySpell.
15 *
16 * The Initial Developers of the Original Code are
17 * Kevin Hendricks (MySpell) and Németh László (Hunspell).
18 * Portions created by the Initial Developers are Copyright (C) 2002-2005
19 * the Initial Developers. All Rights Reserved.
20 *
21 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
22 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
23 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
24 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
25 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either the GNU General Public License Version 2 or later (the "GPL"), or
29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
1 #include <cstdlib> 41 #include <cstdlib>
2 #include <cstring> 42 #include <cstring>
3 #include <cstdio> 43 #include <cstdio>
4 #include <ctype.h> 44 #include <ctype.h>
5 45
6 #include "../hunspell/csutil.hxx" 46 #include "../hunspell/csutil.hxx"
7 #include "latexparser.hxx" 47 #include "latexparser.hxx"
8 48
9 #ifndef W32 49 #ifndef W32
10 using namespace std; 50 using namespace std;
11 #endif 51 #endif
12 52
13 static struct { 53 static struct {
14 » const char * pat[2]; 54 const char* pat[2];
15 » int arg; 55 int arg;
16 } PATTERN[] = { 56 } PATTERN[] = {{{"\\(", "\\)"}, 0},
17 » { { "\\(", "\\)" } , 0 }, 57 {{"$$", "$$"}, 0},
18 » { { "$$", "$$" } , 0 }, 58 {{"$", "$"}, 0},
19 » { { "$", "$" } , 0 }, 59 {{"\\begin{math}", "\\end{math}"}, 0},
20 » { { "\\begin{math}", "\\end{math}" } , 0 }, 60 {{"\\[", "\\]"}, 0},
21 » { { "\\[", "\\]" } , 0 }, 61 {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
22 » { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 }, 62 {{"\\begin{equation}", "\\end{equation}"}, 0},
23 » { { "\\begin{equation}", "\\end{equation}" } , 0 }, 63 {{"\\begin{equation*}", "\\end{equation*}"}, 0},
24 » { { "\\begin{equation*}", "\\end{equation*}" } , 0 }, 64 {{"\\cite", NULL}, 1},
25 » { { "\\cite", NULL } , 1 }, 65 {{"\\nocite", NULL}, 1},
26 » { { "\\nocite", NULL } , 1 }, 66 {{"\\index", NULL}, 1},
27 » { { "\\index", NULL } , 1 }, 67 {{"\\label", NULL}, 1},
28 » { { "\\label", NULL } , 1 }, 68 {{"\\ref", NULL}, 1},
29 » { { "\\ref", NULL } , 1 }, 69 {{"\\pageref", NULL}, 1},
30 » { { "\\pageref", NULL } , 1 }, 70 {{"\\autoref", NULL}, 1},
31 » { { "\\parbox", NULL } , 1 }, 71 {{"\\parbox", NULL}, 1},
32 » { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 }, 72 {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
33 » { { "\\verb+", "+" } , 0 }, 73 {{"\\verb+", "+"}, 0},
34 » { { "\\verb|", "|" } , 0 }, 74 {{"\\verb|", "|"}, 0},
35 » { { "\\verb#", "#" } , 0 }, 75 {{"\\verb#", "#"}, 0},
36 » { { "\\verb*", "*" } , 0 }, 76 {{"\\verb*", "*"}, 0},
37 » { { "\\documentstyle", "\\begin{document}" } , 0 }, 77 {{"\\documentstyle", "\\begin{document}"}, 0},
38 » { { "\\documentclass", "\\begin{document}" } , 0 }, 78 {{"\\documentclass", "\\begin{document}"}, 0},
39 //» { { "\\documentclass", NULL } , 1 }, 79 //» { { "\\documentclass", NULL } , 1 },
40 » { { "\\usepackage", NULL } , 1 }, 80 {{"\\usepackage", NULL}, 1},
41 » { { "\\includeonly", NULL } , 1 }, 81 {{"\\includeonly", NULL}, 1},
42 » { { "\\include", NULL } , 1 }, 82 {{"\\include", NULL}, 1},
43 » { { "\\input", NULL } , 1 }, 83 {{"\\input", NULL}, 1},
44 » { { "\\vspace", NULL } , 1 }, 84 {{"\\vspace", NULL}, 1},
45 » { { "\\setlength", NULL } , 2 }, 85 {{"\\setlength", NULL}, 2},
46 » { { "\\addtolength", NULL } , 2 }, 86 {{"\\addtolength", NULL}, 2},
47 » { { "\\settowidth", NULL } , 2 }, 87 {{"\\settowidth", NULL}, 2},
48 » { { "\\rule", NULL } , 2 }, 88 {{"\\rule", NULL}, 2},
49 » { { "\\hspace", NULL } , 1 } , 89 {{"\\hspace", NULL}, 1},
50 » { { "\\vspace", NULL } , 1 } , 90 {{"\\vspace", NULL}, 1},
51 » { { "\\\\[", "]" } , 0 }, 91 {{"\\\\[", "]"}, 0},
52 » { { "\\pagebreak[", "]" } , 0 } , 92 {{"\\pagebreak[", "]"}, 0},
53 » { { "\\nopagebreak[", "]" } , 0 } , 93 {{"\\nopagebreak[", "]"}, 0},
54 » { { "\\enlargethispage", NULL } , 1 } , 94 {{"\\enlargethispage", NULL}, 1},
55 » { { "\\begin{tabular}", NULL } , 1 } , 95 {{"\\begin{tabular}", NULL}, 1},
56 » { { "\\addcontentsline", NULL } , 2 } , 96 {{"\\addcontentsline", NULL}, 2},
57 » { { "\\begin{thebibliography}", NULL } , 1 } , 97 {{"\\begin{thebibliography}", NULL}, 1},
58 » { { "\\bibliography", NULL } , 1 } , 98 {{"\\bibliography", NULL}, 1},
59 » { { "\\bibliographystyle", NULL } , 1 } , 99 {{"\\bibliographystyle", NULL}, 1},
60 » { { "\\bibitem", NULL } , 1 } , 100 {{"\\bibitem", NULL}, 1},
61 » { { "\\begin", NULL } , 1 } , 101 {{"\\begin", NULL}, 1},
62 » { { "\\end", NULL } , 1 } , 102 {{"\\end", NULL}, 1},
63 » { { "\\pagestyle", NULL } , 1 } , 103 {{"\\pagestyle", NULL}, 1},
64 » { { "\\pagenumbering", NULL } , 1 } , 104 {{"\\pagenumbering", NULL}, 1},
65 » { { "\\thispagestyle", NULL } , 1 } , 105 {{"\\thispagestyle", NULL}, 1},
66 » { { "\\newtheorem", NULL } , 2 }, 106 {{"\\newtheorem", NULL}, 2},
67 » { { "\\newcommand", NULL } , 2 }, 107 {{"\\newcommand", NULL}, 2},
68 » { { "\\renewcommand", NULL } , 2 }, 108 {{"\\renewcommand", NULL}, 2},
69 » { { "\\setcounter", NULL } , 2 }, 109 {{"\\setcounter", NULL}, 2},
70 » { { "\\addtocounter", NULL } , 1 }, 110 {{"\\addtocounter", NULL}, 1},
71 » { { "\\stepcounter", NULL } , 1 }, 111 {{"\\stepcounter", NULL}, 1},
72 » { { "\\selectlanguage", NULL } , 1 }, 112 {{"\\selectlanguage", NULL}, 1},
73 » { { "\\inputencoding", NULL } , 1 }, 113 {{"\\inputencoding", NULL}, 1},
74 » { { "\\hyphenation", NULL } , 1 }, 114 {{"\\hyphenation", NULL}, 1},
75 » { { "\\definecolor", NULL } , 3 }, 115 {{"\\definecolor", NULL}, 3},
76 » { { "\\color", NULL } , 1 }, 116 {{"\\color", NULL}, 1},
77 » { { "\\textcolor", NULL } , 1 }, 117 {{"\\textcolor", NULL}, 1},
78 » { { "\\pagecolor", NULL } , 1 }, 118 {{"\\pagecolor", NULL}, 1},
79 » { { "\\colorbox", NULL } , 2 }, 119 {{"\\colorbox", NULL}, 2},
80 » { { "\\fcolorbox", NULL } , 2 }, 120 {{"\\fcolorbox", NULL}, 2},
81 » { { "\\declaregraphicsextensions", NULL } , 1 }, 121 {{"\\declaregraphicsextensions", NULL}, 1},
82 » { { "\\psfig", NULL } , 1 }, 122 {{"\\psfig", NULL}, 1},
83 » { { "\\url", NULL } , 1 }, 123 {{"\\url", NULL}, 1},
84 » { { "\\eqref", NULL } , 1 }, 124 {{"\\eqref", NULL}, 1},
85 » { { "\\vskip", NULL } , 1 }, 125 {{"\\vskip", NULL}, 1},
86 » { { "\\vglue", NULL } , 1 }, 126 {{"\\vglue", NULL}, 1},
87 » { { "\'\'", NULL } , 1 } 127 {{"\'\'", NULL}, 1}};
88 };
89 128
90 #define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0])) 129 #define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
91 130
92 LaTeXParser::LaTeXParser(const char * wordchars) 131 LaTeXParser::LaTeXParser(const char* wordchars)
93 { 132 : TextParser(wordchars)
94 » init(wordchars); 133 , pattern_num(0), depth(0), arg(0), opt(0) {
95 } 134 }
96 135
97 LaTeXParser::LaTeXParser(unsigned short * wordchars, int len) 136 LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
98 { 137 : TextParser(wordchars, len)
99 » init(wordchars, len); 138 , pattern_num(0), depth(0), arg(0), opt(0) {
100 } 139 }
101 140
102 LaTeXParser::~LaTeXParser() 141 LaTeXParser::~LaTeXParser() {}
103 {
104 }
105 142
106 int LaTeXParser::look_pattern(int col) 143 int LaTeXParser::look_pattern(int col) {
107 { 144 for (unsigned int i = 0; i < PATTERN_LEN; i++) {
108 » for (unsigned int i = 0; i < PATTERN_LEN; i++) { 145 const char* j = line[actual].c_str() + head;
109 » » char * j = line[actual] + head; 146 const char* k = PATTERN[i].pat[col];
110 » » const char * k = PATTERN[i].pat[col]; 147 if (!k)
111 » » if (! k) continue; 148 continue;
112 » » while ((*k != '\0') && (tolower(*j) == *k)) { 149 while ((*k != '\0') && (tolower(*j) == *k)) {
113 » » » j++; 150 j++;
114 » » » k++; 151 k++;
115 » » } 152 }
116 » » if (*k == '\0') return i; 153 if (*k == '\0')
117 » } 154 return i;
118 » return -1; 155 }
156 return -1;
119 } 157 }
120 158
121 /* 159 /*
122 * LaTeXParser 160 * LaTeXParser
123 * 161 *
124 * state 0: not wordchar 162 * state 0: not wordchar
125 * state 1: wordchar 163 * state 1: wordchar
126 * state 2: comments 164 * state 2: comments
127 * state 3: commands 165 * state 3: commands
128 * state 4: commands with arguments 166 * state 4: commands with arguments
129 * state 5: % comment 167 * state 5: % comment
130 * 168 *
131 */ 169 */
132 170
171 bool LaTeXParser::next_token(std::string& t) {
172 t.clear();
173 int i;
174 int slash = 0;
175 int apostrophe;
176 for (;;) {
177 // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
178 // %s\n",depth,state,arg,line[actual]+head);
133 179
134 char * LaTeXParser::next_token() 180 switch (state) {
135 { 181 case 0: // non word chars
136 » int i; 182 if ((pattern_num = look_pattern(0)) != -1) {
137 » int slash = 0; 183 if (PATTERN[pattern_num].pat[1]) {
138 » int apostrophe; 184 state = 2;
139 » for (;;) { 185 } else {
140 » » // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n",depth,state,arg,line[actual]+head); 186 state = 4;
141 » » 187 depth = 0;
142 » » switch (state) 188 arg = 0;
143 » » { 189 opt = 1;
144 » » case 0: // non word chars 190 }
145 » » » if ((pattern_num = look_pattern(0)) != -1) { 191 head += strlen(PATTERN[pattern_num].pat[0]) - 1;
146 » » » » if (PATTERN[pattern_num].pat[1]) { 192 } else if (line[actual][head] == '%') {
147 » » » » » state = 2; 193 state = 5;
148 » » » » } else { 194 } else if (is_wordchar(line[actual].c_str() + head)) {
149 » » » » » state = 4; 195 state = 1;
150 » » » » » depth = 0; 196 token = head;
151 » » » » » arg = 0; 197 } else if (line[actual][head] == '\\') {
152 » » » » » opt = 1; 198 if (line[actual][head + 1] == '\\' || // \\ (linebreak)
153 » » » » } 199 (line[actual][head + 1] == '$') || // \$ (dollar sign)
154 » » » » head += strlen(PATTERN[pattern_num].pat[0]) - 1; 200 (line[actual][head + 1] == '%')) { // \% (percent)
155 » » » } else if ((line[actual][head] == '%')) { 201 head++;
156 » » » » » state = 5; 202 break;
157 » » » } else if (is_wordchar(line[actual] + head)) { 203 }
158 » » » » state = 1; 204 state = 3;
159 » » » » token = head; 205 }
160 » » » } else if (line[actual][head] == '\\') { 206 break;
161 » » » » if (line[actual][head + 1] == '\\' || // \\ (linebreak) 207 case 1: // wordchar
162 » » » » » (line[actual][head + 1] == '$') || // \$ (dollar sign) 208 apostrophe = 0;
163 » » » » » (line[actual][head + 1] == '%')) { // \% (percent) 209 if (!is_wordchar(line[actual].c_str() + head) ||
164 » » » » » head++; 210 (line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
165 » » » » » break; 211 ++apostrophe)) {
166 » » » » } 212 state = 0;
167 » » » » state = 3; 213 bool ok = alloc_token(token, &head, t);
168 » » » } else if (line[actual][head] == '%') { 214 if (apostrophe)
169 » » » » if ((head==0) || (line[actual][head - 1] != '\\' )) state = 5; 215 head += 2;
170 » » » } 216 if (ok)
171 » » » break; 217 return true;
172 » » case 1: // wordchar 218 }
173 » » » apostrophe = 0; 219 break;
174 » » » if (! is_wordchar(line[actual] + head) || 220 case 2: // comment, labels, etc
175 » » » (line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) { 221 if (((i = look_pattern(1)) != -1) &&
176 » » » » state = 0; 222 (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
177 » » » » char * t = alloc_token(token, &head); 223 state = 0;
178 » » » » if (apostrophe) head += 2; 224 head += strlen(PATTERN[pattern_num].pat[1]) - 1;
179 » » » » if (t) return t; 225 }
180 » » » } 226 break;
181 » » » break; 227 case 3: // command
182 » » case 2: // comment, labels, etc 228 if ((tolower(line[actual][head]) < 'a') ||
183 » » » if (((i = look_pattern(1)) != -1) && 229 (tolower(line[actual][head]) > 'z')) {
184 » » » » (strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].pat[1]) == 0)) { 230 state = 0;
185 » » » » » state = 0; 231 head--;
186 » » » » » head += strlen(PATTERN[pattern_num].pat[1]) - 1; 232 }
187 » » » } 233 break;
188 » » » break; 234 case 4: // command with arguments
189 » » case 3: // command 235 if (slash && (line[actual][head] != '\0')) {
190 » » » if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) { 236 slash = 0;
191 » » » » state = 0; 237 head++;
192 » » » » head--; 238 break;
193 » » » } 239 } else if (line[actual][head] == '\\') {
194 » » » break; 240 slash = 1;
195 » » case 4: // command with arguments 241 } else if ((line[actual][head] == '{') ||
196 » » » if (slash && (line[actual][head] != '\0')) { 242 ((opt) && (line[actual][head] == '['))) {
197 » » » » slash = 0; 243 depth++;
198 » » » » head++; 244 opt = 0;
199 » » » » break; 245 } else if (line[actual][head] == '}') {
200 » » » } else if (line[actual][head]=='\\') { 246 depth--;
201 » » » » slash = 1; 247 if (depth == 0) {
202 » » » } else if ((line[actual][head] == '{') || 248 opt = 1;
203 » » » » ((opt) && (line[actual][head] == '['))) { 249 arg++;
204 » » » » » depth++; 250 }
205 » » » » » opt = 0; 251 if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
206 » » » } else if (line[actual][head] == '}') { 252 (depth < 0)) {
207 » » » » depth--; 253 state = 0; // XXX not handles the last optional arg.
208 » » » » if (depth == 0) { 254 }
209 » » » » » opt = 1; 255 } else if (line[actual][head] == ']')
210 » » » » » arg++; 256 depth--;
211 » » » » } 257 } // case
212 » » » » if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) || 258 if (next_char(line[actual].c_str(), &head)) {
213 » » » » » (depth < 0) ) { 259 if (state == 5)
214 » » » » » » state = 0; // XXX not handles the last optional arg. 260 state = 0;
215 » » » » } 261 return false;
216 » » » } else if (line[actual][head] == ']') depth--; 262 }
217 » » } // case 263 }
218 if (next_char(line[actual], &head)) {
219 » » » if (state == 5) state = 0;
220 » » » return NULL;
221 » » }
222 » }
223 } 264 }
OLDNEW
« no previous file with comments | « third_party/hunspell/src/parsers/latexparser.hxx ('k') | third_party/hunspell/src/parsers/manparser.hxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698