| OLD | NEW |
| 1 /* ***** BEGIN LICENSE BLOCK ***** |
| 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
| 3 * |
| 4 * The contents of this file are subject to the Mozilla Public License Version |
| 5 * 1.1 (the "License"); you may not use this file except in compliance with |
| 6 * the License. You may obtain a copy of the License at |
| 7 * http://www.mozilla.org/MPL/ |
| 8 * |
| 9 * Software distributed under the License is distributed on an "AS IS" basis, |
| 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
| 11 * for the specific language governing rights and limitations under the |
| 12 * License. |
| 13 * |
| 14 * The Original Code is Hunspell, based on MySpell. |
| 15 * |
| 16 * The Initial Developers of the Original Code are |
| 17 * Kevin Hendricks (MySpell) and Németh László (Hunspell). |
| 18 * Portions created by the Initial Developers are Copyright (C) 2002-2005 |
| 19 * the Initial Developers. All Rights Reserved. |
| 20 * |
| 21 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, |
| 22 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, |
| 23 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, |
| 24 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, |
| 25 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen |
| 26 * |
| 27 * Alternatively, the contents of this file may be used under the terms of |
| 28 * either the GNU General Public License Version 2 or later (the "GPL"), or |
| 29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
| 30 * in which case the provisions of the GPL or the LGPL are applicable instead |
| 31 * of those above. If you wish to allow use of your version of this file only |
| 32 * under the terms of either the GPL or the LGPL, and not to allow others to |
| 33 * use your version of this file under the terms of the MPL, indicate your |
| 34 * decision by deleting the provisions above and replace them with the notice |
| 35 * and other provisions required by the GPL or the LGPL. If you do not delete |
| 36 * the provisions above, a recipient may use your version of this file under |
| 37 * the terms of any one of the MPL, the GPL or the LGPL. |
| 38 * |
| 39 * ***** END LICENSE BLOCK ***** */ |
| 40 |
| 1 #include <cstdlib> | 41 #include <cstdlib> |
| 2 #include <cstring> | 42 #include <cstring> |
| 3 #include <cstdio> | 43 #include <cstdio> |
| 4 #include <ctype.h> | 44 #include <ctype.h> |
| 5 | 45 |
| 6 #include "../hunspell/csutil.hxx" | 46 #include "../hunspell/csutil.hxx" |
| 7 #include "latexparser.hxx" | 47 #include "latexparser.hxx" |
| 8 | 48 |
| 9 #ifndef W32 | 49 #ifndef W32 |
| 10 using namespace std; | 50 using namespace std; |
| 11 #endif | 51 #endif |
| 12 | 52 |
/*
 * Table of LaTeX constructs whose content is skipped by the parser.
 *
 *  pat[0]  text that opens the construct; input is lower-cased before it is
 *          compared, so every entry here is written in lower case
 *  pat[1]  text that closes the construct, or NULL when the construct is a
 *          command followed by brace-delimited arguments
 *  arg     number of brace-delimited arguments to skip when pat[1] is NULL
 *
 * look_pattern() returns the FIRST entry that matches, so an entry must come
 * before any other entry that is a prefix of it (e.g. "$$" before "$",
 * "\\includeonly" before "\\include").
 */
static struct {
  const char* pat[2];
  int arg;
} PATTERN[] = {
    {{"\\(", "\\)"}, 0},
    {{"$$", "$$"}, 0},
    {{"$", "$"}, 0},
    {{"\\begin{math}", "\\end{math}"}, 0},
    {{"\\[", "\\]"}, 0},
    {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
    {{"\\begin{equation}", "\\end{equation}"}, 0},
    {{"\\begin{equation*}", "\\end{equation*}"}, 0},
    {{"\\cite", NULL}, 1},
    {{"\\nocite", NULL}, 1},
    {{"\\index", NULL}, 1},
    {{"\\label", NULL}, 1},
    {{"\\ref", NULL}, 1},
    {{"\\pageref", NULL}, 1},
    {{"\\autoref", NULL}, 1},
    {{"\\parbox", NULL}, 1},
    {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
    {{"\\verb+", "+"}, 0},
    {{"\\verb|", "|"}, 0},
    {{"\\verb#", "#"}, 0},
    {{"\\verb*", "*"}, 0},
    {{"\\documentstyle", "\\begin{document}"}, 0},
    {{"\\documentclass", "\\begin{document}"}, 0},
    // {{"\\documentclass", NULL}, 1},
    {{"\\usepackage", NULL}, 1},
    {{"\\includeonly", NULL}, 1},
    {{"\\include", NULL}, 1},
    {{"\\input", NULL}, 1},
    {{"\\vspace", NULL}, 1},
    {{"\\setlength", NULL}, 2},
    {{"\\addtolength", NULL}, 2},
    {{"\\settowidth", NULL}, 2},
    {{"\\rule", NULL}, 2},
    {{"\\hspace", NULL}, 1},
    // A second "\\vspace" entry used to follow here; it could never be
    // selected because the first match wins, so it has been dropped.
    {{"\\\\[", "]"}, 0},
    {{"\\pagebreak[", "]"}, 0},
    {{"\\nopagebreak[", "]"}, 0},
    {{"\\enlargethispage", NULL}, 1},
    {{"\\begin{tabular}", NULL}, 1},
    {{"\\addcontentsline", NULL}, 2},
    {{"\\begin{thebibliography}", NULL}, 1},
    {{"\\bibliography", NULL}, 1},
    {{"\\bibliographystyle", NULL}, 1},
    {{"\\bibitem", NULL}, 1},
    {{"\\begin", NULL}, 1},
    {{"\\end", NULL}, 1},
    {{"\\pagestyle", NULL}, 1},
    {{"\\pagenumbering", NULL}, 1},
    {{"\\thispagestyle", NULL}, 1},
    {{"\\newtheorem", NULL}, 2},
    {{"\\newcommand", NULL}, 2},
    {{"\\renewcommand", NULL}, 2},
    {{"\\setcounter", NULL}, 2},
    {{"\\addtocounter", NULL}, 1},
    {{"\\stepcounter", NULL}, 1},
    {{"\\selectlanguage", NULL}, 1},
    {{"\\inputencoding", NULL}, 1},
    {{"\\hyphenation", NULL}, 1},
    {{"\\definecolor", NULL}, 3},
    {{"\\color", NULL}, 1},
    {{"\\textcolor", NULL}, 1},
    {{"\\pagecolor", NULL}, 1},
    {{"\\colorbox", NULL}, 2},
    {{"\\fcolorbox", NULL}, 2},
    {{"\\declaregraphicsextensions", NULL}, 1},
    {{"\\psfig", NULL}, 1},
    {{"\\url", NULL}, 1},
    {{"\\eqref", NULL}, 1},
    {{"\\vskip", NULL}, 1},
    {{"\\vglue", NULL}, 1},
    {{"\'\'", NULL}, 1},
};

// Number of entries in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
| 91 | 130 |
| 92 LaTeXParser::LaTeXParser(const char * wordchars) | 131 LaTeXParser::LaTeXParser(const char* wordchars) |
| 93 { | 132 : TextParser(wordchars) |
| 94 » init(wordchars); | 133 , pattern_num(0), depth(0), arg(0), opt(0) { |
| 95 } | 134 } |
| 96 | 135 |
| 97 LaTeXParser::LaTeXParser(unsigned short * wordchars, int len) | 136 LaTeXParser::LaTeXParser(const w_char* wordchars, int len) |
| 98 { | 137 : TextParser(wordchars, len) |
| 99 » init(wordchars, len); | 138 , pattern_num(0), depth(0), arg(0), opt(0) { |
| 100 } | 139 } |
| 101 | 140 |
| 102 LaTeXParser::~LaTeXParser() | 141 LaTeXParser::~LaTeXParser() {} |
| 103 { | |
| 104 } | |
| 105 | 142 |
| 106 int LaTeXParser::look_pattern(int col) | 143 int LaTeXParser::look_pattern(int col) { |
| 107 { | 144 for (unsigned int i = 0; i < PATTERN_LEN; i++) { |
| 108 » for (unsigned int i = 0; i < PATTERN_LEN; i++) { | 145 const char* j = line[actual].c_str() + head; |
| 109 » » char * j = line[actual] + head; | 146 const char* k = PATTERN[i].pat[col]; |
| 110 » » const char * k = PATTERN[i].pat[col]; | 147 if (!k) |
| 111 » » if (! k) continue; | 148 continue; |
| 112 » » while ((*k != '\0') && (tolower(*j) == *k)) { | 149 while ((*k != '\0') && (tolower(*j) == *k)) { |
| 113 » » » j++; | 150 j++; |
| 114 » » » k++; | 151 k++; |
| 115 » » } | 152 } |
| 116 » » if (*k == '\0') return i; | 153 if (*k == '\0') |
| 117 » } | 154 return i; |
| 118 » return -1; | 155 } |
| 156 return -1; |
| 119 } | 157 } |
| 120 | 158 |
| 121 /* | 159 /* |
| 122 * LaTeXParser | 160 * LaTeXParser |
| 123 * | 161 * |
| 124 * state 0: not wordchar | 162 * state 0: not wordchar |
| 125 * state 1: wordchar | 163 * state 1: wordchar |
| 126 * state 2: comments | 164 * state 2: comments |
| 127 * state 3: commands | 165 * state 3: commands |
| 128 * state 4: commands with arguments | 166 * state 4: commands with arguments |
| 129 * state 5: % comment | 167 * state 5: % comment |
| 130 * | 168 * |
| 131 */ | 169 */ |
| 132 | 170 |
| 171 bool LaTeXParser::next_token(std::string& t) { |
| 172 t.clear(); |
| 173 int i; |
| 174 int slash = 0; |
| 175 int apostrophe; |
| 176 for (;;) { |
| 177 // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: |
| 178 // %s\n",depth,state,arg,line[actual]+head); |
| 133 | 179 |
| 134 char * LaTeXParser::next_token() | 180 switch (state) { |
| 135 { | 181 case 0: // non word chars |
| 136 » int i; | 182 if ((pattern_num = look_pattern(0)) != -1) { |
| 137 » int slash = 0; | 183 if (PATTERN[pattern_num].pat[1]) { |
| 138 » int apostrophe; | 184 state = 2; |
| 139 » for (;;) { | 185 } else { |
| 140 » » // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n"
,depth,state,arg,line[actual]+head); | 186 state = 4; |
| 141 » » | 187 depth = 0; |
| 142 » » switch (state) | 188 arg = 0; |
| 143 » » { | 189 opt = 1; |
| 144 » » case 0: // non word chars | 190 } |
| 145 » » » if ((pattern_num = look_pattern(0)) != -1) { | 191 head += strlen(PATTERN[pattern_num].pat[0]) - 1; |
| 146 » » » » if (PATTERN[pattern_num].pat[1]) { | 192 } else if (line[actual][head] == '%') { |
| 147 » » » » » state = 2; | 193 state = 5; |
| 148 » » » » } else { | 194 } else if (is_wordchar(line[actual].c_str() + head)) { |
| 149 » » » » » state = 4; | 195 state = 1; |
| 150 » » » » » depth = 0; | 196 token = head; |
| 151 » » » » » arg = 0; | 197 } else if (line[actual][head] == '\\') { |
| 152 » » » » » opt = 1; | 198 if (line[actual][head + 1] == '\\' || // \\ (linebreak) |
| 153 » » » » } | 199 (line[actual][head + 1] == '$') || // \$ (dollar sign) |
| 154 » » » » head += strlen(PATTERN[pattern_num].pat[0]) - 1; | 200 (line[actual][head + 1] == '%')) { // \% (percent) |
| 155 » » » } else if ((line[actual][head] == '%')) { | 201 head++; |
| 156 » » » » » state = 5; | 202 break; |
| 157 » » » } else if (is_wordchar(line[actual] + head)) { | 203 } |
| 158 » » » » state = 1; | 204 state = 3; |
| 159 » » » » token = head; | 205 } |
| 160 » » » } else if (line[actual][head] == '\\') { | 206 break; |
| 161 » » » » if (line[actual][head + 1] == '\\' || // \\ (li
nebreak) | 207 case 1: // wordchar |
| 162 » » » » » (line[actual][head + 1] == '$') || // \$
(dollar sign) | 208 apostrophe = 0; |
| 163 » » » » » (line[actual][head + 1] == '%')) { // \%
(percent) | 209 if (!is_wordchar(line[actual].c_str() + head) || |
| 164 » » » » » head++; | 210 (line[actual][head] == '\'' && line[actual][head + 1] == '\'' && |
| 165 » » » » » break; | 211 ++apostrophe)) { |
| 166 » » » » } | 212 state = 0; |
| 167 » » » » state = 3; | 213 bool ok = alloc_token(token, &head, t); |
| 168 » » » } else if (line[actual][head] == '%') { | 214 if (apostrophe) |
| 169 » » » » if ((head==0) || (line[actual][head - 1] != '\\'
)) state = 5; | 215 head += 2; |
| 170 » » » } | 216 if (ok) |
| 171 » » » break; | 217 return true; |
| 172 » » case 1: // wordchar | 218 } |
| 173 » » » apostrophe = 0; | 219 break; |
| 174 » » » if (! is_wordchar(line[actual] + head) || | 220 case 2: // comment, labels, etc |
| 175 » » » (line[actual][head] == '\'' && line[actual][head+1] ==
'\'' && ++apostrophe)) { | 221 if (((i = look_pattern(1)) != -1) && |
| 176 » » » » state = 0; | 222 (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) { |
| 177 » » » » char * t = alloc_token(token, &head); | 223 state = 0; |
| 178 » » » » if (apostrophe) head += 2; | 224 head += strlen(PATTERN[pattern_num].pat[1]) - 1; |
| 179 » » » » if (t) return t; | 225 } |
| 180 » » » } | 226 break; |
| 181 » » » break; | 227 case 3: // command |
| 182 » » case 2: // comment, labels, etc | 228 if ((tolower(line[actual][head]) < 'a') || |
| 183 » » » if (((i = look_pattern(1)) != -1) && | 229 (tolower(line[actual][head]) > 'z')) { |
| 184 » » » » (strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].p
at[1]) == 0)) { | 230 state = 0; |
| 185 » » » » » state = 0; | 231 head--; |
| 186 » » » » » head += strlen(PATTERN[pattern_num].pat[
1]) - 1; | 232 } |
| 187 » » » } | 233 break; |
| 188 » » » break; | 234 case 4: // command with arguments |
| 189 » » case 3: // command | 235 if (slash && (line[actual][head] != '\0')) { |
| 190 » » » if ((tolower(line[actual][head]) < 'a') || (tolower(line
[actual][head]) > 'z')) { | 236 slash = 0; |
| 191 » » » » state = 0; | 237 head++; |
| 192 » » » » head--; | 238 break; |
| 193 » » » } | 239 } else if (line[actual][head] == '\\') { |
| 194 » » » break; | 240 slash = 1; |
| 195 » » case 4: // command with arguments | 241 } else if ((line[actual][head] == '{') || |
| 196 » » » if (slash && (line[actual][head] != '\0')) { | 242 ((opt) && (line[actual][head] == '['))) { |
| 197 » » » » slash = 0; | 243 depth++; |
| 198 » » » » head++; | 244 opt = 0; |
| 199 » » » » break; | 245 } else if (line[actual][head] == '}') { |
| 200 » » » } else if (line[actual][head]=='\\') { | 246 depth--; |
| 201 » » » » slash = 1; | 247 if (depth == 0) { |
| 202 » » » } else if ((line[actual][head] == '{') || | 248 opt = 1; |
| 203 » » » » ((opt) && (line[actual][head] == '['))) { | 249 arg++; |
| 204 » » » » » depth++; | 250 } |
| 205 » » » » » opt = 0; | 251 if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) || |
| 206 » » » } else if (line[actual][head] == '}') { | 252 (depth < 0)) { |
| 207 » » » » depth--; | 253 state = 0; // XXX not handles the last optional arg. |
| 208 » » » » if (depth == 0) { | 254 } |
| 209 » » » » » opt = 1; | 255 } else if (line[actual][head] == ']') |
| 210 » » » » » arg++; | 256 depth--; |
| 211 » » » » } | 257 } // case |
| 212 » » » » if (((depth == 0) && (arg == PATTERN[pattern_num
].arg)) || | 258 if (next_char(line[actual].c_str(), &head)) { |
| 213 » » » » » (depth < 0) ) { | 259 if (state == 5) |
| 214 » » » » » » state = 0; // XXX not handles th
e last optional arg. | 260 state = 0; |
| 215 » » » » } | 261 return false; |
| 216 » » » } else if (line[actual][head] == ']') depth--; | 262 } |
| 217 » » } // case | 263 } |
| 218 if (next_char(line[actual], &head)) { | |
| 219 » » » if (state == 5) state = 0; | |
| 220 » » » return NULL; | |
| 221 » » } | |
| 222 » } | |
| 223 } | 264 } |
| OLD | NEW |