| Index: third_party/hunspell/src/parsers/latexparser.cxx
|
| diff --git a/third_party/hunspell/src/parsers/latexparser.cxx b/third_party/hunspell/src/parsers/latexparser.cxx
|
| index 5ffe3fd4446f967112467e4b0fa96f59cf5c3031..6f720107da26ca6865b321c0979ec1145eed7eb3 100644
|
| --- a/third_party/hunspell/src/parsers/latexparser.cxx
|
| +++ b/third_party/hunspell/src/parsers/latexparser.cxx
|
| @@ -1,3 +1,43 @@
|
| +/* ***** BEGIN LICENSE BLOCK *****
|
| + * Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
| + *
|
| + * The contents of this file are subject to the Mozilla Public License Version
|
| + * 1.1 (the "License"); you may not use this file except in compliance with
|
| + * the License. You may obtain a copy of the License at
|
| + * http://www.mozilla.org/MPL/
|
| + *
|
| + * Software distributed under the License is distributed on an "AS IS" basis,
|
| + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
| + * for the specific language governing rights and limitations under the
|
| + * License.
|
| + *
|
| + * The Original Code is Hunspell, based on MySpell.
|
| + *
|
| + * The Initial Developers of the Original Code are
|
| + * Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
| + * Portions created by the Initial Developers are Copyright (C) 2002-2005
|
| + * the Initial Developers. All Rights Reserved.
|
| + *
|
| + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
| + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
| + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
| + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
| + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
| + *
|
| + * Alternatively, the contents of this file may be used under the terms of
|
| + * either the GNU General Public License Version 2 or later (the "GPL"), or
|
| + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
| + * in which case the provisions of the GPL or the LGPL are applicable instead
|
| + * of those above. If you wish to allow use of your version of this file only
|
| + * under the terms of either the GPL or the LGPL, and not to allow others to
|
| + * use your version of this file under the terms of the MPL, indicate your
|
| + * decision by deleting the provisions above and replace them with the notice
|
| + * and other provisions required by the GPL or the LGPL. If you do not delete
|
| + * the provisions above, a recipient may use your version of this file under
|
| + * the terms of any one of the MPL, the GPL or the LGPL.
|
| + *
|
| + * ***** END LICENSE BLOCK ***** */
|
| +
|
| #include <cstdlib>
|
| #include <cstring>
|
| #include <cstdio>
|
| @@ -11,111 +51,109 @@ using namespace std;
|
| #endif
|
|
|
| static struct {
|
| - const char * pat[2];
|
| - int arg;
|
| -} PATTERN[] = {
|
| - { { "\\(", "\\)" } , 0 },
|
| - { { "$$", "$$" } , 0 },
|
| - { { "$", "$" } , 0 },
|
| - { { "\\begin{math}", "\\end{math}" } , 0 },
|
| - { { "\\[", "\\]" } , 0 },
|
| - { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 },
|
| - { { "\\begin{equation}", "\\end{equation}" } , 0 },
|
| - { { "\\begin{equation*}", "\\end{equation*}" } , 0 },
|
| - { { "\\cite", NULL } , 1 },
|
| - { { "\\nocite", NULL } , 1 },
|
| - { { "\\index", NULL } , 1 },
|
| - { { "\\label", NULL } , 1 },
|
| - { { "\\ref", NULL } , 1 },
|
| - { { "\\pageref", NULL } , 1 },
|
| - { { "\\parbox", NULL } , 1 },
|
| - { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 },
|
| - { { "\\verb+", "+" } , 0 },
|
| - { { "\\verb|", "|" } , 0 },
|
| - { { "\\verb#", "#" } , 0 },
|
| - { { "\\verb*", "*" } , 0 },
|
| - { { "\\documentstyle", "\\begin{document}" } , 0 },
|
| - { { "\\documentclass", "\\begin{document}" } , 0 },
|
| -// { { "\\documentclass", NULL } , 1 },
|
| - { { "\\usepackage", NULL } , 1 },
|
| - { { "\\includeonly", NULL } , 1 },
|
| - { { "\\include", NULL } , 1 },
|
| - { { "\\input", NULL } , 1 },
|
| - { { "\\vspace", NULL } , 1 },
|
| - { { "\\setlength", NULL } , 2 },
|
| - { { "\\addtolength", NULL } , 2 },
|
| - { { "\\settowidth", NULL } , 2 },
|
| - { { "\\rule", NULL } , 2 },
|
| - { { "\\hspace", NULL } , 1 } ,
|
| - { { "\\vspace", NULL } , 1 } ,
|
| - { { "\\\\[", "]" } , 0 },
|
| - { { "\\pagebreak[", "]" } , 0 } ,
|
| - { { "\\nopagebreak[", "]" } , 0 } ,
|
| - { { "\\enlargethispage", NULL } , 1 } ,
|
| - { { "\\begin{tabular}", NULL } , 1 } ,
|
| - { { "\\addcontentsline", NULL } , 2 } ,
|
| - { { "\\begin{thebibliography}", NULL } , 1 } ,
|
| - { { "\\bibliography", NULL } , 1 } ,
|
| - { { "\\bibliographystyle", NULL } , 1 } ,
|
| - { { "\\bibitem", NULL } , 1 } ,
|
| - { { "\\begin", NULL } , 1 } ,
|
| - { { "\\end", NULL } , 1 } ,
|
| - { { "\\pagestyle", NULL } , 1 } ,
|
| - { { "\\pagenumbering", NULL } , 1 } ,
|
| - { { "\\thispagestyle", NULL } , 1 } ,
|
| - { { "\\newtheorem", NULL } , 2 },
|
| - { { "\\newcommand", NULL } , 2 },
|
| - { { "\\renewcommand", NULL } , 2 },
|
| - { { "\\setcounter", NULL } , 2 },
|
| - { { "\\addtocounter", NULL } , 1 },
|
| - { { "\\stepcounter", NULL } , 1 },
|
| - { { "\\selectlanguage", NULL } , 1 },
|
| - { { "\\inputencoding", NULL } , 1 },
|
| - { { "\\hyphenation", NULL } , 1 },
|
| - { { "\\definecolor", NULL } , 3 },
|
| - { { "\\color", NULL } , 1 },
|
| - { { "\\textcolor", NULL } , 1 },
|
| - { { "\\pagecolor", NULL } , 1 },
|
| - { { "\\colorbox", NULL } , 2 },
|
| - { { "\\fcolorbox", NULL } , 2 },
|
| - { { "\\declaregraphicsextensions", NULL } , 1 },
|
| - { { "\\psfig", NULL } , 1 },
|
| - { { "\\url", NULL } , 1 },
|
| - { { "\\eqref", NULL } , 1 },
|
| - { { "\\vskip", NULL } , 1 },
|
| - { { "\\vglue", NULL } , 1 },
|
| - { { "\'\'", NULL } , 1 }
|
| -};
|
| + const char* pat[2];
|
| + int arg;
|
| +} PATTERN[] = {{{"\\(", "\\)"}, 0},
|
| + {{"$$", "$$"}, 0},
|
| + {{"$", "$"}, 0},
|
| + {{"\\begin{math}", "\\end{math}"}, 0},
|
| + {{"\\[", "\\]"}, 0},
|
| + {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
|
| + {{"\\begin{equation}", "\\end{equation}"}, 0},
|
| + {{"\\begin{equation*}", "\\end{equation*}"}, 0},
|
| + {{"\\cite", NULL}, 1},
|
| + {{"\\nocite", NULL}, 1},
|
| + {{"\\index", NULL}, 1},
|
| + {{"\\label", NULL}, 1},
|
| + {{"\\ref", NULL}, 1},
|
| + {{"\\pageref", NULL}, 1},
|
| + {{"\\autoref", NULL}, 1},
|
| + {{"\\parbox", NULL}, 1},
|
| + {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
|
| + {{"\\verb+", "+"}, 0},
|
| + {{"\\verb|", "|"}, 0},
|
| + {{"\\verb#", "#"}, 0},
|
| + {{"\\verb*", "*"}, 0},
|
| + {{"\\documentstyle", "\\begin{document}"}, 0},
|
| + {{"\\documentclass", "\\begin{document}"}, 0},
|
| + // { { "\\documentclass", NULL } , 1 },
|
| + {{"\\usepackage", NULL}, 1},
|
| + {{"\\includeonly", NULL}, 1},
|
| + {{"\\include", NULL}, 1},
|
| + {{"\\input", NULL}, 1},
|
| + {{"\\vspace", NULL}, 1},
|
| + {{"\\setlength", NULL}, 2},
|
| + {{"\\addtolength", NULL}, 2},
|
| + {{"\\settowidth", NULL}, 2},
|
| + {{"\\rule", NULL}, 2},
|
| + {{"\\hspace", NULL}, 1},
|
| + {{"\\vspace", NULL}, 1},
|
| + {{"\\\\[", "]"}, 0},
|
| + {{"\\pagebreak[", "]"}, 0},
|
| + {{"\\nopagebreak[", "]"}, 0},
|
| + {{"\\enlargethispage", NULL}, 1},
|
| + {{"\\begin{tabular}", NULL}, 1},
|
| + {{"\\addcontentsline", NULL}, 2},
|
| + {{"\\begin{thebibliography}", NULL}, 1},
|
| + {{"\\bibliography", NULL}, 1},
|
| + {{"\\bibliographystyle", NULL}, 1},
|
| + {{"\\bibitem", NULL}, 1},
|
| + {{"\\begin", NULL}, 1},
|
| + {{"\\end", NULL}, 1},
|
| + {{"\\pagestyle", NULL}, 1},
|
| + {{"\\pagenumbering", NULL}, 1},
|
| + {{"\\thispagestyle", NULL}, 1},
|
| + {{"\\newtheorem", NULL}, 2},
|
| + {{"\\newcommand", NULL}, 2},
|
| + {{"\\renewcommand", NULL}, 2},
|
| + {{"\\setcounter", NULL}, 2},
|
| + {{"\\addtocounter", NULL}, 1},
|
| + {{"\\stepcounter", NULL}, 1},
|
| + {{"\\selectlanguage", NULL}, 1},
|
| + {{"\\inputencoding", NULL}, 1},
|
| + {{"\\hyphenation", NULL}, 1},
|
| + {{"\\definecolor", NULL}, 3},
|
| + {{"\\color", NULL}, 1},
|
| + {{"\\textcolor", NULL}, 1},
|
| + {{"\\pagecolor", NULL}, 1},
|
| + {{"\\colorbox", NULL}, 2},
|
| + {{"\\fcolorbox", NULL}, 2},
|
| + {{"\\declaregraphicsextensions", NULL}, 1},
|
| + {{"\\psfig", NULL}, 1},
|
| + {{"\\url", NULL}, 1},
|
| + {{"\\eqref", NULL}, 1},
|
| + {{"\\vskip", NULL}, 1},
|
| + {{"\\vglue", NULL}, 1},
|
| + {{"\'\'", NULL}, 1}};
|
|
|
| #define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
|
|
|
| -LaTeXParser::LaTeXParser(const char * wordchars)
|
| -{
|
| - init(wordchars);
|
| +LaTeXParser::LaTeXParser(const char* wordchars)
|
| + : TextParser(wordchars)
|
| + , pattern_num(0), depth(0), arg(0), opt(0) {
|
| }
|
|
|
| -LaTeXParser::LaTeXParser(unsigned short * wordchars, int len)
|
| -{
|
| - init(wordchars, len);
|
| +LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
|
| + : TextParser(wordchars, len)
|
| + , pattern_num(0), depth(0), arg(0), opt(0) {
|
| }
|
|
|
| -LaTeXParser::~LaTeXParser()
|
| -{
|
| -}
|
| +LaTeXParser::~LaTeXParser() {}
|
|
|
| -int LaTeXParser::look_pattern(int col)
|
| -{
|
| - for (unsigned int i = 0; i < PATTERN_LEN; i++) {
|
| - char * j = line[actual] + head;
|
| - const char * k = PATTERN[i].pat[col];
|
| - if (! k) continue;
|
| - while ((*k != '\0') && (tolower(*j) == *k)) {
|
| - j++;
|
| - k++;
|
| - }
|
| - if (*k == '\0') return i;
|
| - }
|
| - return -1;
|
| +int LaTeXParser::look_pattern(int col) {
|
| + for (unsigned int i = 0; i < PATTERN_LEN; i++) {
|
| + const char* j = line[actual].c_str() + head;
|
| + const char* k = PATTERN[i].pat[col];
|
| + if (!k)
|
| + continue;
|
| + while ((*k != '\0') && (tolower(*j) == *k)) {
|
| + j++;
|
| + k++;
|
| + }
|
| + if (*k == '\0')
|
| + return i;
|
| + }
|
| + return -1;
|
| }
|
|
|
| /*
|
| @@ -124,100 +162,103 @@ int LaTeXParser::look_pattern(int col)
|
| * state 0: not wordchar
|
| * state 1: wordchar
|
| * state 2: comments
|
| - * state 3: commands
|
| + * state 3: commands
|
| * state 4: commands with arguments
|
| * state 5: % comment
|
| *
|
| */
|
|
|
| +bool LaTeXParser::next_token(std::string& t) {
|
| + t.clear();
|
| + int i;
|
| + int slash = 0;
|
| + int apostrophe;
|
| + for (;;) {
|
| + // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
|
| + // %s\n",depth,state,arg,line[actual]+head);
|
|
|
| -char * LaTeXParser::next_token()
|
| -{
|
| - int i;
|
| - int slash = 0;
|
| - int apostrophe;
|
| - for (;;) {
|
| - // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n",depth,state,arg,line[actual]+head);
|
| -
|
| - switch (state)
|
| - {
|
| - case 0: // non word chars
|
| - if ((pattern_num = look_pattern(0)) != -1) {
|
| - if (PATTERN[pattern_num].pat[1]) {
|
| - state = 2;
|
| - } else {
|
| - state = 4;
|
| - depth = 0;
|
| - arg = 0;
|
| - opt = 1;
|
| - }
|
| - head += strlen(PATTERN[pattern_num].pat[0]) - 1;
|
| - } else if ((line[actual][head] == '%')) {
|
| - state = 5;
|
| - } else if (is_wordchar(line[actual] + head)) {
|
| - state = 1;
|
| - token = head;
|
| - } else if (line[actual][head] == '\\') {
|
| - if (line[actual][head + 1] == '\\' || // \\ (linebreak)
|
| - (line[actual][head + 1] == '$') || // \$ (dollar sign)
|
| - (line[actual][head + 1] == '%')) { // \% (percent)
|
| - head++;
|
| - break;
|
| - }
|
| - state = 3;
|
| - } else if (line[actual][head] == '%') {
|
| - if ((head==0) || (line[actual][head - 1] != '\\')) state = 5;
|
| - }
|
| - break;
|
| - case 1: // wordchar
|
| - apostrophe = 0;
|
| - if (! is_wordchar(line[actual] + head) ||
|
| - (line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) {
|
| - state = 0;
|
| - char * t = alloc_token(token, &head);
|
| - if (apostrophe) head += 2;
|
| - if (t) return t;
|
| - }
|
| - break;
|
| - case 2: // comment, labels, etc
|
| - if (((i = look_pattern(1)) != -1) &&
|
| - (strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].pat[1]) == 0)) {
|
| - state = 0;
|
| - head += strlen(PATTERN[pattern_num].pat[1]) - 1;
|
| - }
|
| - break;
|
| - case 3: // command
|
| - if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) {
|
| - state = 0;
|
| - head--;
|
| - }
|
| - break;
|
| - case 4: // command with arguments
|
| - if (slash && (line[actual][head] != '\0')) {
|
| - slash = 0;
|
| - head++;
|
| - break;
|
| - } else if (line[actual][head]=='\\') {
|
| - slash = 1;
|
| - } else if ((line[actual][head] == '{') ||
|
| - ((opt) && (line[actual][head] == '['))) {
|
| - depth++;
|
| - opt = 0;
|
| - } else if (line[actual][head] == '}') {
|
| - depth--;
|
| - if (depth == 0) {
|
| - opt = 1;
|
| - arg++;
|
| - }
|
| - if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
|
| - (depth < 0) ) {
|
| - state = 0; // XXX not handles the last optional arg.
|
| - }
|
| - } else if (line[actual][head] == ']') depth--;
|
| - } // case
|
| - if (next_char(line[actual], &head)) {
|
| - if (state == 5) state = 0;
|
| - return NULL;
|
| - }
|
| - }
|
| + switch (state) {
|
| + case 0: // non word chars
|
| + if ((pattern_num = look_pattern(0)) != -1) {
|
| + if (PATTERN[pattern_num].pat[1]) {
|
| + state = 2;
|
| + } else {
|
| + state = 4;
|
| + depth = 0;
|
| + arg = 0;
|
| + opt = 1;
|
| + }
|
| + head += strlen(PATTERN[pattern_num].pat[0]) - 1;
|
| + } else if (line[actual][head] == '%') {
|
| + state = 5;
|
| + } else if (is_wordchar(line[actual].c_str() + head)) {
|
| + state = 1;
|
| + token = head;
|
| + } else if (line[actual][head] == '\\') {
|
| + if (line[actual][head + 1] == '\\' || // \\ (linebreak)
|
| + (line[actual][head + 1] == '$') || // \$ (dollar sign)
|
| + (line[actual][head + 1] == '%')) { // \% (percent)
|
| + head++;
|
| + break;
|
| + }
|
| + state = 3;
|
| + }
|
| + break;
|
| + case 1: // wordchar
|
| + apostrophe = 0;
|
| + if (!is_wordchar(line[actual].c_str() + head) ||
|
| + (line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
|
| + ++apostrophe)) {
|
| + state = 0;
|
| + bool ok = alloc_token(token, &head, t);
|
| + if (apostrophe)
|
| + head += 2;
|
| + if (ok)
|
| + return true;
|
| + }
|
| + break;
|
| + case 2: // comment, labels, etc
|
| + if (((i = look_pattern(1)) != -1) &&
|
| + (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
|
| + state = 0;
|
| + head += strlen(PATTERN[pattern_num].pat[1]) - 1;
|
| + }
|
| + break;
|
| + case 3: // command
|
| + if ((tolower(line[actual][head]) < 'a') ||
|
| + (tolower(line[actual][head]) > 'z')) {
|
| + state = 0;
|
| + head--;
|
| + }
|
| + break;
|
| + case 4: // command with arguments
|
| + if (slash && (line[actual][head] != '\0')) {
|
| + slash = 0;
|
| + head++;
|
| + break;
|
| + } else if (line[actual][head] == '\\') {
|
| + slash = 1;
|
| + } else if ((line[actual][head] == '{') ||
|
| + ((opt) && (line[actual][head] == '['))) {
|
| + depth++;
|
| + opt = 0;
|
| + } else if (line[actual][head] == '}') {
|
| + depth--;
|
| + if (depth == 0) {
|
| + opt = 1;
|
| + arg++;
|
| + }
|
| + if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
|
| + (depth < 0)) {
|
| + state = 0; // XXX does not handle the last optional arg.
|
| + }
|
| + } else if (line[actual][head] == ']')
|
| + depth--;
|
| + } // case
|
| + if (next_char(line[actual].c_str(), &head)) {
|
| + if (state == 5)
|
| + state = 0;
|
| + return false;
|
| + }
|
| + }
|
| }
|
|
|