Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Unified Diff: third_party/hunspell/src/parsers/htmlparser.cxx

Issue 2544793003: [spellcheck] Updated Hunspell to 1.5.4 (Closed)
Patch Set: Test Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/hunspell/src/parsers/htmlparser.hxx ('k') | third_party/hunspell/src/parsers/latexparser.hxx » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/hunspell/src/parsers/htmlparser.cxx
diff --git a/third_party/hunspell/src/parsers/htmlparser.cxx b/third_party/hunspell/src/parsers/htmlparser.cxx
index 341be4e8948b0aee66621b301b6a17a6341ca271..7509c651b3ba317778cfd56828ce7ca2b7888ebf 100644
--- a/third_party/hunspell/src/parsers/htmlparser.cxx
+++ b/third_party/hunspell/src/parsers/htmlparser.cxx
@@ -1,3 +1,43 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
#include <cstdlib>
#include <cstring>
#include <cstdio>
@@ -6,146 +46,42 @@
#include "../hunspell/csutil.hxx"
#include "htmlparser.hxx"
-
#ifndef W32
using namespace std;
#endif
-enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };
-
-static const char * PATTERN[][2] = {
- { "<script", "</script>" },
- { "<style", "</style>" },
- { "<code", "</code>" },
- { "<samp", "</samp>" },
- { "<kbd", "</kbd>" },
- { "<var", "</var>" },
- { "<listing", "</listing>" },
- { "<address", "</address>" },
- { "<pre", "</pre>" },
- { "<!--", "-->" },
- { "<[cdata[", "]]>" }, // XML comment
- { "<", ">" }
-};
-
-#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char *) * 2))
-
-static const char * PATTERN2[][2] = {
- { "<img", "alt=" }, // ALT and TITLE attrib handled spec.
- { "<img", "title=" },
- { "<a ", "title=" }
-};
-
-#define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char *) * 2))
-
-HTMLParser::HTMLParser(const char * wordchars)
-{
- init(wordchars);
+static const char* PATTERN[][2] = {{"<script", "</script>"}, // each row: {opening marker, closing marker}; handed to XMLParser::next_token
+ {"<style", "</style>"},
+ {"<code", "</code>"},
+ {"<samp", "</samp>"},
+ {"<kbd", "</kbd>"},
+ {"<var", "</var>"},
+ {"<listing", "</listing>"},
+ {"<address", "</address>"},
+ {"<pre", "</pre>"},
+ {"<!--", "-->"},
+ {"<[cdata[", "]]>"}, // CDATA section (not a comment). NOTE(review): the XML opener is "<![CDATA[" - the '!' looks missing here; confirm
+ {"<", ">"}};
+
+#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char*) * 2)) // row count: each row holds two char* entries
+
+static const char* PATTERN2[][2] = { // each row: {tag prefix, attribute name}
+ {"<img", "alt="}, // ALT and TITLE attributes are handled specially
+ {"<img", "title="},
+ {"<a ", "title="}};
+
+#define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char*) * 2)) // row count: each row holds two char* entries
+
+HTMLParser::HTMLParser(const char* wordchars) // forwards wordchars unchanged to the XMLParser base
+ : XMLParser(wordchars) {
}
-HTMLParser::HTMLParser(unsigned short * wordchars, int len)
-{
- init(wordchars, len);
+HTMLParser::HTMLParser(const w_char* wordchars, int len) // forwards wordchars and len unchanged to the XMLParser base
+ : XMLParser(wordchars, len) {
}
-HTMLParser::~HTMLParser()
-{
-}
-
-
-int HTMLParser::look_pattern(const char * p[][2], unsigned int len, int column)
-{
- for (unsigned int i = 0; i < len; i++) {
- char * j = line[actual] + head;
- const char * k = p[i][column];
- while ((*k != '\0') && (tolower(*j) == *k)) {
- j++;
- k++;
- }
- if (*k == '\0') return i;
- }
- return -1;
+bool HTMLParser::next_token(std::string& t) { // delegates to XMLParser::next_token with the HTML tables; returns its result, t is passed through
+ return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, t);
}
-/*
- * HTML parser
- *
- */
-
-
-char * HTMLParser::next_token()
-{
- const char * latin1;
-
- for (;;) {
- //fprintf(stderr, "%d:%c:%s\n", state, line[actual][head], line[actual]);
- //getch();
- switch (state)
- {
- case ST_NON_WORD: // non word chars
- prevstate = ST_NON_WORD;
- if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) {
- checkattr = 0;
- if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) {
- checkattr = 1;
- }
- state = ST_TAG;
- } else if (is_wordchar(line[actual] + head)) {
- state = ST_WORD;
- token = head;
- } else if ((latin1 = get_latin1(line[actual] + head))) {
- state = ST_WORD;
- token = head;
- head += strlen(latin1);
- } else if (line[actual][head] == '&') {
- state = ST_CHAR_ENTITY;
- }
- break;
- case ST_WORD: // wordchar
- if ((latin1 = get_latin1(line[actual] + head))) {
- head += strlen(latin1);
- } else if (! is_wordchar(line[actual] + head)) {
- state = prevstate;
- char * t = alloc_token(token, &head);
- if (t) return t;
- }
- break;
- case ST_TAG: // comment, labels, etc
- int i;
- if ((checkattr == 1) && ((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1)
- && (strcmp(PATTERN2[i][0],PATTERN2[pattern2_num][0]) == 0)) {
- checkattr = 2;
- } else if ((checkattr > 0) && (line[actual][head] == '>')) {
- state = ST_NON_WORD;
- } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&
- (strcmp(PATTERN[i][1],PATTERN[pattern_num][1]) == 0)) {
- state = ST_NON_WORD;
- head += strlen(PATTERN[pattern_num][1]) - 1;
- } else if ( (strcmp(PATTERN[pattern_num][0], "<") == 0) &&
- ((line[actual][head] == '"') || (line[actual][head] == '\''))) {
- quotmark = line[actual][head];
- state = ST_ATTRIB;
- }
- break;
- case ST_ATTRIB: // non word chars
- prevstate = ST_ATTRIB;
- if (line[actual][head] == quotmark) {
- state = ST_TAG;
- if (checkattr == 2) checkattr = 1;
- // for IMG ALT
- } else if (is_wordchar(line[actual] + head) && (checkattr == 2)) {
- state = ST_WORD;
- token = head;
- } else if (line[actual][head] == '&') {
- state = ST_CHAR_ENTITY;
- }
- break;
- case ST_CHAR_ENTITY: // SGML element
- if ((tolower(line[actual][head]) == ';')) {
- state = prevstate;
- head--;
- }
- }
- if (next_char(line[actual], &head)) return NULL;
- }
-}
+HTMLParser::~HTMLParser() {} // empty body: no cleanup is performed in this class
« no previous file with comments | « third_party/hunspell/src/parsers/htmlparser.hxx ('k') | third_party/hunspell/src/parsers/latexparser.hxx » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698