third_party/hunspell_new/src/parsers/htmlparser.cxx - Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell.

Unified Diff: third_party/hunspell_new/src/parsers/htmlparser.cxx

Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/hunspell_new/src/parsers/htmlparser.cxx

diff --git a/third_party/hunspell_new/src/parsers/htmlparser.cxx b/third_party/hunspell_new/src/parsers/htmlparser.cxx

deleted file mode 100644

index 341be4e8948b0aee66621b301b6a17a6341ca271..0000000000000000000000000000000000000000

--- a/third_party/hunspell_new/src/parsers/htmlparser.cxx

+++ /dev/null

@@ -1,151 +0,0 @@

-#include <cstdlib>

-#include <cstring>

-#include <cstdio>

-#include <ctype.h>

-#include "../hunspell/csutil.hxx"

-#include "htmlparser.hxx"

-#ifndef W32

-using namespace std;

-#endif

-enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };

-static const char * PATTERN[][2] = {

- { "<script", "</script>" },

- { "<style", "</style>" },

- { "<code", "</code>" },

- { "<samp", "</samp>" },

- { "<kbd", "</kbd>" },

- { "<var", "</var>" },

- { "<listing", "</listing>" },

- { "<address", "</address>" },

- { "<pre", "</pre>" },

- { "" },

- { "<[cdata[", "]]>" }, // XML comment

- { "<", ">" }

-};

-#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char *) * 2))

-static const char * PATTERN2[][2] = {

- { "<img", "alt=" }, // ALT and TITLE attrib handled spec.

- { "<img", "title=" },

- { "<a ", "title=" }

-};

-#define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char *) * 2))

-HTMLParser::HTMLParser(const char * wordchars)

- init(wordchars);

-HTMLParser::HTMLParser(unsigned short * wordchars, int len)

- init(wordchars, len);

-HTMLParser::~HTMLParser()

-int HTMLParser::look_pattern(const char * p[][2], unsigned int len, int column)

- for (unsigned int i = 0; i < len; i++) {

- char * j = line[actual] + head;

- const char * k = p[i][column];

- while ((*k != '\0') && (tolower(*j) == *k)) {

- j++;

- k++;

- }

- if (*k == '\0') return i;

- }

- return -1;

-/*

- * HTML parser

- *

- */

-char * HTMLParser::next_token()

- const char * latin1;

- for (;;) {

- //fprintf(stderr, "%d:%c:%s\n", state, line[actual][head], line[actual]);

- //getch();

- switch (state)

- {

- case ST_NON_WORD: // non word chars

- prevstate = ST_NON_WORD;

- if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) {

- checkattr = 0;

- if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) {

- checkattr = 1;

- }

- state = ST_TAG;

- } else if (is_wordchar(line[actual] + head)) {

- state = ST_WORD;

- token = head;

- } else if ((latin1 = get_latin1(line[actual] + head))) {

- state = ST_WORD;

- token = head;

- head += strlen(latin1);

- } else if (line[actual][head] == '&') {

- state = ST_CHAR_ENTITY;

- }

- break;

- case ST_WORD: // wordchar

- if ((latin1 = get_latin1(line[actual] + head))) {

- head += strlen(latin1);

- } else if (! is_wordchar(line[actual] + head)) {

- state = prevstate;

- char * t = alloc_token(token, &head);

- if (t) return t;

- }

- break;

- case ST_TAG: // comment, labels, etc

- int i;

- if ((checkattr == 1) && ((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1)

- && (strcmp(PATTERN2[i][0],PATTERN2[pattern2_num][0]) == 0)) {

- checkattr = 2;

- } else if ((checkattr > 0) && (line[actual][head] == '>')) {

- state = ST_NON_WORD;

- } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&

- (strcmp(PATTERN[i][1],PATTERN[pattern_num][1]) == 0)) {

- state = ST_NON_WORD;

- head += strlen(PATTERN[pattern_num][1]) - 1;

- } else if ( (strcmp(PATTERN[pattern_num][0], "<") == 0) &&

- ((line[actual][head] == '"') || (line[actual][head] == '\''))) {

- quotmark = line[actual][head];

- state = ST_ATTRIB;

- }

- break;

- case ST_ATTRIB: // non word chars

- prevstate = ST_ATTRIB;

- if (line[actual][head] == quotmark) {

- state = ST_TAG;

- if (checkattr == 2) checkattr = 1;

- // for IMG ALT

- } else if (is_wordchar(line[actual] + head) && (checkattr == 2)) {

- state = ST_WORD;

- token = head;

- } else if (line[actual][head] == '&') {

- state = ST_CHAR_ENTITY;

- }

- break;

- case ST_CHAR_ENTITY: // SGML element

- if ((tolower(line[actual][head]) == ';')) {

- state = prevstate;

- head--;

- }

- if (next_char(line[actual], &head)) return NULL;

- }

« no previous file with comments | « third_party/hunspell_new/src/parsers/htmlparser.hxx ('k') | third_party/hunspell_new/src/parsers/latexparser.hxx » ('j') | no next file with comments »