OLD | NEW |
| (Empty) |
1 /* | |
2 * parser classes for MySpell | |
3 * | |
4 * implemented: text, HTML, TeX | |
5 * | |
6 * Copyright (C) 2002, Laszlo Nemeth | |
7 * | |
8 */ | |
9 | |
// Include guard for this header; it is closed by the final #endif of the file.
// NOTE(review): identifiers that begin with an underscore followed by an
// uppercase letter are reserved for the implementation in C and C++; consider
// renaming the guard once it is confirmed that nothing else tests this macro.
10 #ifndef _TEXTPARSER_HXX_ | |
11 #define _TEXTPARSER_HXX_ | |
12 | |
13 // number of line buffers kept by the parser (the current line plus the previous lines) | |
14 #define MAXPREVLINE 4 | |
15 | |
// Size in chars of each line buffer (and of the URL mask buffer) declared in
// TextParser. Only defined here when no earlier definition is in effect.
16 #ifndef MAXLNLEN | |
17 #define MAXLNLEN 8192 | |
18 #endif | |
19 | |
20 /* | |
21 * Base Text Parser | |
22 * | |
23 */ | |
24 | |
// Base text parser.
//
// Declares the line buffers, the tokenizer state and the token / URL handling
// interface. The destructor and next_token() are virtual, so derived parsers
// can specialize tokenization (the file header lists text, HTML and TeX as the
// implemented parsers).
//
// Only declarations are visible in this header; behaviour described below as
// "presumably" is inferred from names and must be confirmed against the
// implementation file.
25 class TextParser | |
26 { | |
27 | |
28 protected: | |
// Two initializers whose parameter lists mirror the two non-default
// constructors: one takes a C string, the other an array of unsigned short
// plus its length.
// NOTE(review): presumably called by those constructors — confirm.
29 void init(const char *); | |
30 void init(unsigned short * wordchars, int len); | |
// 256-entry table used for detecting word boundaries.
// NOTE(review): presumably indexed by byte value — TODO confirm.
31 int wordcharacters[256]; // for detection of the word boundari
es | |
// MAXPREVLINE buffers of MAXLNLEN chars each: the line being parsed and the
// previously parsed lines.
32 char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines | |
// One MAXLNLEN-sized buffer used as a mask for URL detection.
33 char urlline[MAXLNLEN]; // mask for url detection | |
// NOTE(review): presumably the flag set through set_url_checking() — confirm.
34 int checkurl; | |
// NOTE(review): "actual" presumably means "current", i.e. the index into
// `line` of the line being parsed — confirm.
35 int actual; // actual line | |
36 int head; // head position | |
37 int token; // begin of token | |
38 int state; // state of automata | |
39 int utf8; // UTF-8 character encoding | |
// Overload taking an explicit buffer and a position passed by pointer.
// NOTE(review): presumably advances *pos by one character in `line`;
// the meaning of the int return value is not visible here — confirm.
// The parameter name `line` shadows the member of the same name.
40 int next_char(char * line, int * pos); | |
// Pointer to an array of unsigned short and an int length.
// NOTE(review): presumably the word-character set given to
// init(unsigned short *, int) and its element count; ownership of the
// pointer is not visible from this header — confirm before copying objects.
41 unsigned short * wordchars_utf16; | |
42 int wclen; | |
43 | |
44 public: | |
45 | |
// Constructors: default, from an unsigned short array with length, and from a
// C string. The destructor is virtual so derived parsers can be destroyed
// through a TextParser pointer.
46 TextParser(); | |
47 TextParser(unsigned short * wordchars, int len); | |
48 TextParser(const char * wc); | |
49 virtual ~TextParser(); | |
50 | |
// Line buffer interface.
// NOTE(review): presumably put_line() stores a new line to parse, get_line()
// returns the current line and get_prevline(n) the n-th previous one —
// confirm, including the valid range of n relative to MAXPREVLINE.
51 void put_line(char * line); | |
52 char * get_line(); | |
53 char * get_prevline(int n); | |
// Virtual tokenizer entry point returning a char pointer.
// NOTE(review): whether the caller owns/frees the returned pointer is not
// visible here — confirm (see alloc_token below).
54 virtual char * next_token(); | |
// NOTE(review): presumably replaces the current token with `word`; the
// meaning of the int return value is not visible here — confirm.
55 int change_token(const char * word); | |
56 void set_url_checking(int check); | |
57 | |
// Helpers used during tokenization. Only the signatures are visible; the
// notes below are assumptions to be verified against the implementation.
// NOTE(review): get_tokenpos() presumably reports the position of the
// current token within the line.
58 int get_tokenpos(); | |
// NOTE(review): presumably tests whether the character at `w` is a word
// character; the parameter is non-const although no write is implied.
59 int is_wordchar(char * w); | |
60 const char * get_latin1(char * s); | |
// Parameterless overload of next_char; returns a char pointer.
61 char * next_char(); | |
62 int tokenize_urls(); | |
63 void check_urls(); | |
// Both take a token position and a pointer to a head position.
// NOTE(review): the parameter names `token` and `head` shadow the members of
// the same name; alloc_token presumably returns newly allocated storage —
// confirm who releases it.
64 int get_url(int token_pos, int * head); | |
65 char * alloc_token(int token, int * head); | |
66 }; | |
67 | |
68 #endif | |
69 | |
OLD | NEW |