| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * parser classes for MySpell | |
| 3 * | |
| 4 * implemented: text, HTML, TeX | |
| 5 * | |
| 6 * Copyright (C) 2002, Laszlo Nemeth | |
| 7 * | |
| 8 */ | |
| 9 | |
| 10 #ifndef _TEXTPARSER_HXX_ | |
| 11 #define _TEXTPARSER_HXX_ | |
| 12 | |
| 13 // number of line buffers kept: the actual (current) line plus the previous lines | |
| 14 #define MAXPREVLINE 4 | |
| 15 | |
| 16 #ifndef MAXLNLEN | |
| 17 #define MAXLNLEN 8192 | |
| 18 #endif | |
| 19 | |
| 20 /* | |
| 21 * Base Text Parser | |
| 22 * | |
| * Declares the state shared by the parsers (line buffers, URL mask, | |
| * head and token positions, automaton state) and the tokenizer | |
| * interface (put_line, next_token, change_token, ...). | |
| * | |
| * next_token() and the destructor are virtual, which suggests the | |
| * class is meant to be subclassed. | |
| * | |
| * NOTE(review): only declarations are visible in this header. The | |
| * ownership of the char * values returned by get_line(), | |
| * get_prevline(), next_token(), next_char() and alloc_token(), and the | |
| * meaning of the int return values, must be confirmed against the | |
| * implementation file. | |
| 23 */ | |
| 24 | |
| 25 class TextParser | |
| 26 { | |
| 27 | |
| 28 protected: | |
| 29 void init(const char *); | |
| 30 void init(unsigned short * wordchars, int len); | |
| 31 int wordcharacters[256]; // for detection of the word boundari
es | |
| 32 char line[MAXPREVLINE][MAXLNLEN]; // line buffers: the parsed line and the previous lines (MAXPREVLINE buffers of MAXLNLEN chars) | |
| 33 char urlline[MAXLNLEN]; // mask for URL detection; same size as one line buffer | |
| 34 int checkurl; | |
| 35 int actual; // actual (current) line; presumably an index into line[] - TODO confirm | |
| 36 int head; // head position; presumably an offset into the actual line - TODO confirm | |
| 37 int token; // begin of the current token | |
| 38 int state; // state of the parsing automaton | |
| 39 int utf8; // UTF-8 character encoding; presumably a nonzero-means-enabled flag - TODO confirm | |
| 40 int next_char(char * line, int * pos); | |
| 41 unsigned short * wordchars_utf16; | |
| 42 int wclen; | |
| 43 | |
| 44 public: | |
| 45 | |
| 46 TextParser(); | |
| 47 TextParser(unsigned short * wordchars, int len); | |
| 48 TextParser(const char * wc); | |
| 49 virtual ~TextParser(); | |
| 50 | |
| 51 void put_line(char * line); | |
| 52 char * get_line(); | |
| 53 char * get_prevline(int n); | |
| 54 virtual char * next_token(); | |
| 55 int change_token(const char * word); | |
| 56 void set_url_checking(int check); | |
| 57 | |
| 58 int get_tokenpos(); | |
| 59 int is_wordchar(char * w); | |
| 60 const char * get_latin1(char * s); | |
| 61 char * next_char(); | |
| 62 int tokenize_urls(); | |
| 63 void check_urls(); | |
| 64 int get_url(int token_pos, int * head); | |
| 65 char * alloc_token(int token, int * head); | |
| 66 }; | |
| 67 | |
| 68 #endif | |
| 69 | |
| OLD | NEW |