OLD | NEW |
1 #ifndef __CSUTILHXX__ | 1 #ifndef __CSUTILHXX__ |
2 #define __CSUTILHXX__ | 2 #define __CSUTILHXX__ |
3 | 3 |
4 // First some base level utility routines | 4 // First some base level utility routines |
5 | 5 |
6 typedef struct { | 6 #include "w_char.hxx" |
7 unsigned char l; | 7 |
8 unsigned char h; | 8 // casing |
9 } w_char; | 9 #define NOCAP 0 |
| 10 #define INITCAP 1 |
| 11 #define ALLCAP 2 |
| 12 #define HUHCAP 3 |
| 13 #define HUHINITCAP 4 |
| 14 |
| 15 // default encoding and keystring |
| 16 #define SPELL_ENCODING "ISO8859-1" |
| 17 #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm" |
| 18 |
| 19 // default morphological fields |
| 20 #define MORPH_STEM "st:" |
| 21 #define MORPH_ALLOMORPH "al:" |
| 22 #define MORPH_POS "po:" |
| 23 #define MORPH_DERI_PFX "dp:" |
| 24 #define MORPH_INFL_PFX "ip:" |
| 25 #define MORPH_TERM_PFX "tp:" |
| 26 #define MORPH_DERI_SFX "ds:" |
| 27 #define MORPH_INFL_SFX "is:" |
| 28 #define MORPH_TERM_SFX "ts:" |
| 29 #define MORPH_SURF_PFX "sp:" |
| 30 #define MORPH_FREQ "fr:" |
| 31 #define MORPH_PHON "ph:" |
| 32 #define MORPH_HYPH "hy:" |
| 33 #define MORPH_PART "pa:" |
| 34 #define MORPH_FLAG "fl:" |
| 35 #define MORPH_HENTRY "_H:" |
| 36 #define MORPH_TAG_LEN strlen(MORPH_STEM) |
| 37 |
| 38 #define MSEP_FLD ' ' |
| 39 #define MSEP_REC '\n' |
| 40 #define MSEP_ALT '\v' |
| 41 |
| 42 // default flags |
| 43 #define DEFAULTFLAGS 65510 |
| 44 #define FORBIDDENWORD 65510 |
| 45 #define ONLYUPCASEFLAG 65511 |
| 46 |
| 47 // hash entry macros |
| 48 #define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \ |
| 49 get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : NULL) |
| 50 // NULL-free version for warning-free OOo build |
| 51 #define HENTRY_DATA2(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \ |
| 52 get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : "") |
| 53 #define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL) |
| 54 |
| 55 #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h)) |
10 | 56 |
11 // convert UTF-16 characters to UTF-8 | 57 // convert UTF-16 characters to UTF-8 |
12 char * u16_u8(char * dest, int size, const w_char * src, int srclen); | 58 char * u16_u8(char * dest, int size, const w_char * src, int srclen); |
13 | 59 |
14 // convert UTF-8 characters to UTF-16 | 60 // convert UTF-8 characters to UTF-16 |
15 int u8_u16(w_char * dest, int size, const char * src); | 61 int u8_u16(w_char * dest, int size, const char * src); |
16 | 62 |
17 // sort 2-byte vector | 63 // sort 2-byte vector |
18 void flag_qsort(unsigned short flags[], int begin, int end); | 64 void flag_qsort(unsigned short flags[], int begin, int end); |
19 | 65 |
20 // binary search in 2-byte vector | 66 // binary search in 2-byte vector |
21 int flag_bsearch(unsigned short flags[], unsigned short flag, int right); | 67 int flag_bsearch(unsigned short flags[], unsigned short flag, int right); |
22 | 68 |
23 // remove end of line char(s) | 69 // remove end of line char(s) |
24 void mychomp(char * s); | 70 void mychomp(char * s); |
25 | 71 |
26 // duplicate string | 72 // duplicate string |
27 char * mystrdup(const char * s); | 73 char * mystrdup(const char * s); |
28 | 74 |
| 75 // strcat for limited length destination string |
| 76 char * mystrcat(char * dest, const char * st, int max); |
| 77 |
29 // duplicate reverse of string | 78 // duplicate reverse of string |
30 char * myrevstrdup(const char * s); | 79 char * myrevstrdup(const char * s); |
31 | 80 |
32 // parse into tokens with char delimiter | 81 // parse into tokens with char delimiter |
33 char * mystrsep(char ** sptr, const char delim); | 82 char * mystrsep(char ** sptr, const char delim); |
34 // parse into tokens with char delimiter | 83 // parse into tokens with char delimiter |
35 char * mystrsep2(char ** sptr, const char delim); | 84 char * mystrsep2(char ** sptr, const char delim); |
36 | 85 |
37 // replace a pattern within a string (presumably: text, pattern, replacement) | 86 // replace a pattern within a string (presumably: text, pattern, replacement) |
38 char * mystrrep(char *, const char *, const char *); | 87 char * mystrrep(char *, const char *, const char *); |
39 | 88 |
40 // append s to the end of every line in text | 89 // append s to the end of every line in text |
41 void strlinecat(char * lines, const char * s); | 90 void strlinecat(char * lines, const char * s); |
42 | 91 |
43 // tokenize into lines with new line | 92 // tokenize into lines with new line |
44 int line_tok(const char * text, char *** lines); | 93 int line_tok(const char * text, char *** lines, char breakchar); |
45 | 94 |
46 // tokenize into lines with new line and uniq in place | 95 // tokenize into lines with new line and uniq in place |
47 char * line_uniq(char * text); | 96 char * line_uniq(char * text, char breakchar); |
| 97 char * line_uniq_app(char ** text, char breakchar); |
48 | 98 |
49 // change \n to c in place | 99 // change oldchar to newchar in place |
50 char * line_join(char * text, char c); | 100 char * tr(char * text, char oldc, char newc); |
51 | |
52 // leave only last {[^}]*} pattern in string | |
53 char * delete_zeros(char * morphout); | |
54 | 101 |
55 // reverse word | 102 // reverse word |
56 int reverseword(char *); | 103 int reverseword(char *); |
57 | 104 |
58 // reverse word | 105 // reverse word |
59 int reverseword_utf(char *); | 106 int reverseword_utf(char *); |
60 | 107 |
| 108 // remove duplicates |
| 109 int uniqlist(char ** list, int n); |
| 110 |
| 111 // free character array list |
| 112 void freelist(char *** list, int n); |
| 113 |
61 // character encoding information | 114 // character encoding information |
62 struct cs_info { | 115 struct cs_info { |
63 unsigned char ccase; | 116 unsigned char ccase; |
64 unsigned char clower; | 117 unsigned char clower; |
65 unsigned char cupper; | 118 unsigned char cupper; |
66 }; | 119 }; |
67 | 120 |
68 // Unicode character encoding information | 121 // Unicode character encoding information |
69 struct unicode_info { | 122 struct unicode_info { |
70 unsigned short c; | 123 unsigned short c; |
(...skipping 23 matching lines...) Expand all Loading... |
94 struct lang_map { | 147 struct lang_map { |
95 const char * lang; | 148 const char * lang; |
96 const char * def_enc; | 149 const char * def_enc; |
97 int num; | 150 int num; |
98 }; | 151 }; |
99 | 152 |
100 struct cs_info * get_current_cs(const char * es); | 153 struct cs_info * get_current_cs(const char * es); |
101 | 154 |
102 const char * get_default_enc(const char * lang); | 155 const char * get_default_enc(const char * lang); |
103 | 156 |
| 157 // get language identifiers of language codes |
104 int get_lang_num(const char * lang); | 158 int get_lang_num(const char * lang); |
105 | 159 |
| 160 // get characters of the given 8bit encoding with lower- and uppercase forms |
| 161 char * get_casechars(const char * enc); |
| 162 |
106 // convert null terminated string to all caps using encoding | 163 // convert null terminated string to all caps using encoding |
107 void enmkallcap(char * d, const char * p, const char * encoding); | 164 void enmkallcap(char * d, const char * p, const char * encoding); |
108 | 165 |
109 // convert null-terminated string to all lowercase using encoding | 166 // convert null-terminated string to all lowercase using encoding |
110 void enmkallsmall(char * d, const char * p, const char * encoding); | 167 void enmkallsmall(char * d, const char * p, const char * encoding); |
111 | 168 |
112 // convert null-terminated string to have an initial capital using encoding | 169 // convert null-terminated string to have an initial capital using encoding |
113 void enmkinitcap(char * d, const char * p, const char * encoding); | 170 void enmkinitcap(char * d, const char * p, const char * encoding); |
114 | 171 |
115 // convert null terminated string to all caps | 172 // convert null terminated string to all caps |
116 void mkallcap(char * p, const struct cs_info * csconv); | 173 void mkallcap(char * p, const struct cs_info * csconv); |
117 | 174 |
118 // convert null-terminated string to all lowercase | 175 // convert null-terminated string to all lowercase |
119 void mkallsmall(char * p, const struct cs_info * csconv); | 176 void mkallsmall(char * p, const struct cs_info * csconv); |
120 | 177 |
121 // convert null-terminated string to have an initial capital | 178 // convert null-terminated string to have an initial capital |
122 void mkinitcap(char * p, const struct cs_info * csconv); | 179 void mkinitcap(char * p, const struct cs_info * csconv); |
123 | 180 |
124 // convert first nc characters of UTF-16 (w_char) string to lowercase | 181 // convert first nc characters of UTF-16 (w_char) string to lowercase |
125 void mkallsmall_utf(w_char * u, int nc, int langnum); | 182 void mkallsmall_utf(w_char * u, int nc, int langnum); |
126 | 183 |
127 // convert first nc characters of UTF-16 (w_char) string to uppercase | 184 // convert first nc characters of UTF-16 (w_char) string to uppercase |
128 void mkallcap_utf(w_char * u, int nc, int langnum); | 185 void mkallcap_utf(w_char * u, int nc, int langnum); |
129 | 186 |
| 187 // get type of capitalization |
| 188 int get_captype(char * q, int nl, cs_info *); |
| 189 |
| 190 // get type of capitalization (UTF-8) |
| 191 int get_captype_utf8(w_char * q, int nl, int langnum); |
| 192 |
130 // strip all ignored characters in the string | 193 // strip all ignored characters in the string |
131 void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len); | 194 void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len); |
132 | 195 |
133 // strip all ignored characters in the string | 196 // strip all ignored characters in the string |
134 void remove_ignored_chars(char * word, char * ignored_chars); | 197 void remove_ignored_chars(char * word, char * ignored_chars); |
135 | 198 |
136 int parse_string(char * line, char ** out, const char * name); | 199 int parse_string(char * line, char ** out, int ln); |
137 | 200 |
138 int parse_array(char * line, char ** out, | 201 int parse_array(char * line, char ** out, unsigned short ** out_utf16, |
139 unsigned short ** out_utf16, int * out_utf16_len, const char * name, int utf8); | 202 int * out_utf16_len, int utf8, int ln); |
| 203 |
| 204 int fieldlen(const char * r); |
| 205 char * copy_field(char * dest, const char * morph, const char * var); |
| 206 |
| 207 int morphcmp(const char * s, const char * t); |
| 208 |
| 209 int get_sfxcount(const char * morph); |
| 210 |
| 211 // conversion function for protected memory |
| 212 void store_pointer(char * dest, char * source); |
| 213 |
| 214 // conversion function for protected memory |
| 215 char * get_stored_pointer(char * s); |
140 | 216 |
141 #endif | 217 #endif |
OLD | NEW |