| OLD | NEW |
| 1 #ifndef __CSUTILHXX__ | 1 #ifndef __CSUTILHXX__ |
| 2 #define __CSUTILHXX__ | 2 #define __CSUTILHXX__ |
| 3 | 3 |
| 4 // First some base level utility routines | 4 // First some base level utility routines |
| 5 | 5 |
| 6 typedef struct { | 6 #include "w_char.hxx" |
| 7 unsigned char l; | 7 |
| 8 unsigned char h; | 8 // casing |
| 9 } w_char; | 9 #define NOCAP 0 |
| 10 #define INITCAP 1 |
| 11 #define ALLCAP 2 |
| 12 #define HUHCAP 3 |
| 13 #define HUHINITCAP 4 |
| 14 |
| 15 // default encoding and keystring |
| 16 #define SPELL_ENCODING "ISO8859-1" |
| 17 #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm" |
| 18 |
| 19 // default morphological fields |
| 20 #define MORPH_STEM "st:" |
| 21 #define MORPH_ALLOMORPH "al:" |
| 22 #define MORPH_POS "po:" |
| 23 #define MORPH_DERI_PFX "dp:" |
| 24 #define MORPH_INFL_PFX "ip:" |
| 25 #define MORPH_TERM_PFX "tp:" |
| 26 #define MORPH_DERI_SFX "ds:" |
| 27 #define MORPH_INFL_SFX "is:" |
| 28 #define MORPH_TERM_SFX "ts:" |
| 29 #define MORPH_SURF_PFX "sp:" |
| 30 #define MORPH_FREQ "fr:" |
| 31 #define MORPH_PHON "ph:" |
| 32 #define MORPH_HYPH "hy:" |
| 33 #define MORPH_PART "pa:" |
| 34 #define MORPH_FLAG "fl:" |
| 35 #define MORPH_HENTRY "_H:" |
| 36 #define MORPH_TAG_LEN strlen(MORPH_STEM) |
| 37 |
| 38 #define MSEP_FLD ' ' |
| 39 #define MSEP_REC '\n' |
| 40 #define MSEP_ALT '\v' |
| 41 |
| 42 // default flags |
| 43 #define DEFAULTFLAGS 65510 |
| 44 #define FORBIDDENWORD 65510 |
| 45 #define ONLYUPCASEFLAG 65511 |
| 46 |
| 47 // hash entry macros |
| 48 #define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \ |
| 49 get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : NULL) |
| 50 // NULL-free version for warning-free OOo build |
| 51 #define HENTRY_DATA2(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \ |
| 52 get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : "") |
| 53 #define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL) |
| 54 |
| 55 #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h)) |
| 10 | 56 |
| 11 // convert UTF-16 characters to UTF-8 | 57 // convert UTF-16 characters to UTF-8 |
| 12 char * u16_u8(char * dest, int size, const w_char * src, int srclen); | 58 char * u16_u8(char * dest, int size, const w_char * src, int srclen); |
| 13 | 59 |
| 14 // convert UTF-8 characters to UTF-16 | 60 // convert UTF-8 characters to UTF-16 |
| 15 int u8_u16(w_char * dest, int size, const char * src); | 61 int u8_u16(w_char * dest, int size, const char * src); |
| 16 | 62 |
| 17 // sort 2-byte vector | 63 // sort 2-byte vector |
| 18 void flag_qsort(unsigned short flags[], int begin, int end); | 64 void flag_qsort(unsigned short flags[], int begin, int end); |
| 19 | 65 |
| 20 // binary search in 2-byte vector | 66 // binary search in 2-byte vector |
| 21 int flag_bsearch(unsigned short flags[], unsigned short flag, int right); | 67 int flag_bsearch(unsigned short flags[], unsigned short flag, int right); |
| 22 | 68 |
| 23 // remove end of line char(s) | 69 // remove end of line char(s) |
| 24 void mychomp(char * s); | 70 void mychomp(char * s); |
| 25 | 71 |
| 26 // duplicate string | 72 // duplicate string |
| 27 char * mystrdup(const char * s); | 73 char * mystrdup(const char * s); |
| 28 | 74 |
| 75 // strcat for limited length destination string |
| 76 char * mystrcat(char * dest, const char * st, int max); |
| 77 |
| 29 // duplicate reverse of string | 78 // duplicate reverse of string |
| 30 char * myrevstrdup(const char * s); | 79 char * myrevstrdup(const char * s); |
| 31 | 80 |
| 32 // parse into tokens with char delimiter | 81 // parse into tokens with char delimiter |
| 33 char * mystrsep(char ** sptr, const char delim); | 82 char * mystrsep(char ** sptr, const char delim); |
| 34 // parse into tokens with char delimiter | 83 // parse into tokens with char delimiter |
| 35 char * mystrsep2(char ** sptr, const char delim); | 84 char * mystrsep2(char ** sptr, const char delim); |
| 36 | 85 |
| 37 // parse into tokens with char delimiter | 86 // parse into tokens with char delimiter |
| 38 char * mystrrep(char *, const char *, const char *); | 87 char * mystrrep(char *, const char *, const char *); |
| 39 | 88 |
| 40 // append s to the end of every line in text | 89 // append s to the end of every line in text |
| 41 void strlinecat(char * lines, const char * s); | 90 void strlinecat(char * lines, const char * s); |
| 42 | 91 |
| 43 // tokenize into lines with new line | 92 // tokenize into lines with new line |
| 44 int line_tok(const char * text, char *** lines); | 93 int line_tok(const char * text, char *** lines, char breakchar); |
| 45 | 94 |
| 46 // tokenize into lines with new line and uniq in place | 95 // tokenize into lines with new line and uniq in place |
| 47 char * line_uniq(char * text); | 96 char * line_uniq(char * text, char breakchar); |
| 97 char * line_uniq_app(char ** text, char breakchar); |
| 48 | 98 |
| 49 // change \n to c in place | 99 // change oldchar to newchar in place |
| 50 char * line_join(char * text, char c); | 100 char * tr(char * text, char oldc, char newc); |
| 51 | |
| 52 // leave only last {[^}]*} pattern in string | |
| 53 char * delete_zeros(char * morphout); | |
| 54 | 101 |
| 55 // reverse word | 102 // reverse word |
| 56 int reverseword(char *); | 103 int reverseword(char *); |
| 57 | 104 |
| 58 // reverse word | 105 // reverse word |
| 59 int reverseword_utf(char *); | 106 int reverseword_utf(char *); |
| 60 | 107 |
| 108 // remove duplicates |
| 109 int uniqlist(char ** list, int n); |
| 110 |
| 111 // free character array list |
| 112 void freelist(char *** list, int n); |
| 113 |
| 61 // character encoding information | 114 // character encoding information |
| 62 struct cs_info { | 115 struct cs_info { |
| 63 unsigned char ccase; | 116 unsigned char ccase; |
| 64 unsigned char clower; | 117 unsigned char clower; |
| 65 unsigned char cupper; | 118 unsigned char cupper; |
| 66 }; | 119 }; |
| 67 | 120 |
| 68 // Unicode character encoding information | 121 // Unicode character encoding information |
| 69 struct unicode_info { | 122 struct unicode_info { |
| 70 unsigned short c; | 123 unsigned short c; |
| (...skipping 23 matching lines...) Expand all Loading... |
| 94 struct lang_map { | 147 struct lang_map { |
| 95 const char * lang; | 148 const char * lang; |
| 96 const char * def_enc; | 149 const char * def_enc; |
| 97 int num; | 150 int num; |
| 98 }; | 151 }; |
| 99 | 152 |
| 100 struct cs_info * get_current_cs(const char * es); | 153 struct cs_info * get_current_cs(const char * es); |
| 101 | 154 |
| 102 const char * get_default_enc(const char * lang); | 155 const char * get_default_enc(const char * lang); |
| 103 | 156 |
| 157 // get language identifiers of language codes |
| 104 int get_lang_num(const char * lang); | 158 int get_lang_num(const char * lang); |
| 105 | 159 |
| 160 // get characters of the given 8bit encoding with lower- and uppercase forms |
| 161 char * get_casechars(const char * enc); |
| 162 |
| 106 // convert null terminated string to all caps using encoding | 163 // convert null terminated string to all caps using encoding |
| 107 void enmkallcap(char * d, const char * p, const char * encoding); | 164 void enmkallcap(char * d, const char * p, const char * encoding); |
| 108 | 165 |
| 109 // convert null terminated string to all little using encoding | 166 // convert null terminated string to all little using encoding |
| 110 void enmkallsmall(char * d, const char * p, const char * encoding); | 167 void enmkallsmall(char * d, const char * p, const char * encoding); |
| 111 | 168 |
| 112 // convert null terminated string to have initial capital using encoding | 169 // convert null terminated string to have initial capital using encoding |
| 113 void enmkinitcap(char * d, const char * p, const char * encoding); | 170 void enmkinitcap(char * d, const char * p, const char * encoding); |
| 114 | 171 |
| 115 // convert null terminated string to all caps | 172 // convert null terminated string to all caps |
| 116 void mkallcap(char * p, const struct cs_info * csconv); | 173 void mkallcap(char * p, const struct cs_info * csconv); |
| 117 | 174 |
| 118 // convert null terminated string to all little | 175 // convert null terminated string to all little |
| 119 void mkallsmall(char * p, const struct cs_info * csconv); | 176 void mkallsmall(char * p, const struct cs_info * csconv); |
| 120 | 177 |
| 121 // convert null terminated string to have initial capital | 178 // convert null terminated string to have initial capital |
| 122 void mkinitcap(char * p, const struct cs_info * csconv); | 179 void mkinitcap(char * p, const struct cs_info * csconv); |
| 123 | 180 |
| 124 // convert first nc characters of UTF-8 string to little | 181 // convert first nc characters of UTF-8 string to little |
| 125 void mkallsmall_utf(w_char * u, int nc, int langnum); | 182 void mkallsmall_utf(w_char * u, int nc, int langnum); |
| 126 | 183 |
| 127 // convert first nc characters of UTF-8 string to capital | 184 // convert first nc characters of UTF-8 string to capital |
| 128 void mkallcap_utf(w_char * u, int nc, int langnum); | 185 void mkallcap_utf(w_char * u, int nc, int langnum); |
| 129 | 186 |
| 187 // get type of capitalization |
| 188 int get_captype(char * q, int nl, cs_info *); |
| 189 |
| 190 // get type of capitalization (UTF-8) |
| 191 int get_captype_utf8(w_char * q, int nl, int langnum); |
| 192 |
| 130 // strip all ignored characters in the string | 193 // strip all ignored characters in the string |
| 131 void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len); | 194 void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len); |
| 132 | 195 |
| 133 // strip all ignored characters in the string | 196 // strip all ignored characters in the string |
| 134 void remove_ignored_chars(char * word, char * ignored_chars); | 197 void remove_ignored_chars(char * word, char * ignored_chars); |
| 135 | 198 |
| 136 int parse_string(char * line, char ** out, const char * name); | 199 int parse_string(char * line, char ** out, int ln); |
| 137 | 200 |
| 138 int parse_array(char * line, char ** out, | 201 int parse_array(char * line, char ** out, unsigned short ** out_utf16, |
| 139 unsigned short ** out_utf16, int * out_utf16_len, const char * name, int utf8); | 202 int * out_utf16_len, int utf8, int ln); |
| 203 |
| 204 int fieldlen(const char * r); |
| 205 char * copy_field(char * dest, const char * morph, const char * var); |
| 206 |
| 207 int morphcmp(const char * s, const char * t); |
| 208 |
| 209 int get_sfxcount(const char * morph); |
| 210 |
| 211 // conversion function for protected memory |
| 212 void store_pointer(char * dest, char * source); |
| 213 |
| 214 // conversion function for protected memory |
| 215 char * get_stored_pointer(char * s); |
| 140 | 216 |
| 141 #endif | 217 #endif |
| OLD | NEW |