| OLD | NEW |
| (Empty) |
| 1 #ifndef __CSUTILHXX__ | |
| 2 #define __CSUTILHXX__ | |
| 3 | |
| 4 #include "hunvisapi.h" | |
| 5 | |
| 6 // First some base level utility routines | |
| 7 | |
| 8 #include <string.h> | |
| 9 #include "w_char.hxx" | |
| 10 #include "htypes.hxx" | |
| 11 | |
| 12 #ifdef MOZILLA_CLIENT | |
| 13 #include "nscore.h" // for mozalloc headers | |
| 14 #endif | |
| 15 | |
// Casing categories. Presumably these are the values reported by
// get_captype() / get_captype_utf8() -- confirm against the implementation.
#define NOCAP 0
#define INITCAP 1
#define ALLCAP 2
#define HUHCAP 3
#define HUHINITCAP 4
| 22 | |
// Default encoding name and default key string.
// The key string lists the three QWERTY letter rows separated by '|'.
#define SPELL_ENCODING "ISO8859-1"
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
| 26 | |
// Default morphological field tags. Each tag is three characters long:
// a two-character code followed by ':'.
#define MORPH_STEM "st:"
#define MORPH_ALLOMORPH "al:"
#define MORPH_POS "po:"
#define MORPH_DERI_PFX "dp:"
#define MORPH_INFL_PFX "ip:"
#define MORPH_TERM_PFX "tp:"
#define MORPH_DERI_SFX "ds:"
#define MORPH_INFL_SFX "is:"
#define MORPH_TERM_SFX "ts:"
#define MORPH_SURF_PFX "sp:"
#define MORPH_FREQ "fr:"
#define MORPH_PHON "ph:"
#define MORPH_HYPH "hy:"
#define MORPH_PART "pa:"
#define MORPH_FLAG "fl:"
#define MORPH_HENTRY "_H:"

// Length of a tag, measured on MORPH_STEM. sizeof on the string literal
// (minus the terminating NUL) yields the same size_t value as strlen(), but
// as a parenthesized compile-time constant instead of a call on every use.
#define MORPH_TAG_LEN (sizeof(MORPH_STEM) - 1)

// Separator characters: MSEP_FLD is a space, MSEP_REC a newline and
// MSEP_ALT a vertical tab.
#define MSEP_FLD ' '
#define MSEP_REC '\n'
#define MSEP_ALT '\v'
| 49 | |
// Default flag values.
// Note: DEFAULTFLAGS and FORBIDDENWORD are defined to the same value (65510).
#define DEFAULTFLAGS 65510
#define FORBIDDENWORD 65510
#define ONLYUPCASEFLAG 65511
| 54 | |
| 55 // convert UTF-16 characters to UTF-8 | |
| 56 LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src
, int srclen); | |
| 57 | |
| 58 // convert UTF-8 characters to UTF-16 | |
| 59 LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src); | |
| 60 | |
| 61 // sort 2-byte vector | |
| 62 LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int
end); | |
| 63 | |
| 64 // binary search in 2-byte vector | |
| 65 LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short
flag, int right); | |
| 66 | |
| 67 // remove end of line char(s) | |
| 68 LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s); | |
| 69 | |
| 70 // duplicate string | |
| 71 LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s); | |
| 72 | |
| 73 // strcat for limited length destination string | |
| 74 LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max); | |
| 75 | |
| 76 // duplicate reverse of string | |
| 77 LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s); | |
| 78 | |
| 79 // parse into tokens with char delimiter | |
| 80 LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim); | |
| 81 // parse into tokens with char delimiter | |
| 82 LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim); | |
| 83 | |
| 84 // parse into tokens with char delimiter | |
| 85 LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *); | |
| 86 | |
| 87 // append s to ends of every lines in text | |
| 88 LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s); | |
| 89 | |
| 90 // tokenize into lines with new line | |
| 91 LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char br
eakchar); | |
| 92 | |
| 93 // tokenize into lines with new line and uniq in place | |
| 94 LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar); | |
| 95 LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar); | |
| 96 | |
| 97 // change oldchar to newchar in place | |
| 98 LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc); | |
| 99 | |
| 100 // reverse word | |
| 101 LIBHUNSPELL_DLL_EXPORTED int reverseword(char *); | |
| 102 | |
| 103 // reverse word | |
| 104 LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *); | |
| 105 | |
| 106 // remove duplicates | |
| 107 LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n); | |
| 108 | |
| 109 // free character array list | |
| 110 LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n); | |
| 111 | |
// character encoding information
// NOTE(review): the member meanings below are inferred from their names only;
// confirm against the encoding tables that populate this struct.
struct cs_info {
    unsigned char ccase;   // case information (presumably an "is uppercase" flag)
    unsigned char clower;  // presumably the lowercase form of the character
    unsigned char cupper;  // presumably the uppercase form of the character
};
| 118 | |
| 119 LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl(); | |
| 120 LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl(); | |
| 121 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int lan
gnum); | |
| 122 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int lan
gnum); | |
| 123 LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c); | |
| 124 | |
| 125 LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es); | |
| 126 | |
| 127 // get language identifiers of language codes | |
| 128 LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang); | |
| 129 | |
| 130 // get characters of the given 8bit encoding with lower- and uppercase forms | |
| 131 LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc); | |
| 132 | |
| 133 // convert null terminated string to all caps using encoding | |
| 134 LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char *
encoding); | |
| 135 | |
| 136 // convert null terminated string to all little using encoding | |
| 137 LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char
* encoding); | |
| 138 | |
| 139 // convert null terminated string to have initial capital using encoding | |
| 140 LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char *
encoding); | |
| 141 | |
| 142 // convert null terminated string to all caps | |
| 143 LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv); | |
| 144 | |
| 145 // convert null terminated string to all little | |
| 146 LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv
); | |
| 147 | |
| 148 // convert null terminated string to have initial capital | |
| 149 LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv)
; | |
| 150 | |
| 151 // convert first nc characters of UTF-8 string to little | |
| 152 LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum); | |
| 153 | |
| 154 // convert first nc characters of UTF-8 string to capital | |
| 155 LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum); | |
| 156 | |
| 157 // get type of capitalization | |
| 158 LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *); | |
| 159 | |
| 160 // get type of capitalization (UTF-8) | |
| 161 LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum); | |
| 162 | |
| 163 // strip all ignored characters in the string | |
| 164 LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned sho
rt ignored_chars[], int ignored_len); | |
| 165 | |
| 166 // strip all ignored characters in the string | |
| 167 LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_c
hars); | |
| 168 | |
| 169 LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln); | |
| 170 | |
| 171 LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned shor
t ** out_utf16, | |
| 172 int * out_utf16_len, int utf8, int ln); | |
| 173 | |
| 174 LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r); | |
| 175 LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, cons
t char * var); | |
| 176 | |
| 177 LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t); | |
| 178 | |
| 179 LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph); | |
| 180 | |
| 181 // conversion function for protected memory | |
| 182 LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source); | |
| 183 | |
| 184 // conversion function for protected memory | |
| 185 LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s); | |
| 186 | |
| 187 // hash entry macros | |
| 188 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h) | |
| 189 { | |
| 190 char *ret; | |
| 191 if (!h->var) | |
| 192 ret = NULL; | |
| 193 else if (h->var & H_OPT_ALIASM) | |
| 194 ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); | |
| 195 else | |
| 196 ret = HENTRY_WORD(h) + h->blen + 1; | |
| 197 return ret; | |
| 198 } | |
| 199 | |
| 200 // NULL-free version for warning-free OOo build | |
| 201 LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h) | |
| 202 { | |
| 203 const char *ret; | |
| 204 if (!h->var) | |
| 205 ret = ""; | |
| 206 else if (h->var & H_OPT_ALIASM) | |
| 207 ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); | |
| 208 else | |
| 209 ret = HENTRY_WORD(h) + h->blen + 1; | |
| 210 return ret; | |
| 211 } | |
| 212 | |
| 213 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *
p) | |
| 214 { | |
| 215 return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL); | |
| 216 } | |
| 217 | |
// Member-wise equality of two w_char values: true when both the l and the h
// members match. As a macro it evaluates each argument twice, so do not pass
// expressions with side effects.
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
| 219 | |
| 220 #endif | |
| OLD | NEW |