| OLD | NEW |
| (Empty) |
| 1 # | |
| 2 # 2014 Jun 09 | |
| 3 # | |
| 4 # The author disclaims copyright to this source code. In place of | |
| 5 # a legal notice, here is a blessing: | |
| 6 # | |
| 7 # May you do good and not evil. | |
| 8 # May you find forgiveness for yourself and forgive others. | |
| 9 # May you share freely, never taking more than you give. | |
| 10 # | |
| 11 #------------------------------------------------------------------------- | |
| 12 # | |
| 13 # This script generates the implementations of the following C functions, | |
| 14 # which are part of the porter tokenizer implementation: | |
| 15 # | |
| 16 # static int fts5PorterStep1B(char *aBuf, int *pnBuf); | |
| 17 # static int fts5PorterStep1B2(char *aBuf, int *pnBuf); | |
| 18 # static int fts5PorterStep2(char *aBuf, int *pnBuf); | |
| 19 # static int fts5PorterStep3(char *aBuf, int *pnBuf); | |
| 20 # static int fts5PorterStep4(char *aBuf, int *pnBuf); | |
| 21 # | |
| 22 | |
# Rule tables, one array element per generated C function.  The element
# name is appended to "fts5Porter" to form the function name.  Each rule
# is a list of the form:
#
#     SUFFIX  CONDITION  REPLACEMENT  ?RETFLAG?
#
# An empty CONDITION means the rule has no condition callback, an empty
# REPLACEMENT means the suffix is simply removed, and a true RETFLAG makes
# the generated function return 1 when the rule fires.
#
# Rules that share the same second-to-last suffix character are tested in
# the order listed here and only the first matching suffix is considered,
# so "ement" must stay ahead of "ment", which must stay ahead of "ent".

set O(Step1B2) {
  { at  {}  ate  1 }
  { bl  {}  ble  1 }
  { iz  {}  ize  1 }
}

set O(Step1B) {
  { eed  fts5Porter_MGt0   ee  0 }
  { ed   fts5Porter_Vowel  {}  1 }
  { ing  fts5Porter_Vowel  {}  1 }
}

set O(Step2) {
  { ational  fts5Porter_MGt0  ate  }
  { tional   fts5Porter_MGt0  tion }
  { enci     fts5Porter_MGt0  ence }
  { anci     fts5Porter_MGt0  ance }
  { izer     fts5Porter_MGt0  ize  }
  { logi     fts5Porter_MGt0  log  }
  { bli      fts5Porter_MGt0  ble  }
  { alli     fts5Porter_MGt0  al   }
  { entli    fts5Porter_MGt0  ent  }
  { eli      fts5Porter_MGt0  e    }
  { ousli    fts5Porter_MGt0  ous  }
  { ization  fts5Porter_MGt0  ize  }
  { ation    fts5Porter_MGt0  ate  }
  { ator     fts5Porter_MGt0  ate  }
  { alism    fts5Porter_MGt0  al   }
  { iveness  fts5Porter_MGt0  ive  }
  { fulness  fts5Porter_MGt0  ful  }
  { ousness  fts5Porter_MGt0  ous  }
  { aliti    fts5Porter_MGt0  al   }
  { iviti    fts5Porter_MGt0  ive  }
  { biliti   fts5Porter_MGt0  ble  }
}

set O(Step3) {
  { icate  fts5Porter_MGt0  ic }
  { ative  fts5Porter_MGt0  {} }
  { alize  fts5Porter_MGt0  al }
  { iciti  fts5Porter_MGt0  ic }
  { ical   fts5Porter_MGt0  ic }
  { ful    fts5Porter_MGt0  {} }
  { ness   fts5Porter_MGt0  {} }
}

set O(Step4) {
  { al     fts5Porter_MGt1             {} }
  { ance   fts5Porter_MGt1             {} }
  { ence   fts5Porter_MGt1             {} }
  { er     fts5Porter_MGt1             {} }
  { ic     fts5Porter_MGt1             {} }
  { able   fts5Porter_MGt1             {} }
  { ible   fts5Porter_MGt1             {} }
  { ant    fts5Porter_MGt1             {} }
  { ement  fts5Porter_MGt1             {} }
  { ment   fts5Porter_MGt1             {} }
  { ent    fts5Porter_MGt1             {} }
  { ion    fts5Porter_MGt1_and_S_or_T  {} }
  { ou     fts5Porter_MGt1             {} }
  { ism    fts5Porter_MGt1             {} }
  { ate    fts5Porter_MGt1             {} }
  { iti    fts5Porter_MGt1             {} }
  { ous    fts5Porter_MGt1             {} }
  { ive    fts5Porter_MGt1             {} }
  { ize    fts5Porter_MGt1             {} }
}
| 90 | |
# sort_cb LHS RHS
#
# Compare two rules by the second-to-last character of their suffix
# (element 0 of each rule).  Returns -1, 0 or 1 as produced by
# [string compare].  A suffix shorter than two characters yields an empty
# key.
#
# NOTE(review): this has the shape of an [lsort -command] callback, but
# nothing in the visible part of the script calls it.
proc sort_cb {lhs rhs} {
  set lhsKey [string index [lindex $lhs 0] end-1]
  set rhsKey [string index [lindex $rhs 0] end-1]
  return [string compare $lhsKey $rhsKey]
}
| 96 | |
# create_step_function NAME DATA
#
# Write to stdout the C source of one generated function:
#
#     static int fts5Porter<NAME>(char *aBuf, int *pnBuf);
#
# DATA is a list of rules, each a list of three or four elements:
#
#     SUFFIX  CONDITION  REPLACEMENT  ?RETFLAG?
#
#   SUFFIX       Compared with memcmp() against the last bytes of aBuf.
#                The match also requires nBuf to be strictly greater than
#                the suffix length.  Must be at least two characters long,
#                because rules are dispatched on the second-to-last
#                character of the suffix.
#   CONDITION    Name of a C function invoked as CONDITION(aBuf, nBuf-n),
#                where n is the suffix length.  Empty for "no condition".
#   REPLACEMENT  Text copied over the suffix; *pnBuf is adjusted to the
#                new length.  Empty to just strip the suffix.
#   RETFLAG      If present and true, the generated code sets ret = 1 when
#                the rule fires.  The generated function returns ret.
#
# Rules are grouped into one "case" per dispatch character, with the case
# labels in [lsort] order.  Inside a case the rules keep their DATA order
# and are chained with "else", so only the first suffix that matches is
# considered, even if its condition then fails.
#
# Raises an error if a suffix is shorter than two characters.
proc create_step_function {name data} {

  # Templates are expanded with [subst -nocommands], so $var references
  # are replaced while the square brackets in the C code are left alone.
  # Everything inside the braces is emitted verbatim as C source.
  #
  # The nBuf<2 test keeps the switch expression from reading before the
  # start of aBuf.  It cannot change which rule fires, because every rule
  # requires nBuf to exceed a suffix length of at least two.
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # One template per combination, named if_<bCond>_<bMemcpy>_<bRet>:
  #   bCond    the rule has a condition callback
  #   bMemcpy  the rule has a non-empty replacement
  #   bRet     the rule sets ret = 1
  set T(if_0_0_0) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
      }
  }
  set T(if_1_0_0) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
        }
      }
  }
  set T(if_0_1_0) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
  }
  set T(if_1_1_0) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
        }
      }
  }
  set T(if_0_0_1) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
  }
  set T(if_1_0_1) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
          ret = 1;
        }
      }
  }
  set T(if_0_1_1) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
  }
  set T(if_1_1_1) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
          ret = 1;
        }
      }
  }

  # Bucket the rules by dispatch character, keeping DATA order within
  # each bucket.  A shorter suffix would give an empty character and an
  # invalid case label, so reject it here.
  foreach I $data {
    set zSuffix [lindex $I 0]
    if {[string length $zSuffix] < 2} {
      error "create_step_function $name: suffix \"$zSuffix\" must be at least 2 characters long"
    }
    lappend aCase([string index $zSuffix end-1]) $I
  }

  set switchbody ""
  foreach k [lsort [array names aCase]] {
    set ifstmts [list]
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]          ;# Suffix text for this rule
      set xCond   [lindex $I 1]          ;# Condition callback (or "")
      set zRep    [lindex $I 2]          ;# Replacement text for rule

      set n    [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments referenced by name from the if_* templates.
      set match  "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond   "${xCond}(aBuf, nBuf-$n)"

      set bCond   [expr {$xCond ne ""}]
      set bMemcpy [expr {$nRep>0}]
      set bRet    [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Chain the rules of this case so that at most one of them fires.
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }

  puts [subst -nocommands $T(function)]
}
| 203 | |
| 204 | |
# Script body: print the opening banner, then one generated C function per
# element of the O array, then the closing banner.
#
# [array names] returns the element names in no particular order, so the
# names are sorted to make the generated output reproducible from run to
# run.  Sorted order is Step1B, Step1B2, Step2, Step3, Step4.
puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}
puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]
| 220 | |
| 221 | |
| 222 | |
| OLD | NEW |