OLD | NEW |
(Empty) | |
| 1 # |
| 2 # 2014 Jun 09 |
| 3 # |
| 4 # The author disclaims copyright to this source code. In place of |
| 5 # a legal notice, here is a blessing: |
| 6 # |
| 7 # May you do good and not evil. |
| 8 # May you find forgiveness for yourself and forgive others. |
| 9 # May you share freely, never taking more than you give. |
| 10 # |
| 11 #------------------------------------------------------------------------- |
| 12 # |
| 13 # This script generates the implementations of the following C functions, |
| 14 # which are part of the porter tokenizer implementation: |
| 15 # |
| 16 # static int fts5PorterStep1B(char *aBuf, int *pnBuf); |
| 17 # static int fts5PorterStep1B2(char *aBuf, int *pnBuf); |
| 18 # static int fts5PorterStep2(char *aBuf, int *pnBuf); |
| 19 # static int fts5PorterStep3(char *aBuf, int *pnBuf); |
| 20 # static int fts5PorterStep4(char *aBuf, int *pnBuf); |
| 21 # |
| 22 |
# Rule tables, one per generated C function.  Each rule is a Tcl list:
#
#     SUFFIX  CONDITION  REPLACEMENT  ?RETFLAG?
#
# SUFFIX is the word ending to look for, CONDITION is the name of a C
# predicate (or "" for none), REPLACEMENT is the text written in place of
# the suffix (or "" to simply strip it) and the optional RETFLAG, when
# true, makes the generated function return 1 after applying the rule.
#
# The order of rules inside a table is significant: rules that share the
# same second-to-last suffix character are tested in the order listed.
# Do not place comments inside the braces - they would become list data.

set O(Step1B2) {
  { "at"  ""  "ate"  1 }
  { "bl"  ""  "ble"  1 }
  { "iz"  ""  "ize"  1 }
}

set O(Step1B) {
  { "eed"  fts5Porter_MGt0   "ee"  0 }
  { "ed"   fts5Porter_Vowel  ""    1 }
  { "ing"  fts5Porter_Vowel  ""    1 }
}

set O(Step2) {
  { "ational"  fts5Porter_MGt0  "ate"  }
  { "tional"   fts5Porter_MGt0  "tion" }
  { "enci"     fts5Porter_MGt0  "ence" }
  { "anci"     fts5Porter_MGt0  "ance" }
  { "izer"     fts5Porter_MGt0  "ize"  }
  { "logi"     fts5Porter_MGt0  "log"  }
  { "bli"      fts5Porter_MGt0  "ble"  }
  { "alli"     fts5Porter_MGt0  "al"   }
  { "entli"    fts5Porter_MGt0  "ent"  }
  { "eli"      fts5Porter_MGt0  "e"    }
  { "ousli"    fts5Porter_MGt0  "ous"  }
  { "ization"  fts5Porter_MGt0  "ize"  }
  { "ation"    fts5Porter_MGt0  "ate"  }
  { "ator"     fts5Porter_MGt0  "ate"  }
  { "alism"    fts5Porter_MGt0  "al"   }
  { "iveness"  fts5Porter_MGt0  "ive"  }
  { "fulness"  fts5Porter_MGt0  "ful"  }
  { "ousness"  fts5Porter_MGt0  "ous"  }
  { "aliti"    fts5Porter_MGt0  "al"   }
  { "iviti"    fts5Porter_MGt0  "ive"  }
  { "biliti"   fts5Porter_MGt0  "ble"  }
}

set O(Step3) {
  { "icate"  fts5Porter_MGt0  "ic" }
  { "ative"  fts5Porter_MGt0  ""   }
  { "alize"  fts5Porter_MGt0  "al" }
  { "iciti"  fts5Porter_MGt0  "ic" }
  { "ical"   fts5Porter_MGt0  "ic" }
  { "ful"    fts5Porter_MGt0  ""   }
  { "ness"   fts5Porter_MGt0  ""   }
}

set O(Step4) {
  { "al"     fts5Porter_MGt1              "" }
  { "ance"   fts5Porter_MGt1              "" }
  { "ence"   fts5Porter_MGt1              "" }
  { "er"     fts5Porter_MGt1              "" }
  { "ic"     fts5Porter_MGt1              "" }
  { "able"   fts5Porter_MGt1              "" }
  { "ible"   fts5Porter_MGt1              "" }
  { "ant"    fts5Porter_MGt1              "" }
  { "ement"  fts5Porter_MGt1              "" }
  { "ment"   fts5Porter_MGt1              "" }
  { "ent"    fts5Porter_MGt1              "" }
  { "ion"    fts5Porter_MGt1_and_S_or_T   "" }
  { "ou"     fts5Porter_MGt1              "" }
  { "ism"    fts5Porter_MGt1              "" }
  { "ate"    fts5Porter_MGt1              "" }
  { "iti"    fts5Porter_MGt1              "" }
  { "ous"    fts5Porter_MGt1              "" }
  { "ive"    fts5Porter_MGt1              "" }
  { "ize"    fts5Porter_MGt1              "" }
}
| 90 |
# sort_cb LHS RHS
#
# Three-way comparison of two rules.  The key of a rule is the
# second-to-last character of its first element (the suffix text); a
# suffix shorter than two characters yields an empty key.  Returns the
# [string compare] result (-1, 0 or 1) for the two keys.
#
# NOTE(review): nothing in the visible part of this script calls this
# procedure.
proc sort_cb {lhs rhs} {
  set keyL [string index [lindex $lhs 0] end-1]
  set keyR [string index [lindex $rhs 0] end-1]
  return [string compare $keyL $keyR]
}
| 96 |
# create_step_function NAME DATA
#
# Write to stdout the C source text of one function:
#
#     static int fts5Porter<NAME>(char *aBuf, int *pnBuf);
#
# DATA is a list of rules, each a list of three or four elements:
#
#     SUFFIX  CONDITION  REPLACEMENT  ?RETFLAG?
#
#   SUFFIX       Text that must appear at the end of aBuf for the rule to
#                match.  The generated test also requires nBuf to be
#                strictly greater than the suffix length.
#   CONDITION    Name of a C function invoked as CONDITION(aBuf, nBuf-n),
#                where n is the suffix length.  The rule is applied only
#                if it returns non-zero.  An empty string means the rule
#                is unconditional.
#   REPLACEMENT  Text copied over the suffix.  An empty string means the
#                suffix is just removed.
#   RETFLAG      Optional boolean.  When true the generated function sets
#                its return value to 1 after applying the rule; otherwise
#                the return value stays 0.
#
# The generated function switches on aBuf[nBuf-2].  Rules are grouped by
# the second-to-last character of SUFFIX, the groups are emitted as case
# labels in sorted order, and within one group the rules are chained with
# "else" in the order they appear in DATA - so a longer suffix must be
# listed before any shorter suffix that it ends with.
proc create_step_function {name data} {

  # Skeleton of the C function.  Variables are filled in by [subst].
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  # One "case" of the switch, holding every rule of a single group.
  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # Rule templates.  The array key is if_<bCond>_<bMemcpy>_<bRet>:
  #   bCond    - the rule has a condition callback
  #   bMemcpy  - the rule has a non-empty replacement
  #   bRet     - the rule sets the function return value to 1
  # All eight combinations are defined so that any rule shape is accepted.
  set T(if_0_0_0) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
      }
  }
  set T(if_1_0_0) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
        }
      }
  }
  set T(if_0_1_0) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
  }
  set T(if_1_1_0) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
        }
      }
  }
  set T(if_0_0_1) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
  }
  set T(if_1_0_1) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
          ret = 1;
        }
      }
  }
  set T(if_0_1_1) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
  }
  set T(if_1_1_1) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
          ret = 1;
        }
      }
  }

  set switchbody ""

  # Group the rules by the second-to-last character of their suffix,
  # preserving the relative order of the rules inside each group.
  foreach I $data {
    set k [string range [lindex $I 0] end-1 end-1]
    lappend aCase($k) $I
  }

  # Emit the groups in sorted key order.
  foreach k [lsort [array names aCase]] {
    set ifstmts ""
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]         ;# Suffix text for this rule
      set zRep [lindex $I 2]            ;# Replacement text for rule
      set xCond [lindex $I 1]           ;# Condition callback (or "")

      set n [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments referenced by the rule templates.
      set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond "${xCond}(aBuf, nBuf-$n)"

      # Pick the template matching the shape of this rule.  "ne" forces a
      # string comparison of the callback name against the empty string.
      set bMemcpy [expr {$nRep>0}]
      set bCond [expr {$xCond ne ""}]
      set bRet [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Chain the rules of the group: if(..){..}else if(..){..}else ...
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }


  puts [subst -nocommands $T(function)]
}
| 203 |
| 204 |
# Main script: print the opening banner, one generated C function for each
# element of the O array, then the closing banner.
#
# The element names are sorted before iterating because [array names]
# returns them in no defined order; sorting keeps the order of the
# generated functions stable from one run of this script to the next.
puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}
puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]
| 220 |
| 221 |
| 222 |
OLD | NEW |