OLD | NEW |
| (Empty) |
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#   static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#   static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep3(char *aBuf, int *pnBuf);
#   static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
22 | |
# Rule table for fts5PorterStep1B2().
#
# Each rule is a list { suffix condition replacement flag }. None of
# these rules has a condition callback (the second element is empty).
# The non-zero flag makes the generated function set its return value
# to 1 when the rule is applied.
set O(Step1B2) {
  { "at" "" "ate" 1 }
  { "bl" "" "ble" 1 }
  { "iz" "" "ize" 1 }
}
28 | |
# Rule table for fts5PorterStep1B().
#
# Each rule is a list { suffix condition replacement flag }. The
# condition names a C function that the generated code calls on the
# stem that precedes the suffix. A non-zero flag makes the generated
# function set its return value to 1 when the rule is applied.
set O(Step1B) {
  { "eed"  fts5Porter_MGt0   "ee"  0 }
  { "ed"   fts5Porter_Vowel  ""    1 }
  { "ing"  fts5Porter_Vowel  ""    1 }
}
34 | |
# Rule table for fts5PorterStep2().
#
# Each rule is a list { suffix condition replacement }. Rules whose
# suffixes share the same second-to-last character end up in one
# if/else-if chain of the generated code, tested in the order listed
# here, so the relative order of such rules must be preserved (for
# example "ization" before "ation").
set O(Step2) {
  { "ational"  fts5Porter_MGt0  "ate"  }
  { "tional"   fts5Porter_MGt0  "tion" }
  { "enci"     fts5Porter_MGt0  "ence" }
  { "anci"     fts5Porter_MGt0  "ance" }
  { "izer"     fts5Porter_MGt0  "ize"  }
  { "logi"     fts5Porter_MGt0  "log"  }
  { "bli"      fts5Porter_MGt0  "ble"  }
  { "alli"     fts5Porter_MGt0  "al"   }
  { "entli"    fts5Porter_MGt0  "ent"  }
  { "eli"      fts5Porter_MGt0  "e"    }
  { "ousli"    fts5Porter_MGt0  "ous"  }
  { "ization"  fts5Porter_MGt0  "ize"  }
  { "ation"    fts5Porter_MGt0  "ate"  }
  { "ator"     fts5Porter_MGt0  "ate"  }
  { "alism"    fts5Porter_MGt0  "al"   }
  { "iveness"  fts5Porter_MGt0  "ive"  }
  { "fulness"  fts5Porter_MGt0  "ful"  }
  { "ousness"  fts5Porter_MGt0  "ous"  }
  { "aliti"    fts5Porter_MGt0  "al"   }
  { "iviti"    fts5Porter_MGt0  "ive"  }
  { "biliti"   fts5Porter_MGt0  "ble"  }
}
58 | |
# Rule table for fts5PorterStep3().
#
# Each rule is a list { suffix condition replacement }. An empty
# replacement means the suffix is removed rather than rewritten.
set O(Step3) {
  { "icate"  fts5Porter_MGt0  "ic" }
  { "ative"  fts5Porter_MGt0  ""   }
  { "alize"  fts5Porter_MGt0  "al" }
  { "iciti"  fts5Porter_MGt0  "ic" }
  { "ical"   fts5Porter_MGt0  "ic" }
  { "ful"    fts5Porter_MGt0  ""   }
  { "ness"   fts5Porter_MGt0  ""   }
}
68 | |
# Rule table for fts5PorterStep4().
#
# Each rule is a list { suffix condition replacement }. Every
# replacement is empty, so an applied rule only removes the suffix.
# All rules use fts5Porter_MGt1 except "ion", which uses
# fts5Porter_MGt1_and_S_or_T. "ement", "ment" and "ent" share a
# second-to-last character and are tested in the order listed, so
# that order must be preserved.
set O(Step4) {
  { "al"     fts5Porter_MGt1             "" }
  { "ance"   fts5Porter_MGt1             "" }
  { "ence"   fts5Porter_MGt1             "" }
  { "er"     fts5Porter_MGt1             "" }
  { "ic"     fts5Porter_MGt1             "" }
  { "able"   fts5Porter_MGt1             "" }
  { "ible"   fts5Porter_MGt1             "" }
  { "ant"    fts5Porter_MGt1             "" }
  { "ement"  fts5Porter_MGt1             "" }
  { "ment"   fts5Porter_MGt1             "" }
  { "ent"    fts5Porter_MGt1             "" }
  { "ion"    fts5Porter_MGt1_and_S_or_T  "" }
  { "ou"     fts5Porter_MGt1             "" }
  { "ism"    fts5Porter_MGt1             "" }
  { "ate"    fts5Porter_MGt1             "" }
  { "iti"    fts5Porter_MGt1             "" }
  { "ous"    fts5Porter_MGt1             "" }
  { "ive"    fts5Porter_MGt1             "" }
  { "ize"    fts5Porter_MGt1             "" }
}
90 | |
# Comparison callback for two rules.
#
# Each argument is a rule list whose first element is the suffix text.
# The rules are ordered by the second-to-last character of that suffix.
# Returns -1, 0 or 1, as produced by [string compare].
proc sort_cb {lhs rhs} {
  set keyL [string index [lindex $lhs 0] end-1]
  set keyR [string index [lindex $rhs 0] end-1]
  return [string compare $keyL $keyR]
}
96 | |
# Write, via [puts], the C source of one generated porter step function:
#
#   static int fts5Porter<name>(char *aBuf, int *pnBuf);
#
# Arguments:
#   name   Text appended to "fts5Porter" to form the C function name.
#   data   List of rules. Each rule is itself a list of the form
#            suffix condition replacement ?flag?
#          suffix       Text compared against the end of the buffer.
#          condition    Name of a C function called as
#                       condition(aBuf, nBuf-N), where N is the suffix
#                       length. An empty string makes the rule
#                       unconditional.
#          replacement  Text copied over the suffix. An empty string
#                       means the suffix is simply removed.
#          flag         Optional. When true, the generated code sets its
#                       return value to 1 if the rule is applied.
#
# Rules are grouped into "case" labels keyed on the second-to-last
# character of their suffix, matching the generated switch on
# aBuf[nBuf-2]. Within one case the rules become an if/else-if chain in
# the order they appear in $data, so the first matching suffix wins.
#
# The code for a rule is chosen from the T(if_<cond>_<memcpy>_<ret>)
# templates. All eight combinations are defined, including
# T(if_0_0_1) (unconditional, no replacement, flag set), so every valid
# rule shape can be generated.
proc create_step_function {name data} {

  # Skeleton of the generated C function.
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  # One "case" label holding the if/else-if chain for key character $k.
  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # Per-rule templates, named if_<has condition>_<has replacement>_<flag>.
  set T(if_0_0_0) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
      }
  }
  set T(if_1_0_0) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
        }
      }
  }
  set T(if_0_1_0) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
  }
  set T(if_1_1_0) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
        }
      }
  }
  set T(if_0_0_1) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
  }
  set T(if_1_0_1) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
          ret = 1;
        }
      }
  }
  set T(if_0_1_1) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
  }
  set T(if_1_1_1) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
          ret = 1;
        }
      }
  }

  set switchbody ""

  # Bucket the rules by the second-to-last character of their suffix,
  # keeping the original relative order inside each bucket.
  foreach I $data {
    set k [string index [lindex $I 0] end-1]
    lappend aCase($k) $I
  }

  foreach k [lsort [array names aCase]] {
    set ifstmts ""
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]       ;# Suffix text for this rule
      set zRep    [lindex $I 2]       ;# Replacement text for rule
      set xCond   [lindex $I 1]       ;# Condition callback (or "")

      set n    [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments substituted into the selected template.
      set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond "${xCond}(aBuf, nBuf-$n)"

      # Template selectors: each is 0 or 1.
      set bMemcpy [expr {$nRep>0}]
      set bCond   [expr {$xCond ne ""}]
      set bRet    [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Chain the rules of this case: "if(..){..}else if(..){..}".
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }

  puts [subst -nocommands $T(function)]
}
203 | |
204 | |
# Script entry point: write the opening banner, one generated C function
# for every rule table in the O array, then the closing banner.
#
# The step names are sorted before iterating. [array names] returns them
# in hash-table order, which is not guaranteed to be stable, so sorting
# keeps the order of the generated functions reproducible from run to
# run.
puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}
puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]
220 | |
221 | |
222 | |
OLD | NEW |