OLD | NEW |
| (Empty) |
1 # 2014 Dec 19 | |
2 # | |
3 # The author disclaims copyright to this source code. In place of | |
4 # a legal notice, here is a blessing: | |
5 # | |
6 # May you do good and not evil. | |
7 # May you find forgiveness for yourself and forgive others. | |
8 # May you share freely, never taking more than you give. | |
9 # | |
10 #*********************************************************************** | |
11 # | |
12 | |
13 if {![info exists testdir]} { | |
14 set testdir [file join [file dirname [info script]] .. .. .. test] | |
15 } | |
16 source $testdir/tester.tcl | |
17 | |
18 catch { | |
19 sqlite3_fts5_may_be_corrupt 0 | |
20 reset_db | |
21 } | |
22 | |
23 proc fts5_test_poslist {cmd} { | |
24 set res [list] | |
25 for {set i 0} {$i < [$cmd xInstCount]} {incr i} { | |
26 lappend res [string map {{ } .} [$cmd xInst $i]] | |
27 } | |
28 set res | |
29 } | |
30 | |
31 proc fts5_test_columnsize {cmd} { | |
32 set res [list] | |
33 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { | |
34 lappend res [$cmd xColumnSize $i] | |
35 } | |
36 set res | |
37 } | |
38 | |
39 proc fts5_test_columntext {cmd} { | |
40 set res [list] | |
41 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { | |
42 lappend res [$cmd xColumnText $i] | |
43 } | |
44 set res | |
45 } | |
46 | |
47 proc fts5_test_columntotalsize {cmd} { | |
48 set res [list] | |
49 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { | |
50 lappend res [$cmd xColumnTotalSize $i] | |
51 } | |
52 set res | |
53 } | |
54 | |
55 proc test_append_token {varname token iStart iEnd} { | |
56 upvar $varname var | |
57 lappend var $token | |
58 return "SQLITE_OK" | |
59 } | |
60 proc fts5_test_tokenize {cmd} { | |
61 set res [list] | |
62 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { | |
63 set tokens [list] | |
64 $cmd xTokenize [$cmd xColumnText $i] [list test_append_token tokens] | |
65 lappend res $tokens | |
66 } | |
67 set res | |
68 } | |
69 | |
70 proc fts5_test_rowcount {cmd} { | |
71 $cmd xRowCount | |
72 } | |
73 | |
74 proc test_queryphrase_cb {cnt cmd} { | |
75 upvar $cnt L | |
76 for {set i 0} {$i < [$cmd xInstCount]} {incr i} { | |
77 foreach {ip ic io} [$cmd xInst $i] break | |
78 set A($ic) 1 | |
79 } | |
80 foreach ic [array names A] { | |
81 lset L $ic [expr {[lindex $L $ic] + 1}] | |
82 } | |
83 } | |
84 proc fts5_test_queryphrase {cmd} { | |
85 set res [list] | |
86 for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { | |
87 set cnt [list] | |
88 for {set j 0} {$j < [$cmd xColumnCount]} {incr j} { lappend cnt 0 } | |
89 $cmd xQueryPhrase $i [list test_queryphrase_cb cnt] | |
90 lappend res $cnt | |
91 } | |
92 set res | |
93 } | |
94 | |
95 proc fts5_test_phrasecount {cmd} { | |
96 $cmd xPhraseCount | |
97 } | |
98 | |
99 proc fts5_test_all {cmd} { | |
100 set res [list] | |
101 lappend res columnsize [fts5_test_columnsize $cmd] | |
102 lappend res columntext [fts5_test_columntext $cmd] | |
103 lappend res columntotalsize [fts5_test_columntotalsize $cmd] | |
104 lappend res poslist [fts5_test_poslist $cmd] | |
105 lappend res tokenize [fts5_test_tokenize $cmd] | |
106 lappend res rowcount [fts5_test_rowcount $cmd] | |
107 set res | |
108 } | |
109 | |
110 proc fts5_aux_test_functions {db} { | |
111 foreach f { | |
112 fts5_test_columnsize | |
113 fts5_test_columntext | |
114 fts5_test_columntotalsize | |
115 fts5_test_poslist | |
116 fts5_test_tokenize | |
117 fts5_test_rowcount | |
118 fts5_test_all | |
119 | |
120 fts5_test_queryphrase | |
121 fts5_test_phrasecount | |
122 } { | |
123 sqlite3_fts5_create_function $db $f $f | |
124 } | |
125 } | |
126 | |
127 proc fts5_level_segs {tbl} { | |
128 set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10" | |
129 set ret [list] | |
130 foreach L [lrange [db one $sql] 1 end] { | |
131 lappend ret [expr [llength $L] - 3] | |
132 } | |
133 set ret | |
134 } | |
135 | |
136 proc fts5_level_segids {tbl} { | |
137 set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10" | |
138 set ret [list] | |
139 foreach L [lrange [db one $sql] 1 end] { | |
140 set lvl [list] | |
141 foreach S [lrange $L 3 end] { | |
142 regexp {id=([1234567890]*)} $S -> segid | |
143 lappend lvl $segid | |
144 } | |
145 lappend ret $lvl | |
146 } | |
147 set ret | |
148 } | |
149 | |
150 proc fts5_rnddoc {n} { | |
151 set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] | |
152 set doc [list] | |
153 for {set i 0} {$i < $n} {incr i} { | |
154 lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]" | |
155 } | |
156 set doc | |
157 } | |
158 | |
159 #------------------------------------------------------------------------- | |
160 # Usage: | |
161 # | |
162 # nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2... | |
163 # | |
164 # This command is used to test if a document (set of column values) matches | |
165 # the logical equivalent of a single FTS5 NEAR() clump and, if so, return | |
166 # the equivalent of an FTS5 position list. | |
167 # | |
168 # Parameter $aCol is passed a list of the column values for the document | |
169 # to test. Parameters $phrase1 and so on are the phrases. | |
170 # | |
171 # The result is a list of phrase hits. Each phrase hit is formatted as | |
172 # three integers separated by "." characters, in the following format: | |
173 # | |
174 # <phrase number> . <column number> . <token offset> | |
175 # | |
176 # Options: | |
177 # | |
178 # -near N (NEAR distance. Default 10) | |
179 # -col C (List of column indexes to match against) | |
180 # -pc VARNAME (variable in caller frame to use for phrase numbering) | |
181 # | |
182 proc nearset {aCol args} { | |
183 set O(-near) 10 | |
184 set O(-col) {} | |
185 set O(-pc) "" | |
186 | |
187 set nOpt [lsearch -exact $args --] | |
188 if {$nOpt<0} { error "no -- option" } | |
189 | |
190 foreach {k v} [lrange $args 0 [expr $nOpt-1]] { | |
191 if {[info exists O($k)]==0} { error "unrecognized option $k" } | |
192 set O($k) $v | |
193 } | |
194 | |
195 if {$O(-pc) == ""} { | |
196 set counter 0 | |
197 } else { | |
198 upvar $O(-pc) counter | |
199 } | |
200 | |
201 # Set $phraselist to be a list of phrases. $nPhrase its length. | |
202 set phraselist [lrange $args [expr $nOpt+1] end] | |
203 set nPhrase [llength $phraselist] | |
204 | |
205 for {set j 0} {$j < [llength $aCol]} {incr j} { | |
206 for {set i 0} {$i < $nPhrase} {incr i} { | |
207 set A($j,$i) [list] | |
208 } | |
209 } | |
210 | |
211 set iCol -1 | |
212 foreach col $aCol { | |
213 incr iCol | |
214 if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue | |
215 set nToken [llength $col] | |
216 | |
217 set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] | |
218 for { } {$iFL < $nToken} {incr iFL} { | |
219 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { | |
220 set B($iPhrase) [list] | |
221 } | |
222 | |
223 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { | |
224 set p [lindex $phraselist $iPhrase] | |
225 set nPm1 [expr {[llength $p] - 1}] | |
226 set iFirst [expr $iFL - $O(-near) - [llength $p]] | |
227 | |
228 for {set i $iFirst} {$i <= $iFL} {incr i} { | |
229 if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i } | |
230 } | |
231 if {[llength $B($iPhrase)] == 0} break | |
232 } | |
233 | |
234 if {$iPhrase==$nPhrase} { | |
235 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { | |
236 set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)] | |
237 set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)] | |
238 } | |
239 } | |
240 } | |
241 } | |
242 | |
243 set res [list] | |
244 #puts [array names A] | |
245 | |
246 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { | |
247 for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { | |
248 foreach a $A($iCol,$iPhrase) { | |
249 lappend res "$counter.$iCol.$a" | |
250 } | |
251 } | |
252 incr counter | |
253 } | |
254 | |
255 #puts $res | |
256 sort_poslist $res | |
257 } | |
258 | |
259 #------------------------------------------------------------------------- | |
260 # Usage: | |
261 # | |
262 # sort_poslist LIST | |
263 # | |
264 # Sort a position list of the type returned by command [nearset] | |
265 # | |
266 proc sort_poslist {L} { | |
267 lsort -command instcompare $L | |
268 } | |
269 proc instcompare {lhs rhs} { | |
270 foreach {p1 c1 o1} [split $lhs .] {} | |
271 foreach {p2 c2 o2} [split $rhs .] {} | |
272 | |
273 set res [expr $c1 - $c2] | |
274 if {$res==0} { set res [expr $o1 - $o2] } | |
275 if {$res==0} { set res [expr $p1 - $p2] } | |
276 | |
277 return $res | |
278 } | |
279 | |
280 #------------------------------------------------------------------------- | |
281 # Logical operators used by the commands returned by fts5_tcl_expr(). | |
282 # | |
283 proc AND {args} { | |
284 foreach a $args { | |
285 if {[llength $a]==0} { return [list] } | |
286 } | |
287 sort_poslist [concat {*}$args] | |
288 } | |
289 proc OR {args} { | |
290 sort_poslist [concat {*}$args] | |
291 } | |
292 proc NOT {a b} { | |
293 if {[llength $b]>0} { return [list] } | |
294 return $a | |
295 } | |
296 | |
297 #------------------------------------------------------------------------- | |
298 # This command is similar to [split], except that it also provides the | |
299 # start and end offsets of each token. For example: | |
300 # | |
301 # [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8} | |
302 # | |
303 | |
304 proc gobble_whitespace {textvar} { | |
305 upvar $textvar t | |
306 regexp {([ ]*)(.*)} $t -> space t | |
307 return [string length $space] | |
308 } | |
309 | |
310 proc gobble_text {textvar wordvar} { | |
311 upvar $textvar t | |
312 upvar $wordvar w | |
313 regexp {([^ ]*)(.*)} $t -> w t | |
314 return [string length $w] | |
315 } | |
316 | |
317 proc fts5_tokenize_split {text} { | |
318 set token "" | |
319 set ret [list] | |
320 set iOff [gobble_whitespace text] | |
321 while {[set nToken [gobble_text text word]]} { | |
322 lappend ret $word $iOff [expr $iOff+$nToken] | |
323 incr iOff $nToken | |
324 incr iOff [gobble_whitespace text] | |
325 } | |
326 | |
327 set ret | |
328 } | |
329 | |
OLD | NEW |