OLD | NEW |
1 # 2009 November 04 | 1 # 2009 November 04 |
2 # | 2 # |
3 # The author disclaims copyright to this source code. In place of | 3 # The author disclaims copyright to this source code. In place of |
4 # a legal notice, here is a blessing: | 4 # a legal notice, here is a blessing: |
5 # | 5 # |
6 # May you do good and not evil. | 6 # May you do good and not evil. |
7 # May you find forgiveness for yourself and forgive others. | 7 # May you find forgiveness for yourself and forgive others. |
8 # May you share freely, never taking more than you give. | 8 # May you share freely, never taking more than you give. |
9 # | 9 # |
10 #*********************************************************************** | 10 #*********************************************************************** |
11 # | 11 # |
12 # This file contains common code used by the fts3 tests. At one point | 12 # This file contains common code used by the fts3 tests. At one point |
13 # equivalent functionality was implemented in C code. But it is easier | 13 # equivalent functionality was implemented in C code. But it is easier |
14 # to use Tcl. | 14 # to use Tcl. |
15 # | 15 # |
16 | 16 |
17 #------------------------------------------------------------------------- | 17 #------------------------------------------------------------------------- |
| 18 # INSTRUCTIONS |
| 19 # |
| 20 # The following commands are available: |
| 21 # |
| 22 # fts3_build_db_1 N |
| 23 # Using database handle [db] create an FTS4 table named t1 and populate |
| 24 # it with N rows of data. N must not exceed 10,000. Refer to the |
| 25 # header comments above the proc implementation below for details. |
| 26 # |
| 27 # fts3_build_db_2 N |
| 28 # Using database handle [db] create an FTS4 table named t2 and populate |
| 29 # it with N rows of data. N must not exceed 100,000. Refer to the |
| 30 # header comments above the proc implementation below for details. |
| 31 # |
| 32 # fts3_integrity_check TBL |
| 33 # TBL must be an FTS table in the database currently opened by handle |
| 34 # [db]. This proc loads and tokenizes all documents within the table, |
| 35 # then checks that the current contents of the FTS index match the |
| 36 # results. |
| 37 # |
| 38 # fts3_terms TBL WHERE |
| 39 # Todo. |
| 40 # |
| 41 # fts3_doclist TBL TERM WHERE |
| 42 # Todo. |
| 43 # |
| 44 # |
| 45 # |
| 46 |
| 47 #------------------------------------------------------------------------- |
| 48 # USAGE: fts3_build_db_1 SWITCHES N |
| 49 # |
| 50 # Build a sample FTS table in the database opened by database connection |
| 51 # [db]. The name of the new table is "t1". |
| 52 # |
| 53 proc fts3_build_db_1 {args} { |
| 54 |
| 55 set default(-module) fts4 |
| 56 |
| 57 set nArg [llength $args] |
| 58 if {($nArg%2)==0} { |
| 59 error "wrong # args: should be \"fts3_build_db_1 ?switches? n\"" |
| 60 } |
| 61 |
| 62 set n [lindex $args [expr $nArg-1]] |
| 63 array set opts [array get default] |
| 64 array set opts [lrange $args 0 [expr $nArg-2]] |
| 65 foreach k [array names opts] { |
| 66 if {0==[info exists default($k)]} { error "unknown option: $k" } |
| 67 } |
| 68 |
| 69 if {$n > 10000} {error "n must be <= 10000"} |
| 70 db eval "CREATE VIRTUAL TABLE t1 USING $opts(-module) (x, y)" |
| 71 |
| 72 set xwords [list zero one two three four five six seven eight nine ten] |
| 73 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] |
| 74 |
| 75 for {set i 0} {$i < $n} {incr i} { |
| 76 set x "" |
| 77 set y "" |
| 78 |
| 79 set x [list] |
| 80 lappend x [lindex $xwords [expr ($i / 1000) % 10]] |
| 81 lappend x [lindex $xwords [expr ($i / 100) % 10]] |
| 82 lappend x [lindex $xwords [expr ($i / 10) % 10]] |
| 83 lappend x [lindex $xwords [expr ($i / 1) % 10]] |
| 84 |
| 85 set y [list] |
| 86 lappend y [lindex $ywords [expr ($i / 1000) % 10]] |
| 87 lappend y [lindex $ywords [expr ($i / 100) % 10]] |
| 88 lappend y [lindex $ywords [expr ($i / 10) % 10]] |
| 89 lappend y [lindex $ywords [expr ($i / 1) % 10]] |
| 90 |
| 91 db eval { INSERT INTO t1(docid, x, y) VALUES($i, $x, $y) } |
| 92 } |
| 93 } |
| 94 |
| 95 #------------------------------------------------------------------------- |
| 96 # USAGE: fts3_build_db_2 SWITCHES N |
| 97 # |
| 98 # Build a sample FTS table in the database opened by database connection |
| 99 # [db]. The name of the new table is "t2". |
| 100 # |
| 101 proc fts3_build_db_2 {args} { |
| 102 |
| 103 set default(-module) fts4 |
| 104 set default(-extra) "" |
| 105 |
| 106 set nArg [llength $args] |
| 107 if {($nArg%2)==0} { |
| 108 error "wrong # args: should be \"fts3_build_db_1 ?switches? n\"" |
| 109 } |
| 110 |
| 111 set n [lindex $args [expr $nArg-1]] |
| 112 array set opts [array get default] |
| 113 array set opts [lrange $args 0 [expr $nArg-2]] |
| 114 foreach k [array names opts] { |
| 115 if {0==[info exists default($k)]} { error "unknown option: $k" } |
| 116 } |
| 117 |
| 118 if {$n > 100000} {error "n must be <= 100000"} |
| 119 |
| 120 set sql "CREATE VIRTUAL TABLE t2 USING $opts(-module) (content" |
| 121 if {$opts(-extra) != ""} { |
| 122 append sql ", " $opts(-extra) |
| 123 } |
| 124 append sql ")" |
| 125 db eval $sql |
| 126 |
| 127 set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z ""] |
| 128 |
| 129 for {set i 0} {$i < $n} {incr i} { |
| 130 set word "" |
| 131 set nChar [llength $chars] |
| 132 append word [lindex $chars [expr {($i / 1) % $nChar}]] |
| 133 append word [lindex $chars [expr {($i / $nChar) % $nChar}]] |
| 134 append word [lindex $chars [expr {($i / ($nChar*$nChar)) % $nChar}]] |
| 135 |
| 136 db eval { INSERT INTO t2(docid, content) VALUES($i, $word) } |
| 137 } |
| 138 } |
| 139 |
| 140 #------------------------------------------------------------------------- |
18 # USAGE: fts3_integrity_check TBL | 141 # USAGE: fts3_integrity_check TBL |
19 # | 142 # |
20 # This proc is used to verify that the full-text index is consistent with | 143 # This proc is used to verify that the full-text index is consistent with |
21 # the contents of the fts3 table. In other words, it checks that the | 144 # the contents of the fts3 table. In other words, it checks that the |
22 # data in the %_content table matches that in the %_segdir and %_segments | 145 # data in the %_content table matches that in the %_segdir and %_segments |
23 # tables. | 146 # tables. |
24 # | 147 # |
25 # This is not an efficient procedure. It uses a lot of memory and a lot | 148 # This is not an efficient procedure. It uses a lot of memory and a lot |
26 # of CPU. But it is better than not checking at all. | 149 # of CPU. But it is better than not checking at all. |
27 # | 150 # |
(...skipping 11 matching lines...) Expand all Loading... |
39 # [unset] the $C array entry. | 162 # [unset] the $C array entry. |
40 # | 163 # |
41 # 3) Check that array $C is now empty. | 164 # 3) Check that array $C is now empty. |
42 # | 165 # |
43 # | 166 # |
44 proc fts3_integrity_check {tbl} { | 167 proc fts3_integrity_check {tbl} { |
45 | 168 |
46 fts3_read2 $tbl 1 A | 169 fts3_read2 $tbl 1 A |
47 | 170 |
48 foreach zTerm [array names A] { | 171 foreach zTerm [array names A] { |
| 172 #puts $zTerm |
49 foreach doclist $A($zTerm) { | 173 foreach doclist $A($zTerm) { |
50 set docid 0 | 174 set docid 0 |
51 while {[string length $doclist]>0} { | 175 while {[string length $doclist]>0} { |
52 set iCol 0 | 176 set iCol 0 |
53 set iPos 0 | 177 set iPos 0 |
54 set lPos [list] | 178 set lPos [list] |
55 set lCol [list] | 179 set lCol [list] |
56 | 180 |
57 # First varint of a doclist-entry is the docid. Delta-compressed | 181 # First varint of a doclist-entry is the docid. Delta-compressed |
58 # with respect to the docid of the previous entry. | 182 # with respect to the docid of the previous entry. |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
90 set iDoc $E(docid) | 214 set iDoc $E(docid) |
91 foreach col [lrange $E(*) 1 end] { | 215 foreach col [lrange $E(*) 1 end] { |
92 set c $E($col) | 216 set c $E($col) |
93 set sql {SELECT fts3_tokenizer_test('simple', $c)} | 217 set sql {SELECT fts3_tokenizer_test('simple', $c)} |
94 | 218 |
95 foreach {pos term dummy} [db one $sql] { | 219 foreach {pos term dummy} [db one $sql] { |
96 if {![info exists C($iDoc,$iCol,$pos)]} { | 220 if {![info exists C($iDoc,$iCol,$pos)]} { |
97 set es "Error at docid=$iDoc col=$iCol pos=$pos. Index is missing" | 221 set es "Error at docid=$iDoc col=$iCol pos=$pos. Index is missing" |
98 lappend errors $es | 222 lappend errors $es |
99 } else { | 223 } else { |
100 if {$C($iDoc,$iCol,$pos) != "$term"} { | 224 if {[string compare $C($iDoc,$iCol,$pos) $term]} { |
101 set es "Error at docid=$iDoc col=$iCol pos=$pos. Index " | 225 set es "Error at docid=$iDoc col=$iCol pos=$pos. Index " |
102 append es "has \"$C($iDoc,$iCol,$pos)\", document has \"$term\"" | 226 append es "has \"$C($iDoc,$iCol,$pos)\", document has \"$term\"" |
103 lappend errors $es | 227 lappend errors $es |
104 } | 228 } |
105 unset C($iDoc,$iCol,$pos) | 229 unset C($iDoc,$iCol,$pos) |
106 } | 230 } |
107 } | 231 } |
108 incr iCol | 232 incr iCol |
109 } | 233 } |
110 } | 234 } |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
226 proc fts3_readleaf {blob} { | 350 proc fts3_readleaf {blob} { |
227 set zPrev "" | 351 set zPrev "" |
228 set terms [list] | 352 set terms [list] |
229 | 353 |
230 while {[string length $blob] > 0} { | 354 while {[string length $blob] > 0} { |
231 set nPrefix [gobble_varint blob] | 355 set nPrefix [gobble_varint blob] |
232 set nSuffix [gobble_varint blob] | 356 set nSuffix [gobble_varint blob] |
233 | 357 |
234 set zTerm [string range $zPrev 0 [expr $nPrefix-1]] | 358 set zTerm [string range $zPrev 0 [expr $nPrefix-1]] |
235 append zTerm [gobble_string blob $nSuffix] | 359 append zTerm [gobble_string blob $nSuffix] |
236 set doclist [gobble_string blob [gobble_varint blob]] | 360 set nDoclist [gobble_varint blob] |
| 361 set doclist [gobble_string blob $nDoclist] |
237 | 362 |
238 lappend terms $zTerm $doclist | 363 lappend terms $zTerm $doclist |
239 set zPrev $zTerm | 364 set zPrev $zTerm |
240 } | 365 } |
241 | 366 |
242 return $terms | 367 return $terms |
243 } | 368 } |
244 | 369 |
245 proc fts3_read2 {tbl where varname} { | 370 proc fts3_read2 {tbl where varname} { |
246 upvar $varname a | 371 upvar $varname a |
247 array unset a | 372 array unset a |
248 db eval " SELECT start_block, leaves_end_block, root | 373 db eval " SELECT start_block, leaves_end_block, root |
249 FROM ${tbl}_segdir WHERE $where | 374 FROM ${tbl}_segdir WHERE $where |
250 ORDER BY level ASC, idx DESC | 375 ORDER BY level ASC, idx DESC |
251 " { | 376 " { |
252 if {$start_block == 0} { | 377 set c 0 |
| 378 binary scan $root c c |
| 379 if {$c==0} { |
253 foreach {t d} [fts3_readleaf $root] { lappend a($t) $d } | 380 foreach {t d} [fts3_readleaf $root] { lappend a($t) $d } |
254 } else { | 381 } else { |
255 db eval " SELECT block | 382 db eval " SELECT block |
256 FROM ${tbl}_segments | 383 FROM ${tbl}_segments |
257 WHERE blockid>=$start_block AND blockid<=$leaves_end_block | 384 WHERE blockid>=$start_block AND blockid<=$leaves_end_block |
258 ORDER BY blockid | 385 ORDER BY blockid |
259 " { | 386 " { |
260 foreach {t d} [fts3_readleaf $block] { lappend a($t) $d } | 387 foreach {t d} [fts3_readleaf $block] { lappend a($t) $d } |
261 | |
262 } | 388 } |
263 } | 389 } |
264 } | 390 } |
265 } | 391 } |
266 | 392 |
267 proc fts3_read {tbl where varname} { | 393 proc fts3_read {tbl where varname} { |
268 upvar $varname a | 394 upvar $varname a |
269 array unset a | 395 array unset a |
270 db eval " SELECT start_block, leaves_end_block, root | 396 db eval " SELECT start_block, leaves_end_block, root |
271 FROM ${tbl}_segdir WHERE $where | 397 FROM ${tbl}_segdir WHERE $where |
272 ORDER BY level DESC, idx ASC | 398 ORDER BY level DESC, idx ASC |
273 " { | 399 " { |
274 if {$start_block == 0} { | 400 if {$start_block == 0} { |
275 foreach {t d} [fts3_readleaf $root] { lappend a($t) $d } | 401 foreach {t d} [fts3_readleaf $root] { lappend a($t) $d } |
276 } else { | 402 } else { |
277 db eval " SELECT block | 403 db eval " SELECT block |
278 FROM ${tbl}_segments | 404 FROM ${tbl}_segments |
279 WHERE blockid>=$start_block AND blockid<$leaves_end_block | 405 WHERE blockid>=$start_block AND blockid<$leaves_end_block |
280 ORDER BY blockid | 406 ORDER BY blockid |
281 " { | 407 " { |
282 foreach {t d} [fts3_readleaf $block] { lappend a($t) $d } | 408 foreach {t d} [fts3_readleaf $block] { lappend a($t) $d } |
283 | 409 |
284 } | 410 } |
285 } | 411 } |
286 } | 412 } |
287 } | 413 } |
288 | 414 |
289 ########################################################################## | 415 ########################################################################## |
290 | 416 |
OLD | NEW |