Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Side by Side Diff: third_party/sqlite/src/ext/fts5/test/fts5_common.tcl

Issue 2751253002: [sql] Import SQLite 3.17.0. (Closed)
Patch Set: also clang on Linux i386 Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # 2014 Dec 19 1 # 2014 Dec 19
2 # 2 #
3 # The author disclaims copyright to this source code. In place of 3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing: 4 # a legal notice, here is a blessing:
5 # 5 #
6 # May you do good and not evil. 6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others. 7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give. 8 # May you share freely, never taking more than you give.
9 # 9 #
10 #*********************************************************************** 10 #***********************************************************************
11 # 11 #
12 12
13 if {![info exists testdir]} { 13 if {![info exists testdir]} {
14 set testdir [file join [file dirname [info script]] .. .. .. test] 14 set testdir [file join [file dirname [info script]] .. .. .. test]
15 } 15 }
16 source $testdir/tester.tcl 16 source $testdir/tester.tcl
17 17
18 ifcapable !fts5 {
19 proc return_if_no_fts5 {} {
20 finish_test
21 return -code return
22 }
23 return
24 } else {
25 proc return_if_no_fts5 {} {}
26 }
27
18 catch { 28 catch {
19 sqlite3_fts5_may_be_corrupt 0 29 sqlite3_fts5_may_be_corrupt 0
20 reset_db 30 reset_db
21 } 31 }
22 32
23 proc fts5_test_poslist {cmd} { 33 proc fts5_test_poslist {cmd} {
24 set res [list] 34 set res [list]
25 for {set i 0} {$i < [$cmd xInstCount]} {incr i} { 35 for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
26 lappend res [string map {{ } .} [$cmd xInst $i]] 36 lappend res [string map {{ } .} [$cmd xInst $i]]
27 } 37 }
28 set res 38 set res
29 } 39 }
30 40
41 proc fts5_test_poslist2 {cmd} {
42 set res [list]
43
44 for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
45 $cmd xPhraseForeach $i c o {
46 lappend res $i.$c.$o
47 }
48 }
49
50 #set res
51 sort_poslist $res
52 }
53
54 proc fts5_test_collist {cmd} {
55 set res [list]
56
57 for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
58 $cmd xPhraseColumnForeach $i c { lappend res $i.$c }
59 }
60
61 set res
62 }
63
31 proc fts5_test_columnsize {cmd} { 64 proc fts5_test_columnsize {cmd} {
32 set res [list] 65 set res [list]
33 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { 66 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
34 lappend res [$cmd xColumnSize $i] 67 lappend res [$cmd xColumnSize $i]
35 } 68 }
36 set res 69 set res
37 } 70 }
38 71
39 proc fts5_test_columntext {cmd} { 72 proc fts5_test_columntext {cmd} {
40 set res [list] 73 set res [list]
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 lappend res rowcount [fts5_test_rowcount $cmd] 139 lappend res rowcount [fts5_test_rowcount $cmd]
107 set res 140 set res
108 } 141 }
109 142
110 proc fts5_aux_test_functions {db} { 143 proc fts5_aux_test_functions {db} {
111 foreach f { 144 foreach f {
112 fts5_test_columnsize 145 fts5_test_columnsize
113 fts5_test_columntext 146 fts5_test_columntext
114 fts5_test_columntotalsize 147 fts5_test_columntotalsize
115 fts5_test_poslist 148 fts5_test_poslist
149 fts5_test_poslist2
150 fts5_test_collist
116 fts5_test_tokenize 151 fts5_test_tokenize
117 fts5_test_rowcount 152 fts5_test_rowcount
118 fts5_test_all 153 fts5_test_all
119 154
120 fts5_test_queryphrase 155 fts5_test_queryphrase
121 fts5_test_phrasecount 156 fts5_test_phrasecount
122 } { 157 } {
123 sqlite3_fts5_create_function $db $f $f 158 sqlite3_fts5_create_function $db $f $f
124 } 159 }
125 } 160 }
126 161
162 proc fts5_segcount {tbl} {
163 set N 0
164 foreach n [fts5_level_segs $tbl] { incr N $n }
165 set N
166 }
167
127 proc fts5_level_segs {tbl} { 168 proc fts5_level_segs {tbl} {
128 set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10" 169 set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
129 set ret [list] 170 set ret [list]
130 foreach L [lrange [db one $sql] 1 end] { 171 foreach L [lrange [db one $sql] 1 end] {
131 lappend ret [expr [llength $L] - 3] 172 lappend ret [expr [llength $L] - 3]
132 } 173 }
133 set ret 174 set ret
134 } 175 }
135 176
136 proc fts5_level_segids {tbl} { 177 proc fts5_level_segids {tbl} {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
171 # The result is a list of phrase hits. Each phrase hit is formatted as 212 # The result is a list of phrase hits. Each phrase hit is formatted as
172 # three integers separated by "." characters, in the following format: 213 # three integers separated by "." characters, in the following format:
173 # 214 #
174 # <phrase number> . <column number> . <token offset> 215 # <phrase number> . <column number> . <token offset>
175 # 216 #
176 # Options: 217 # Options:
177 # 218 #
178 # -near N (NEAR distance. Default 10) 219 # -near N (NEAR distance. Default 10)
179 # -col C (List of column indexes to match against) 220 # -col C (List of column indexes to match against)
180 # -pc VARNAME (variable in caller frame to use for phrase numbering) 221 # -pc VARNAME (variable in caller frame to use for phrase numbering)
222 # -dict VARNAME (array in caller frame to use for synonyms)
181 # 223 #
182 proc nearset {aCol args} { 224 proc nearset {aCol args} {
225
226 # Process the command line options.
227 #
183 set O(-near) 10 228 set O(-near) 10
184 set O(-col) {} 229 set O(-col) {}
185 set O(-pc) "" 230 set O(-pc) ""
231 set O(-dict) ""
186 232
187 set nOpt [lsearch -exact $args --] 233 set nOpt [lsearch -exact $args --]
188 if {$nOpt<0} { error "no -- option" } 234 if {$nOpt<0} { error "no -- option" }
189 235
236 # Set $lPhrase to the list of phrases and $nPhrase to its length.
237 set lPhrase [lrange $args [expr $nOpt+1] end]
238 set nPhrase [llength $lPhrase]
239
190 foreach {k v} [lrange $args 0 [expr $nOpt-1]] { 240 foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
191 if {[info exists O($k)]==0} { error "unrecognized option $k" } 241 if {[info exists O($k)]==0} { error "unrecognized option $k" }
192 set O($k) $v 242 set O($k) $v
193 } 243 }
194 244
195 if {$O(-pc) == ""} { 245 if {$O(-pc) == ""} {
196 set counter 0 246 set counter 0
197 } else { 247 } else {
198 upvar $O(-pc) counter 248 upvar $O(-pc) counter
199 } 249 }
200 250
201 # Set $phraselist to be a list of phrases. $nPhrase its length. 251 if {$O(-dict)!=""} { upvar $O(-dict) aDict }
202 set phraselist [lrange $args [expr $nOpt+1] end]
203 set nPhrase [llength $phraselist]
204 252
205 for {set j 0} {$j < [llength $aCol]} {incr j} { 253 for {set j 0} {$j < [llength $aCol]} {incr j} {
206 for {set i 0} {$i < $nPhrase} {incr i} { 254 for {set i 0} {$i < $nPhrase} {incr i} {
207 set A($j,$i) [list] 255 set A($j,$i) [list]
208 } 256 }
209 } 257 }
210 258
211 set iCol -1 259 # Loop through each column of the current row.
212 foreach col $aCol { 260 for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
213 incr iCol 261
262 # If there is a column filter, test whether this column is excluded. If
263 # so, skip to the next iteration of this loop. Otherwise, set zCol to the
264 # column value and nToken to the number of tokens that comprise it.
214 if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue 265 if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
215 set nToken [llength $col] 266 set zCol [lindex $aCol $iCol]
267 set nToken [llength $zCol]
216 268
217 set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] 269 # Each iteration of the following loop searches a substring of the
218 for { } {$iFL < $nToken} {incr iFL} { 270 # column value for phrase matches. The last token of the substring
219 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { 271 # is token $iLast of the column value. The first token is:
220 set B($iPhrase) [list] 272 #
221 } 273 # iFirst = ($iLast - $O(-near) - 1)
274 #
275 # Here $sz denotes the length of the phrase being searched for. A phrase
276 # counts as matching the substring if its first token lies on or before
277 # $iLast and its last token on or after $iFirst (so it starts at or after $iFirst-$sz+1).
278 #
279 # For example, if the query is "NEAR(a+b c, 2)" and the column value:
280 #
281 # "x x x x A B x x C x"
282 # 0 1 2 3 4 5 6 7 8 9
283 #
284 # when (iLast==8 && iFirst==5) the range will contain both phrases and
285 # so both instances can be added to the output poslists.
286 #
287 set iLast [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
288 for { } {$iLast < $nToken} {incr iLast} {
289
290 catch { array unset B }
222 291
223 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { 292 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
224 set p [lindex $phraselist $iPhrase] 293 set p [lindex $lPhrase $iPhrase]
225 set nPm1 [expr {[llength $p] - 1}] 294 set nPm1 [expr {[llength $p] - 1}]
226 set iFirst [expr $iFL - $O(-near) - [llength $p]] 295 set iFirst [expr $iLast - $O(-near) - [llength $p]]
227 296
228 for {set i $iFirst} {$i <= $iFL} {incr i} { 297 for {set i $iFirst} {$i <= $iLast} {incr i} {
229 if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i } 298 set lCand [lrange $zCol $i [expr $i+$nPm1]]
299 set bMatch 1
300 foreach tok $p term $lCand {
301 if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break }
302 }
303 if {$bMatch} { lappend B($iPhrase) $i }
230 } 304 }
231 if {[llength $B($iPhrase)] == 0} break 305
306 if {![info exists B($iPhrase)]} break
232 } 307 }
233 308
234 if {$iPhrase==$nPhrase} { 309 if {$iPhrase==$nPhrase} {
235 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { 310 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
236 set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)] 311 set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
237 set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)] 312 set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
238 } 313 }
239 } 314 }
240 } 315 }
241 } 316 }
242 317
243 set res [list] 318 set res [list]
244 #puts [array names A] 319 #puts [array names A]
245 320
246 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { 321 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
247 for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { 322 for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
248 foreach a $A($iCol,$iPhrase) { 323 foreach a $A($iCol,$iPhrase) {
249 lappend res "$counter.$iCol.$a" 324 lappend res "$counter.$iCol.$a"
250 } 325 }
251 } 326 }
252 incr counter 327 incr counter
253 } 328 }
254 329
255 #puts $res 330 #puts "$aCol -> $res"
256 sort_poslist $res 331 sort_poslist $res
257 } 332 }
258 333
334 proc nearset_match {aDictVar tok term} {
335 if {[string match $tok $term]} { return 1 }
336
337 upvar $aDictVar aDict
338 if {[info exists aDict($tok)]} {
339 foreach s $aDict($tok) {
340 if {[string match $s $term]} { return 1 }
341 }
342 }
343 return 0;
344 }
345
259 #------------------------------------------------------------------------- 346 #-------------------------------------------------------------------------
260 # Usage: 347 # Usage:
261 # 348 #
262 # sort_poslist LIST 349 # sort_poslist LIST
263 # 350 #
264 # Sort a position list of the type returned by command [nearset] 351 # Sort a position list of the type returned by command [nearset]
265 # 352 #
266 proc sort_poslist {L} { 353 proc sort_poslist {L} {
267 lsort -command instcompare $L 354 lsort -command instcompare $L
268 } 355 }
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
320 set iOff [gobble_whitespace text] 407 set iOff [gobble_whitespace text]
321 while {[set nToken [gobble_text text word]]} { 408 while {[set nToken [gobble_text text word]]} {
322 lappend ret $word $iOff [expr $iOff+$nToken] 409 lappend ret $word $iOff [expr $iOff+$nToken]
323 incr iOff $nToken 410 incr iOff $nToken
324 incr iOff [gobble_whitespace text] 411 incr iOff [gobble_whitespace text]
325 } 412 }
326 413
327 set ret 414 set ret
328 } 415 }
329 416
417 #-------------------------------------------------------------------------
418 #
419 proc foreach_detail_mode {prefix script} {
420 set saved $::testprefix
421 foreach d [list full col none] {
422 set s [string map [list %DETAIL% $d] $script]
423 set ::detail $d
424 set ::testprefix "$prefix-$d"
425 reset_db
426 uplevel $s
427 unset ::detail
428 }
429 set ::testprefix $saved
430 }
431
432 proc detail_check {} {
433 if {$::detail != "none" && $::detail!="full" && $::detail!="col"} {
434 error "not in foreach_detail_mode {...} block"
435 }
436 }
437 proc detail_is_none {} { detail_check ; expr {$::detail == "none"} }
438 proc detail_is_col {} { detail_check ; expr {$::detail == "col" } }
439 proc detail_is_full {} { detail_check ; expr {$::detail == "full"} }
440
441
442 #-------------------------------------------------------------------------
443 # Convert a poslist of the type returned by fts5_test_poslist() to a
444 # collist as returned by fts5_test_collist().
445 #
446 proc fts5_poslist2collist {poslist} {
447 set res [list]
448 foreach h $poslist {
449 regexp {(.*)\.[1234567890]+} $h -> cand
450 lappend res $cand
451 }
452 set res [lsort -command fts5_collist_elem_compare -unique $res]
453 return $res
454 }
455
456 # Comparison function used by fts5_poslist2collist to sort collist entries.
457 proc fts5_collist_elem_compare {a b} {
458 foreach {a1 a2} [split $a .] {}
459 foreach {b1 b2} [split $b .] {}
460
461 if {$a1==$b1} { return [expr $a2 - $b2] }
462 return [expr $a1 - $b1]
463 }
464
465
466 #--------------------------------------------------------------------------
467 # Construct and return a tcl list equivalent to that returned by the SQL
468 # query executed against database handle [db]:
469 #
470 # SELECT
471 # rowid,
472 # fts5_test_poslist($tbl),
473 # fts5_test_collist($tbl)
474 # FROM $tbl('$expr')
475 # ORDER BY rowid $order;
476 #
477 proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} {
478
479 # Figure out the set of columns in the FTS5 table. This routine does
480 # not handle tables with UNINDEXED columns; if it did, that handling
481 # would have to go here.
482 db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }
483
484 set d ""
485 if {$aDictVar != ""} {
486 upvar $aDictVar aDict
487 set d aDict
488 }
489
490 set cols ""
491 foreach e $lCols { append cols ", '$e'" }
492 set tclexpr [db one [subst -novar {
493 SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] )
494 }]]
495
496 set res [list]
497 db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
498 set cols [list]
499 foreach col $lCols { lappend cols $x($col) }
500
501 set ::pc 0
502 set rowdata [eval $tclexpr]
503 if {$rowdata != ""} {
504 lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata]
505 }
506 }
507
508 set res
509 }
510
511 #-------------------------------------------------------------------------
512 # Similar to [fts5_query_data], but omit the collist field.
513 #
514 proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} {
515 set res [list]
516
517 if {$aDictVar!=""} {
518 upvar $aDictVar aDict
519 set dict aDict
520 } else {
521 set dict ""
522 }
523
524 foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {
525 lappend res $rowid $poslist
526 }
527 set res
528 }
529
530 proc fts5_collist_data {expr tbl {order ASC} {aDictVar ""}} {
531 set res [list]
532
533 if {$aDictVar!=""} {
534 upvar $aDictVar aDict
535 set dict aDict
536 } else {
537 set dict ""
538 }
539
540 foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {
541 lappend res $rowid $collist
542 }
543 set res
544 }
545
546 #-------------------------------------------------------------------------
547 #
548
549 # This command will only work inside a [foreach_detail_mode] block. It tests
550 # whether or not expression $expr run on FTS5 table $tbl is supported by
551 # the current mode. If so, 1 is returned. If not, 0.
552 #
553 # detail=full (all queries supported)
554 # detail=col (all but phrase queries and NEAR queries)
555 # detail=none (all but phrase queries, NEAR queries, and column filters)
556 #
557 proc fts5_expr_ok {expr tbl} {
558
559 if {![detail_is_full]} {
560 set nearset "nearset_rc"
561 if {[detail_is_col]} { set nearset "nearset_rf" }
562
563 set ::expr_not_ok 0
564 db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }
565
566 set cols ""
567 foreach e $lCols { append cols ", '$e'" }
568 set ::pc 0
569 set tclexpr [db one [subst -novar {
570 SELECT fts5_expr_tcl( $expr, '[set nearset] $cols -pc ::pc' [set cols] )
571 }]]
572 eval $tclexpr
573 if {$::expr_not_ok} { return 0 }
574 }
575
576 return 1
577 }
578
579 # Helper for [fts5_expr_ok]
580 proc nearset_rf {aCol args} {
581 set idx [lsearch -exact $args --]
582 if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} {
583 set ::expr_not_ok 1
584 }
585 list
586 }
587
588 # Helper for [fts5_expr_ok]
589 proc nearset_rc {aCol args} {
590 nearset_rf $aCol {*}$args
591 if {[lsearch $args -col]>=0} {
592 set ::expr_not_ok 1
593 }
594 list
595 }
596
597
598 #-------------------------------------------------------------------------
599 # Code for a simple Tcl tokenizer that supports synonyms at query time.
600 #
601 proc tclnum_tokenize {mode tflags text} {
602 foreach {w iStart iEnd} [fts5_tokenize_split $text] {
603 sqlite3_fts5_token $w $iStart $iEnd
604 if {$tflags == $mode && [info exists ::tclnum_syn($w)]} {
605 foreach s $::tclnum_syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd }
606 }
607 }
608 }
609
610 proc tclnum_create {args} {
611 set mode query
612 if {[llength $args]} {
613 set mode [lindex $args 0]
614 }
615 if {$mode != "query" && $mode != "document"} { error "bad mode: $mode" }
616 return [list tclnum_tokenize $mode]
617 }
618
619 proc fts5_tclnum_register {db} {
620 foreach SYNDICT {
621 {zero 0}
622 {one 1 i}
623 {two 2 ii}
624 {three 3 iii}
625 {four 4 iv}
626 {five 5 v}
627 {six 6 vi}
628 {seven 7 vii}
629 {eight 8 viii}
630 {nine 9 ix}
631
632 {a1 a2 a3 a4 a5 a6 a7 a8 a9}
633 {b1 b2 b3 b4 b5 b6 b7 b8 b9}
634 {c1 c2 c3 c4 c5 c6 c7 c8 c9}
635 } {
636 foreach s $SYNDICT {
637 set o [list]
638 foreach x $SYNDICT {if {$x!=$s} {lappend o $x}}
639 set ::tclnum_syn($s) $o
640 }
641 }
642 sqlite3_fts5_create_tokenizer db tclnum tclnum_create
643 }
644 #
645 # End of tokenizer code.
646 #-------------------------------------------------------------------------
647
OLDNEW
« no previous file with comments | « third_party/sqlite/src/ext/fts5/fts5parse.y ('k') | third_party/sqlite/src/ext/fts5/test/fts5aa.test » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698