third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5_common.tcl - Issue 2747283002: [sql] Import reference version of SQLite 3.17..

Side by Side Diff: third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5_common.tcl

Issue 2747283002: [sql] Import reference version of SQLite 3.17.. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/sqlite/sqlite-src-3170000/ext/fts5/mkportersteps.tcl ('k') | third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5aa.test » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # 2014 Dec 19

	2 #

	3 # The author disclaims copyright to this source code. In place of

	4 # a legal notice, here is a blessing:

	5 #

	6 # May you do good and not evil.

	7 # May you find forgiveness for yourself and forgive others.

	8 # May you share freely, never taking more than you give.

	9 #

	10 #***********************************************************************

	11 #

	12

	13 if {![info exists testdir]} {

	14 set testdir [file join [file dirname [info script]] .. .. .. test]

	15 }

	16 source $testdir/tester.tcl

	17

	18 ifcapable !fts5 {

	19 proc return_if_no_fts5 {} {

	20 finish_test

	21 return -code return

	22 }

	23 return

	24 } else {

	25 proc return_if_no_fts5 {} {}

	26 }

	27

	28 catch {

	29 sqlite3_fts5_may_be_corrupt 0

	30 reset_db

	31 }

	32

	33 proc fts5_test_poslist {cmd} {

	34 set res [list]

	35 for {set i 0} {$i < [$cmd xInstCount]} {incr i} {

	36 lappend res [string map {{ } .} [$cmd xInst $i]]

	37 }

	38 set res

	39 }

	40

	41 proc fts5_test_poslist2 {cmd} {

	42 set res [list]

	43

	44 for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {

	45 $cmd xPhraseForeach $i c o {

	46 lappend res $i.$c.$o

	47 }

	48 }

	49

	50 #set res

	51 sort_poslist $res

	52 }

	53

	54 proc fts5_test_collist {cmd} {

	55 set res [list]

	56

	57 for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {

	58 $cmd xPhraseColumnForeach $i c { lappend res $i.$c }

	59 }

	60

	61 set res

	62 }

	63

	64 proc fts5_test_columnsize {cmd} {

	65 set res [list]

	66 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {

	67 lappend res [$cmd xColumnSize $i]

	68 }

	69 set res

	70 }

	71

	72 proc fts5_test_columntext {cmd} {

	73 set res [list]

	74 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {

	75 lappend res [$cmd xColumnText $i]

	76 }

	77 set res

	78 }

	79

	80 proc fts5_test_columntotalsize {cmd} {

	81 set res [list]

	82 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {

	83 lappend res [$cmd xColumnTotalSize $i]

	84 }

	85 set res

	86 }

	87

	88 proc test_append_token {varname token iStart iEnd} {

	89 upvar $varname var

	90 lappend var $token

	91 return "SQLITE_OK"

	92 }

	93 proc fts5_test_tokenize {cmd} {

	94 set res [list]

	95 for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {

	96 set tokens [list]

	97 $cmd xTokenize [$cmd xColumnText $i] [list test_append_token tokens]

	98 lappend res $tokens

	99 }

	100 set res

	101 }

	102

	103 proc fts5_test_rowcount {cmd} {

	104 $cmd xRowCount

	105 }

	106

	107 proc test_queryphrase_cb {cnt cmd} {

	108 upvar $cnt L

	109 for {set i 0} {$i < [$cmd xInstCount]} {incr i} {

	110 foreach {ip ic io} [$cmd xInst $i] break

	111 set A($ic) 1

	112 }

	113 foreach ic [array names A] {

	114 lset L $ic [expr {[lindex $L $ic] + 1}]

	115 }

	116 }

	117 proc fts5_test_queryphrase {cmd} {

	118 set res [list]

	119 for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {

	120 set cnt [list]

	121 for {set j 0} {$j < [$cmd xColumnCount]} {incr j} { lappend cnt 0 }

	122 $cmd xQueryPhrase $i [list test_queryphrase_cb cnt]

	123 lappend res $cnt

	124 }

	125 set res

	126 }

	127

	128 proc fts5_test_phrasecount {cmd} {

	129 $cmd xPhraseCount

	130 }

	131

	132 proc fts5_test_all {cmd} {

	133 set res [list]

	134 lappend res columnsize [fts5_test_columnsize $cmd]

	135 lappend res columntext [fts5_test_columntext $cmd]

	136 lappend res columntotalsize [fts5_test_columntotalsize $cmd]

	137 lappend res poslist [fts5_test_poslist $cmd]

	138 lappend res tokenize [fts5_test_tokenize $cmd]

	139 lappend res rowcount [fts5_test_rowcount $cmd]

	140 set res

	141 }

	142

	143 proc fts5_aux_test_functions {db} {

	144 foreach f {

	145 fts5_test_columnsize

	146 fts5_test_columntext

	147 fts5_test_columntotalsize

	148 fts5_test_poslist

	149 fts5_test_poslist2

	150 fts5_test_collist

	151 fts5_test_tokenize

	152 fts5_test_rowcount

	153 fts5_test_all

	154

	155 fts5_test_queryphrase

	156 fts5_test_phrasecount

	157 } {

	158 sqlite3_fts5_create_function $db $f $f

	159 }

	160 }

	161

	162 proc fts5_segcount {tbl} {

	163 set N 0

	164 foreach n [fts5_level_segs $tbl] { incr N $n }

	165 set N

	166 }

	167

	168 proc fts5_level_segs {tbl} {

	169 set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"

	170 set ret [list]

	171 foreach L [lrange [db one $sql] 1 end] {

	172 lappend ret [expr [llength $L] - 3]

	173 }

	174 set ret

	175 }

	176

	177 proc fts5_level_segids {tbl} {

	178 set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"

	179 set ret [list]

	180 foreach L [lrange [db one $sql] 1 end] {

	181 set lvl [list]

	182 foreach S [lrange $L 3 end] {

	183 regexp {id=([1234567890]*)} $S -> segid

	184 lappend lvl $segid

	185 }

	186 lappend ret $lvl

	187 }

	188 set ret

	189 }

	190

	191 proc fts5_rnddoc {n} {

	192 set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]

	193 set doc [list]

	194 for {set i 0} {$i < $n} {incr i} {

	195 lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]"

	196 }

	197 set doc

	198 }

	199

	200 #-------------------------------------------------------------------------

	201 # Usage:

	202 #

	203 # nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2...

	204 #

	205 # This command is used to test if a document (set of column values) matches

	206 # the logical equivalent of a single FTS5 NEAR() clump and, if so, return

	207 # the equivalent of an FTS5 position list.

	208 #

	209 # Parameter $aCol is passed a list of the column values for the document

	210 # to test. Parameters $phrase1 and so on are the phrases.

	211 #

	212 # The result is a list of phrase hits. Each phrase hit is formatted as

	213 # three integers separated by "." characters, in the following format:

	214 #

	215 # <phrase number> . <column number> . <token offset>

	216 #

	217 # Options:

	218 #

	219 # -near N (NEAR distance. Default 10)

	220 # -col C (List of column indexes to match against)

	221 # -pc VARNAME (variable in caller frame to use for phrase numbering)

	222 # -dict VARNAME (array in caller frame to use for synonyms)

	223 #

	224 proc nearset {aCol args} {

	225

	226 # Process the command line options.

	227 #

	228 set O(-near) 10

	229 set O(-col) {}

	230 set O(-pc) ""

	231 set O(-dict) ""

	232

	233 set nOpt [lsearch -exact $args --]

	234 if {$nOpt<0} { error "no -- option" }

	235

	236 # Set $lPhrase to be a list of phrases. $nPhrase its length.

	237 set lPhrase [lrange $args [expr $nOpt+1] end]

	238 set nPhrase [llength $lPhrase]

	239

	240 foreach {k v} [lrange $args 0 [expr $nOpt-1]] {

	241 if {[info exists O($k)]==0} { error "unrecognized option $k" }

	242 set O($k) $v

	243 }

	244

	245 if {$O(-pc) == ""} {

	246 set counter 0

	247 } else {

	248 upvar $O(-pc) counter

	249 }

	250

	251 if {$O(-dict)!=""} { upvar $O(-dict) aDict }

	252

	253 for {set j 0} {$j < [llength $aCol]} {incr j} {

	254 for {set i 0} {$i < $nPhrase} {incr i} {

	255 set A($j,$i) [list]

	256 }

	257 }

	258

	259 # Loop through each column of the current row.

	260 for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {

	261

	262 # If there is a column filter, test whether this column is excluded. If

	263 # so, skip to the next iteration of this loop. Otherwise, set zCol to the

	264 # column value and nToken to the number of tokens that comprise it.

	265 if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue

	266 set zCol [lindex $aCol $iCol]

	267 set nToken [llength $zCol]

	268

	269 # Each iteration of the following loop searches a substring of the

	270 # column value for phrase matches. The last token of the substring

	271 # is token $iLast of the column value. The first token is:

	272 #

	273 # iFirst = ($iLast - $O(-near) - 1)

	274 #

	275 # where $sz is the length of the phrase being searched for. A phrase

	276 # counts as matching the substring if its first token lies on or before

	277 # $iLast and its last token on or after $iFirst.

	278 #

	279 # For example, if the query is "NEAR(a+b c, 2)" and the column value:

	280 #

	281 # "x x x x A B x x C x"

	282 # 0 1 2 3 4 5 6 7 8 9"

	283 #

	284 # when (iLast==8 && iFirst=5) the range will contain both phrases and

	285 # so both instances can be added to the output poslists.

	286 #

	287 set iLast [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]

	288 for { } {$iLast < $nToken} {incr iLast} {

	289

	290 catch { array unset B }

	291

	292 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {

	293 set p [lindex $lPhrase $iPhrase]

	294 set nPm1 [expr {[llength $p] - 1}]

	295 set iFirst [expr $iLast - $O(-near) - [llength $p]]

	296

	297 for {set i $iFirst} {$i <= $iLast} {incr i} {

	298 set lCand [lrange $zCol $i [expr $i+$nPm1]]

	299 set bMatch 1

	300 foreach tok $p term $lCand {

	301 if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break }

	302 }

	303 if {$bMatch} { lappend B($iPhrase) $i }

	304 }

	305

	306 if {![info exists B($iPhrase)]} break

	307 }

	308

	309 if {$iPhrase==$nPhrase} {

	310 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {

	311 set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]

	312 set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]

	313 }

	314 }

	315 }

	316 }

	317

	318 set res [list]

	319 #puts [array names A]

	320

	321 for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {

	322 for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {

	323 foreach a $A($iCol,$iPhrase) {

	324 lappend res "$counter.$iCol.$a"

	325 }

	326 }

	327 incr counter

	328 }

	329

	330 #puts "$aCol -> $res"

	331 sort_poslist $res

	332 }

	333

	334 proc nearset_match {aDictVar tok term} {

	335 if {[string match $tok $term]} { return 1 }

	336

	337 upvar $aDictVar aDict

	338 if {[info exists aDict($tok)]} {

	339 foreach s $aDict($tok) {

	340 if {[string match $s $term]} { return 1 }

	341 }

	342 }

	343 return 0;

	344 }

	345

	346 #-------------------------------------------------------------------------

	347 # Usage:

	348 #

	349 # sort_poslist LIST

	350 #

	351 # Sort a position list of the type returned by command [nearset]

	352 #

	353 proc sort_poslist {L} {

	354 lsort -command instcompare $L

	355 }

	356 proc instcompare {lhs rhs} {

	357 foreach {p1 c1 o1} [split $lhs .] {}

	358 foreach {p2 c2 o2} [split $rhs .] {}

	359

	360 set res [expr $c1 - $c2]

	361 if {$res==0} { set res [expr $o1 - $o2] }

	362 if {$res==0} { set res [expr $p1 - $p2] }

	363

	364 return $res

	365 }

	366

	367 #-------------------------------------------------------------------------

	368 # Logical operators used by the commands returned by fts5_tcl_expr().

	369 #

	370 proc AND {args} {

	371 foreach a $args {

	372 if {[llength $a]==0} { return [list] }

	373 }

	374 sort_poslist [concat {*}$args]

	375 }

	376 proc OR {args} {

	377 sort_poslist [concat {*}$args]

	378 }

	379 proc NOT {a b} {

	380 if {[llength $b]>0} { return [list] }

	381 return $a

	382 }

	383

	384 #-------------------------------------------------------------------------

	385 # This command is similar to [split], except that it also provides the

	386 # start and end offsets of each token. For example:

	387 #

	388 # [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}

	389 #

	390

	391 proc gobble_whitespace {textvar} {

	392 upvar $textvar t

	393 regexp {([ ])(.)} $t -> space t

	394 return [string length $space]

	395 }

	396

	397 proc gobble_text {textvar wordvar} {

	398 upvar $textvar t

	399 upvar $wordvar w

	400 regexp {([^ ])(.)} $t -> w t

	401 return [string length $w]

	402 }

	403

	404 proc fts5_tokenize_split {text} {

	405 set token ""

	406 set ret [list]

	407 set iOff [gobble_whitespace text]

	408 while {[set nToken [gobble_text text word]]} {

	409 lappend ret $word $iOff [expr $iOff+$nToken]

	410 incr iOff $nToken

	411 incr iOff [gobble_whitespace text]

	412 }

	413

	414 set ret

	415 }

	416

	417 #-------------------------------------------------------------------------

	418 #

	419 proc foreach_detail_mode {prefix script} {

	420 set saved $::testprefix

	421 foreach d [list full col none] {

	422 set s [string map [list %DETAIL% $d] $script]

	423 set ::detail $d

	424 set ::testprefix "$prefix-$d"

	425 reset_db

	426 uplevel $s

	427 unset ::detail

	428 }

	429 set ::testprefix $saved

	430 }

	431

	432 proc detail_check {} {

	433 if {$::detail != "none" && $::detail!="full" && $::detail!="col"} {

	434 error "not in foreach_detail_mode {...} block"

	435 }

	436 }

	437 proc detail_is_none {} { detail_check ; expr {$::detail == "none"} }

	438 proc detail_is_col {} { detail_check ; expr {$::detail == "col" } }

	439 proc detail_is_full {} { detail_check ; expr {$::detail == "full"} }

	440

	441

	442 #-------------------------------------------------------------------------

	443 # Convert a poslist of the type returned by fts5_test_poslist() to a

	444 # collist as returned by fts5_test_collist().

	445 #

	446 proc fts5_poslist2collist {poslist} {

	447 set res [list]

	448 foreach h $poslist {

	449 regexp {(.*)\.[1234567890]+} $h -> cand

	450 lappend res $cand

	451 }

	452 set res [lsort -command fts5_collist_elem_compare -unique $res]

	453 return $res

	454 }

	455

	456 # Comparison function used by fts5_poslist2collist to sort collist entries.

	457 proc fts5_collist_elem_compare {a b} {

	458 foreach {a1 a2} [split $a .] {}

	459 foreach {b1 b2} [split $b .] {}

	460

	461 if {$a1==$b1} { return [expr $a2 - $b2] }

	462 return [expr $a1 - $b1]

	463 }

	464

	465

	466 #--------------------------------------------------------------------------

	467 # Construct and return a tcl list equivalent to that returned by the SQL

	468 # query executed against database handle [db]:

	469 #

	470 # SELECT

	471 # rowid,

	472 # fts5_test_poslist($tbl),

	473 # fts5_test_collist($tbl)

	474 # FROM $tbl('$expr')

	475 # ORDER BY rowid $order;

	476 #

	477 proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} {

	478

	479 # Figure out the set of columns in the FTS5 table. This routine does

	480 # not handle tables with UNINDEXED columns, but if it did, it would

	481 # have to be here.

	482 db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }

	483

	484 set d ""

	485 if {$aDictVar != ""} {

	486 upvar $aDictVar aDict

	487 set d aDict

	488 }

	489

	490 set cols ""

	491 foreach e $lCols { append cols ", '$e'" }

	492 set tclexpr [db one [subst -novar {

	493 SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] )

	494 }]]

	495

	496 set res [list]

	497 db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {

	498 set cols [list]

	499 foreach col $lCols { lappend cols $x($col) }

	500

	501 set ::pc 0

	502 set rowdata [eval $tclexpr]

	503 if {$rowdata != ""} {

	504 lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata]

	505 }

	506 }

	507

	508 set res

	509 }

	510

	511 #-------------------------------------------------------------------------

	512 # Similar to [fts5_query_data], but omit the collist field.

	513 #

	514 proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} {

	515 set res [list]

	516

	517 if {$aDictVar!=""} {

	518 upvar $aDictVar aDict

	519 set dict aDict

	520 } else {

	521 set dict ""

	522 }

	523

	524 foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {

	525 lappend res $rowid $poslist

	526 }

	527 set res

	528 }

	529

	530 proc fts5_collist_data {expr tbl {order ASC} {aDictVar ""}} {

	531 set res [list]

	532

	533 if {$aDictVar!=""} {

	534 upvar $aDictVar aDict

	535 set dict aDict

	536 } else {

	537 set dict ""

	538 }

	539

	540 foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {

	541 lappend res $rowid $collist

	542 }

	543 set res

	544 }

	545

	546 #-------------------------------------------------------------------------

	547 #

	548

	549 # This command will only work inside a [foreach_detail_mode] block. It tests

	550 # whether or not expression $expr run on FTS5 table $tbl is supported by

	551 # the current mode. If so, 1 is returned. If not, 0.

	552 #

	553 # detail=full (all queries supported)

	554 # detail=col (all but phrase queries and NEAR queries)

	555 # detail=none (all but phrase queries, NEAR queries, and column filters)

	556 #

	557 proc fts5_expr_ok {expr tbl} {

	558

	559 if {![detail_is_full]} {

	560 set nearset "nearset_rc"

	561 if {[detail_is_col]} { set nearset "nearset_rf" }

	562

	563 set ::expr_not_ok 0

	564 db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }

	565

	566 set cols ""

	567 foreach e $lCols { append cols ", '$e'" }

	568 set ::pc 0

	569 set tclexpr [db one [subst -novar {

	570 SELECT fts5_expr_tcl( $expr, '[set nearset] $cols -pc ::pc' [set cols] )

	571 }]]

	572 eval $tclexpr

	573 if {$::expr_not_ok} { return 0 }

	574 }

	575

	576 return 1

	577 }

	578

	579 # Helper for [fts5_expr_ok]

	580 proc nearset_rf {aCol args} {

	581 set idx [lsearch -exact $args --]

	582 if {$idx != [llength $args]-2 \|\| [llength [lindex $args end]]!=1} {

	583 set ::expr_not_ok 1

	584 }

	585 list

	586 }

	587

	588 # Helper for [fts5_expr_ok]

	589 proc nearset_rc {aCol args} {

	590 nearset_rf $aCol {*}$args

	591 if {[lsearch $args -col]>=0} {

	592 set ::expr_not_ok 1

	593 }

	594 list

	595 }

	596

	597

	598 #-------------------------------------------------------------------------

	599 # Code for a simple Tcl tokenizer that supports synonyms at query time.

	600 #

	601 proc tclnum_tokenize {mode tflags text} {

	602 foreach {w iStart iEnd} [fts5_tokenize_split $text] {

	603 sqlite3_fts5_token $w $iStart $iEnd

	604 if {$tflags == $mode && [info exists ::tclnum_syn($w)]} {

	605 foreach s $::tclnum_syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd }

	606 }

	607 }

	608 }

	609

	610 proc tclnum_create {args} {

	611 set mode query

	612 if {[llength $args]} {

	613 set mode [lindex $args 0]

	614 }

	615 if {$mode != "query" && $mode != "document"} { error "bad mode: $mode" }

	616 return [list tclnum_tokenize $mode]

	617 }

	618

	619 proc fts5_tclnum_register {db} {

	620 foreach SYNDICT {

	621 {zero 0}

	622 {one 1 i}

	623 {two 2 ii}

	624 {three 3 iii}

	625 {four 4 iv}

	626 {five 5 v}

	627 {six 6 vi}

	628 {seven 7 vii}

	629 {eight 8 viii}

	630 {nine 9 ix}

	631

	632 {a1 a2 a3 a4 a5 a6 a7 a8 a9}

	633 {b1 b2 b3 b4 b5 b6 b7 b8 b9}

	634 {c1 c2 c3 c4 c5 c6 c7 c8 c9}

	635 } {

	636 foreach s $SYNDICT {

	637 set o [list]

	638 foreach x $SYNDICT {if {$x!=$s} {lappend o $x}}

	639 set ::tclnum_syn($s) $o

	640 }

	641 }

	642 sqlite3_fts5_create_tokenizer db tclnum tclnum_create

	643 }

	644 #

	645 # End of tokenizer code.

	646 #-------------------------------------------------------------------------

	647

OLD	NEW