| Index: third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5_common.tcl
|
| diff --git a/third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5_common.tcl b/third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5_common.tcl
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..2c64b3b9a4b42ca4718d86bdbd8cc6e5038b0e77
|
| --- /dev/null
|
| +++ b/third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5_common.tcl
|
| @@ -0,0 +1,329 @@
|
| +# 2014 Dec 19
|
| +#
|
| +# The author disclaims copyright to this source code. In place of
|
| +# a legal notice, here is a blessing:
|
| +#
|
| +# May you do good and not evil.
|
| +# May you find forgiveness for yourself and forgive others.
|
| +# May you share freely, never taking more than you give.
|
| +#
|
| +#***********************************************************************
|
| +#
|
| +
|
| +# If the caller has not already set $testdir, point it at the directory
|
| +# named "test" three levels above the directory containing this script.
|
| +if {![info exists testdir]} {
|
| + set testdir [file join [file dirname [info script]] .. .. .. test]
|
| +}
|
| +source $testdir/tester.tcl
|
| +
|
| +# Any error raised by the two commands below is swallowed by [catch].
|
| +# NOTE(review): presumably this tolerates builds where these commands are
|
| +# not available - confirm against tester.tcl.
|
| +catch {
|
| + sqlite3_fts5_may_be_corrupt 0
|
| + reset_db
|
| +}
|
| +
|
| +# Return a list with one entry for each index below [$cmd xInstCount].
|
| +# Each entry is the result of [$cmd xInst $i] with every space replaced
|
| +# by a "." character.
|
| +proc fts5_test_poslist {cmd} {
|
| +  set hits {}
|
| +  set iInst 0
|
| +  while {$iInst < [$cmd xInstCount]} {
|
| +    lappend hits [string map {{ } .} [$cmd xInst $iInst]]
|
| +    incr iInst
|
| +  }
|
| +  return $hits
|
| +}
|
| +
|
| +# Return a list holding the result of [$cmd xColumnSize $i] for each
|
| +# index $i below [$cmd xColumnCount].
|
| +proc fts5_test_columnsize {cmd} {
|
| +  set sizes {}
|
| +  set iCol 0
|
| +  while {$iCol < [$cmd xColumnCount]} {
|
| +    lappend sizes [$cmd xColumnSize $iCol]
|
| +    incr iCol
|
| +  }
|
| +  return $sizes
|
| +}
|
| +
|
| +# Return a list holding the result of [$cmd xColumnText $i] for each
|
| +# index $i below [$cmd xColumnCount].
|
| +proc fts5_test_columntext {cmd} {
|
| +  set texts {}
|
| +  set iCol 0
|
| +  while {$iCol < [$cmd xColumnCount]} {
|
| +    lappend texts [$cmd xColumnText $iCol]
|
| +    incr iCol
|
| +  }
|
| +  return $texts
|
| +}
|
| +
|
| +# Return a list holding the result of [$cmd xColumnTotalSize $i] for each
|
| +# index $i below [$cmd xColumnCount].
|
| +proc fts5_test_columntotalsize {cmd} {
|
| +  set totals {}
|
| +  set iCol 0
|
| +  while {$iCol < [$cmd xColumnCount]} {
|
| +    lappend totals [$cmd xColumnTotalSize $iCol]
|
| +    incr iCol
|
| +  }
|
| +  return $totals
|
| +}
|
| +
|
| +# Tokenizer callback: append $token to the list variable named $varname in
|
| +# the calling frame and return the string "SQLITE_OK". The iStart and iEnd
|
| +# arguments are accepted but not used.
|
| +proc test_append_token {varname token iStart iEnd} {
|
| +  upvar $varname tokenList
|
| +  lappend tokenList $token
|
| +  return SQLITE_OK
|
| +}
|
| +# For each column index below [$cmd xColumnCount], pass the column text to
|
| +# [$cmd xTokenize] with [test_append_token] as the callback, collecting the
|
| +# tokens into a local list. Return the list of per-column token lists.
|
| +proc fts5_test_tokenize {cmd} {
|
| +  set perColumn {}
|
| +  set iCol 0
|
| +  while {$iCol < [$cmd xColumnCount]} {
|
| +    set colTokens {}
|
| +    set text [$cmd xColumnText $iCol]
|
| +    $cmd xTokenize $text [list test_append_token colTokens]
|
| +    lappend perColumn $colTokens
|
| +    incr iCol
|
| +  }
|
| +  return $perColumn
|
| +}
|
| +
|
| +# Return the result of [$cmd xRowCount].
|
| +proc fts5_test_rowcount {cmd} {
|
| +  return [$cmd xRowCount]
|
| +}
|
| +
|
| +# Callback used with xQueryPhrase. $cnt names a list variable in the
|
| +# calling frame. For every distinct second field (the value assigned to
|
| +# "ic") among the results of [$cmd xInst $i], the list element at that
|
| +# index is incremented by one.
|
| +proc test_queryphrase_cb {cnt cmd} {
|
| +  upvar $cnt counts
|
| +  set iInst 0
|
| +  while {$iInst < [$cmd xInstCount]} {
|
| +    # The loop body is only a "break": it assigns the first three fields.
|
| +    foreach {ip ic io} [$cmd xInst $iInst] break
|
| +    set seen($ic) 1
|
| +    incr iInst
|
| +  }
|
| +  foreach ic [array names seen] {
|
| +    set bumped [expr {[lindex $counts $ic] + 1}]
|
| +    lset counts $ic $bumped
|
| +  }
|
| +}
|
| +# For each phrase index below [$cmd xPhraseCount], build a list containing
|
| +# one zero per column, pass it by name to [test_queryphrase_cb] through
|
| +# [$cmd xQueryPhrase], and collect the resulting list. Return the list of
|
| +# per-phrase lists.
|
| +proc fts5_test_queryphrase {cmd} {
|
| +  set perPhrase {}
|
| +  set iPhrase 0
|
| +  while {$iPhrase < [$cmd xPhraseCount]} {
|
| +    set tally {}
|
| +    set iCol 0
|
| +    while {$iCol < [$cmd xColumnCount]} {
|
| +      lappend tally 0
|
| +      incr iCol
|
| +    }
|
| +    $cmd xQueryPhrase $iPhrase [list test_queryphrase_cb tally]
|
| +    lappend perPhrase $tally
|
| +    incr iPhrase
|
| +  }
|
| +  return $perPhrase
|
| +}
|
| +
|
| +# Return the result of [$cmd xPhraseCount].
|
| +proc fts5_test_phrasecount {cmd} {
|
| +  return [$cmd xPhraseCount]
|
| +}
|
| +
|
| +# Return a flat name/value list combining the results of six of the
|
| +# fts5_test_* commands, invoked in this order: columnsize, columntext,
|
| +# columntotalsize, poslist, tokenize, rowcount.
|
| +proc fts5_test_all {cmd} {
|
| +  set out {}
|
| +  foreach name {
|
| +    columnsize columntext columntotalsize poslist tokenize rowcount
|
| +  } {
|
| +    lappend out $name [fts5_test_$name $cmd]
|
| +  }
|
| +  return $out
|
| +}
|
| +
|
| +# Call [sqlite3_fts5_create_function] on $db once for each fts5_test_*
|
| +# command listed below, passing the command name as both the second and
|
| +# the third argument.
|
| +proc fts5_aux_test_functions {db} {
|
| +  set auxFuncs {
|
| +    fts5_test_columnsize
|
| +    fts5_test_columntext
|
| +    fts5_test_columntotalsize
|
| +    fts5_test_poslist
|
| +    fts5_test_tokenize
|
| +    fts5_test_rowcount
|
| +    fts5_test_all
|
| +    fts5_test_queryphrase
|
| +    fts5_test_phrasecount
|
| +  }
|
| +  foreach name $auxFuncs {
|
| +    sqlite3_fts5_create_function $db $name $name
|
| +  }
|
| +}
|
| +
|
| +# Run fts5_decode() on the row of ${tbl}_data with rowid=10 using the [db]
|
| +# command. Skipping the first element of the decoded list, return a list
|
| +# holding, for each remaining element, its list length minus 3.
|
| +proc fts5_level_segs {tbl} {
|
| +  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
|
| +  set decoded [db one $sql]
|
| +  set nSegs {}
|
| +  foreach level [lrange $decoded 1 end] {
|
| +    lappend nSegs [expr {[llength $level] - 3}]
|
| +  }
|
| +  return $nSegs
|
| +}
|
| +
|
| +# Run fts5_decode() on the row of ${tbl}_data with rowid=10 using the [db]
|
| +# command. Skipping the first element of the decoded list, return one
|
| +# sub-list per remaining element, holding the digits that follow "id=" in
|
| +# each of that element's entries from index 3 onwards.
|
| +proc fts5_level_segids {tbl} {
|
| +  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
|
| +  set decoded [db one $sql]
|
| +  set allLevels {}
|
| +  foreach level [lrange $decoded 1 end] {
|
| +    set ids {}
|
| +    foreach seg [lrange $level 3 end] {
|
| +      # If the pattern does not match, $segid keeps whatever value it had.
|
| +      regexp {id=([1234567890]*)} $seg -> segid
|
| +      lappend ids $segid
|
| +    }
|
| +    lappend allLevels $ids
|
| +  }
|
| +  return $allLevels
|
| +}
|
| +
|
| +# Return a list of $n pseudo-random words. Each word is "x" followed by
|
| +# three letters from a-j, obtained by formatting a random integer in the
|
| +# range 0..999 as three digits and mapping 0->a, 1->b, ... 9->j.
|
| +proc fts5_rnddoc {n} {
|
| +  set digitToLetter {0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j}
|
| +  set words {}
|
| +  for {set k 0} {$k < $n} {incr k} {
|
| +    set num [expr {int(rand()*1000)}]
|
| +    set digits [format %.3d $num]
|
| +    lappend words "x[string map $digitToLetter $digits]"
|
| +  }
|
| +  return $words
|
| +}
|
| +
|
| +#-------------------------------------------------------------------------
|
| +# Usage:
|
| +#
|
| +# nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2...
|
| +#
|
| +# This command is used to test if a document (set of column values) matches
|
| +# the logical equivalent of a single FTS5 NEAR() clump and, if so, return
|
| +# the equivalent of an FTS5 position list.
|
| +#
|
| +# Parameter $aCol is passed a list of the column values for the document
|
| +# to test. Parameters $phrase1 and so on are the phrases.
|
| +#
|
| +# The result is a list of phrase hits. Each phrase hit is formatted as
|
| +# three integers separated by "." characters, in the following format:
|
| +#
|
| +# <phrase number> . <column number> . <token offset>
|
| +#
|
| +# Options:
|
| +#
|
| +# -near N (NEAR distance. Default 10)
|
| +# -col C (List of column indexes to match against)
|
| +# -pc VARNAME (variable in caller frame to use for phrase numbering)
|
| +#
|
| +proc nearset {aCol args} {
|
| + # Option defaults. Array O is indexed by option name.
|
| + set O(-near) 10
|
| + set O(-col) {}
|
| + set O(-pc) ""
|
| +
|
| + # Everything before the mandatory "--" is a list of option/value pairs.
|
| + set nOpt [lsearch -exact $args --]
|
| + if {$nOpt<0} { error "no -- option" }
|
| +
|
| + foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
|
| + if {[info exists O($k)]==0} { error "unrecognized option $k" }
|
| + set O($k) $v
|
| + }
|
| +
|
| + # Without -pc the phrase counter is a local starting at 0. With -pc it
|
| + # is linked to the named variable in the caller's frame, so increments
|
| + # made below are visible to the caller.
|
| + if {$O(-pc) == ""} {
|
| + set counter 0
|
| + } else {
|
| + upvar $O(-pc) counter
|
| + }
|
| +
|
| + # Set $phraselist to be a list of phrases. $nPhrase its length.
|
| + set phraselist [lrange $args [expr $nOpt+1] end]
|
| + set nPhrase [llength $phraselist]
|
| +
|
| + # A($j,$i) accumulates the token offsets of hits for phrase $i in
|
| + # column $j. Start every entry as an empty list.
|
| + for {set j 0} {$j < [llength $aCol]} {incr j} {
|
| + for {set i 0} {$i < $nPhrase} {incr i} {
|
| + set A($j,$i) [list]
|
| + }
|
| + }
|
| +
|
| + set iCol -1
|
| + foreach col $aCol {
|
| + incr iCol
|
| + # Skip columns that are not listed in a non-empty -col option.
|
| + if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
|
| + set nToken [llength $col]
|
| +
|
| + # $iFL is the last token index of the window currently examined. It
|
| + # starts at the -near value, or at $nToken-1 if the column has no more
|
| + # than that many tokens, and advances to the last token of the column.
|
| + set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
|
| + for { } {$iFL < $nToken} {incr iFL} {
|
| + # B($iPhrase) holds the hits for each phrase within this window only.
|
| + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
|
| + set B($iPhrase) [list]
|
| + }
|
| +
|
| + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
|
| + set p [lindex $phraselist $iPhrase]
|
| + set nPm1 [expr {[llength $p] - 1}]
|
| + set iFirst [expr $iFL - $O(-near) - [llength $p]]
|
| +
|
| + # Record every offset $i in the window at which the tokens of $col
|
| + # starting at $i equal phrase $p. $iFirst may be negative.
|
| + # NOTE(review): "==" inside expr compares numerically when both
|
| + # operands look like numbers; "eq" would force a string comparison.
|
| + # Confirm that numeric-looking tokens are not a concern here.
|
| + for {set i $iFirst} {$i <= $iFL} {incr i} {
|
| + if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i }
|
| + }
|
| + # A phrase with no hit in this window means the window cannot match.
|
| + if {[llength $B($iPhrase)] == 0} break
|
| + }
|
| +
|
| + # $iPhrase reaches $nPhrase only if the loop above did not break,
|
| + # i.e. every phrase had at least one hit. In that case merge the
|
| + # window's hits into A, keeping each list sorted and free of
|
| + # duplicates.
|
| + if {$iPhrase==$nPhrase} {
|
| + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
|
| + set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
|
| + set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
|
| + }
|
| + }
|
| + }
|
| + }
|
| +
|
| + set res [list]
|
| + #puts [array names A]
|
| +
|
| + # Emit one "<phrase number>.<column>.<offset>" entry per recorded hit.
|
| + # The counter is incremented once per phrase, whether or not it had hits.
|
| + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
|
| + for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
|
| + foreach a $A($iCol,$iPhrase) {
|
| + lappend res "$counter.$iCol.$a"
|
| + }
|
| + }
|
| + incr counter
|
| + }
|
| +
|
| + #puts $res
|
| + # The sorted list is the return value of this command.
|
| + sort_poslist $res
|
| +}
|
| +
|
| +#-------------------------------------------------------------------------
|
| +# Usage:
|
| +#
|
| +# sort_poslist LIST
|
| +#
|
| +# Sort a position list of the type returned by command [nearset]
|
| +#
|
| +# Return list $L sorted using [instcompare] as the comparison command.
|
| +proc sort_poslist {L} {
|
| +  return [lsort -command instcompare $L]
|
| +}
|
| +# Comparison command for [lsort -command] over position-list entries of
|
| +# the form "<phrase>.<column>.<offset>".
|
| +#
|
| +# Entries are ordered by column, then by offset, then by phrase number.
|
| +# Returns a negative, zero or positive integer as $lhs sorts before,
|
| +# equal to, or after $rhs.
|
| +#
|
| +# Only the first three "."-separated fields of each entry are used. The
|
| +# expressions are braced so that field text is never re-parsed as part of
|
| +# an expression; a missing or non-numeric field raises an error.
|
| +proc instcompare {lhs rhs} {
|
| +  lassign [split $lhs .] p1 c1 o1
|
| +  lassign [split $rhs .] p2 c2 o2
|
| +
|
| +  set res [expr {$c1 - $c2}]
|
| +  if {$res == 0} { set res [expr {$o1 - $o2}] }
|
| +  if {$res == 0} { set res [expr {$p1 - $p2}] }
|
| +
|
| +  return $res
|
| +}
|
| +
|
| +#-------------------------------------------------------------------------
|
| +# Logical operators used by the commands returned by fts5_tcl_expr().
|
| +#
|
| +# Return an empty list if any argument is an empty list. Otherwise return
|
| +# the concatenation of all arguments, sorted by [sort_poslist].
|
| +proc AND {args} {
|
| +  foreach operand $args {
|
| +    if {![llength $operand]} { return {} }
|
| +  }
|
| +  return [sort_poslist [concat {*}$args]]
|
| +}
|
| +# Return the concatenation of all arguments, sorted by [sort_poslist].
|
| +proc OR {args} {
|
| +  set merged [concat {*}$args]
|
| +  return [sort_poslist $merged]
|
| +}
|
| +# Return $a if list $b is empty, otherwise return an empty list.
|
| +proc NOT {a b} {
|
| +  if {[llength $b] == 0} { return $a }
|
| +  return {}
|
| +}
|
| +
|
| +#-------------------------------------------------------------------------
|
| +# This command is similar to [split], except that it also provides the
|
| +# start and end offsets of each token. For example:
|
| +#
|
| +# [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
|
| +#
|
| +
|
| +# Strip leading space characters from the variable named $textvar in the
|
| +# calling frame and return the number of characters removed.
|
| +proc gobble_whitespace {textvar} {
|
| +  upvar $textvar remaining
|
| +  regexp {([ ]*)(.*)} $remaining -> leading remaining
|
| +  return [string length $leading]
|
| +}
|
| +
|
| +# Move the leading run of non-space characters from the variable named
|
| +# $textvar into the variable named $wordvar (both in the calling frame)
|
| +# and return the length of that run.
|
| +proc gobble_text {textvar wordvar} {
|
| +  upvar $textvar remaining
|
| +  upvar $wordvar head
|
| +  regexp {([^ ]*)(.*)} $remaining -> head remaining
|
| +  return [string length $head]
|
| +}
|
| +
|
| +# Split $text into tokens separated by space characters. Return a flat
|
| +# list with three elements per token: the token text, its start offset
|
| +# and its end offset (start offset plus token length) within $text.
|
| +proc fts5_tokenize_split {text} {
|
| +  set ret [list]
|
| +
|
| +  # $iOff tracks the offset in the original text of the start of the
|
| +  # unconsumed remainder held in $text.
|
| +  set iOff [gobble_whitespace text]
|
| +
|
| +  # gobble_text returns 0 once no token remains, which ends the loop.
|
| +  while {[set nToken [gobble_text text word]]} {
|
| +    lappend ret $word $iOff [expr {$iOff + $nToken}]
|
| +    incr iOff $nToken
|
| +    incr iOff [gobble_whitespace text]
|
| +  }
|
| +
|
| +  return $ret
|
| +}
|
| +
|
|
|