| Index: third_party/sqlite/src/ext/fts5/test/fts5_common.tcl
|
| diff --git a/third_party/sqlite/src/ext/fts5/test/fts5_common.tcl b/third_party/sqlite/src/ext/fts5/test/fts5_common.tcl
|
| index 2c64b3b9a4b42ca4718d86bdbd8cc6e5038b0e77..0f371dcfd9d90e2e61cc3908cd05ccf157c3b83e 100644
|
| --- a/third_party/sqlite/src/ext/fts5/test/fts5_common.tcl
|
| +++ b/third_party/sqlite/src/ext/fts5/test/fts5_common.tcl
|
| @@ -15,6 +15,16 @@ if {![info exists testdir]} {
|
| }
|
| source $testdir/tester.tcl
|
|
|
| +ifcapable !fts5 {
|
| + proc return_if_no_fts5 {} {
|
| + finish_test
|
| + return -code return
|
| + }
|
| + return
|
| +} else {
|
| + proc return_if_no_fts5 {} {}
|
| +}
|
| +
|
| catch {
|
| sqlite3_fts5_may_be_corrupt 0
|
| reset_db
|
| @@ -28,6 +38,29 @@ proc fts5_test_poslist {cmd} {
|
| set res
|
| }
|
|
|
| +proc fts5_test_poslist2 {cmd} {
|
| + set res [list]
|
| +
|
| + for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
|
| + $cmd xPhraseForeach $i c o {
|
| + lappend res $i.$c.$o
|
| + }
|
| + }
|
| +
|
| + #set res
|
| + sort_poslist $res
|
| +}
|
| +
|
| +proc fts5_test_collist {cmd} {
|
| + set res [list]
|
| +
|
| + for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
|
| + $cmd xPhraseColumnForeach $i c { lappend res $i.$c }
|
| + }
|
| +
|
| + set res
|
| +}
|
| +
|
| proc fts5_test_columnsize {cmd} {
|
| set res [list]
|
| for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
|
| @@ -113,6 +146,8 @@ proc fts5_aux_test_functions {db} {
|
| fts5_test_columntext
|
| fts5_test_columntotalsize
|
| fts5_test_poslist
|
| + fts5_test_poslist2
|
| + fts5_test_collist
|
| fts5_test_tokenize
|
| fts5_test_rowcount
|
| fts5_test_all
|
| @@ -124,6 +159,12 @@ proc fts5_aux_test_functions {db} {
|
| }
|
| }
|
|
|
| +proc fts5_segcount {tbl} {
|
| + set N 0
|
| + foreach n [fts5_level_segs $tbl] { incr N $n }
|
| + set N
|
| +}
|
| +
|
| proc fts5_level_segs {tbl} {
|
| set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
|
| set ret [list]
|
| @@ -178,15 +219,24 @@ proc fts5_rnddoc {n} {
|
| # -near N (NEAR distance. Default 10)
|
| # -col C (List of column indexes to match against)
|
| # -pc VARNAME (variable in caller frame to use for phrase numbering)
|
| +# -dict VARNAME (array in caller frame to use for synonyms)
|
| #
|
| proc nearset {aCol args} {
|
| +
|
| + # Process the command line options.
|
| + #
|
| set O(-near) 10
|
| set O(-col) {}
|
| set O(-pc) ""
|
| + set O(-dict) ""
|
|
|
| set nOpt [lsearch -exact $args --]
|
| if {$nOpt<0} { error "no -- option" }
|
|
|
| + # Set $lPhrase to be a list of phrases. $nPhrase its length.
|
| + set lPhrase [lrange $args [expr $nOpt+1] end]
|
| + set nPhrase [llength $lPhrase]
|
| +
|
| foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
|
| if {[info exists O($k)]==0} { error "unrecognized option $k" }
|
| set O($k) $v
|
| @@ -198,9 +248,7 @@ proc nearset {aCol args} {
|
| upvar $O(-pc) counter
|
| }
|
|
|
| - # Set $phraselist to be a list of phrases. $nPhrase its length.
|
| - set phraselist [lrange $args [expr $nOpt+1] end]
|
| - set nPhrase [llength $phraselist]
|
| + if {$O(-dict)!=""} { upvar $O(-dict) aDict }
|
|
|
| for {set j 0} {$j < [llength $aCol]} {incr j} {
|
| for {set i 0} {$i < $nPhrase} {incr i} {
|
| @@ -208,27 +256,54 @@ proc nearset {aCol args} {
|
| }
|
| }
|
|
|
| - set iCol -1
|
| - foreach col $aCol {
|
| - incr iCol
|
| - if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
|
| - set nToken [llength $col]
|
| + # Loop through each column of the current row.
|
| + for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
|
|
|
| - set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
|
| - for { } {$iFL < $nToken} {incr iFL} {
|
| - for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
|
| - set B($iPhrase) [list]
|
| - }
|
| + # If there is a column filter, test whether this column is excluded. If
|
| + # so, skip to the next iteration of this loop. Otherwise, set zCol to the
|
| + # column value and nToken to the number of tokens that comprise it.
|
| + if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
|
| + set zCol [lindex $aCol $iCol]
|
| + set nToken [llength $zCol]
|
| +
|
| + # Each iteration of the following loop searches a substring of the
|
| + # column value for phrase matches. The last token of the substring
|
| + # is token $iLast of the column value. The first token is:
|
| + #
|
| + # iFirst = ($iLast - $O(-near) - 1)
|
| + #
|
| + # where $sz is the length of the phrase being searched for. A phrase
|
| + # counts as matching the substring if its first token lies on or before
|
| + # $iLast and its last token on or after $iFirst.
|
| + #
|
| + # For example, if the query is "NEAR(a+b c, 2)" and the column value:
|
| + #
|
| + # "x x x x A B x x C x"
|
| + # 0 1 2 3 4 5 6 7 8 9"
|
| + #
|
| + # when (iLast==8 && iFirst=5) the range will contain both phrases and
|
| + # so both instances can be added to the output poslists.
|
| + #
|
| + set iLast [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
|
| + for { } {$iLast < $nToken} {incr iLast} {
|
| +
|
| + catch { array unset B }
|
|
|
| for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
|
| - set p [lindex $phraselist $iPhrase]
|
| + set p [lindex $lPhrase $iPhrase]
|
| set nPm1 [expr {[llength $p] - 1}]
|
| - set iFirst [expr $iFL - $O(-near) - [llength $p]]
|
| -
|
| - for {set i $iFirst} {$i <= $iFL} {incr i} {
|
| - if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i }
|
| + set iFirst [expr $iLast - $O(-near) - [llength $p]]
|
| +
|
| + for {set i $iFirst} {$i <= $iLast} {incr i} {
|
| + set lCand [lrange $zCol $i [expr $i+$nPm1]]
|
| + set bMatch 1
|
| + foreach tok $p term $lCand {
|
| + if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break }
|
| + }
|
| + if {$bMatch} { lappend B($iPhrase) $i }
|
| }
|
| - if {[llength $B($iPhrase)] == 0} break
|
| +
|
| + if {![info exists B($iPhrase)]} break
|
| }
|
|
|
| if {$iPhrase==$nPhrase} {
|
| @@ -252,10 +327,22 @@ proc nearset {aCol args} {
|
| incr counter
|
| }
|
|
|
| - #puts $res
|
| + #puts "$aCol -> $res"
|
| sort_poslist $res
|
| }
|
|
|
| +proc nearset_match {aDictVar tok term} {
|
| + if {[string match $tok $term]} { return 1 }
|
| +
|
| + upvar $aDictVar aDict
|
| + if {[info exists aDict($tok)]} {
|
| + foreach s $aDict($tok) {
|
| + if {[string match $s $term]} { return 1 }
|
| + }
|
| + }
|
| + return 0;
|
| +}
|
| +
|
| #-------------------------------------------------------------------------
|
| # Usage:
|
| #
|
| @@ -327,3 +414,234 @@ proc fts5_tokenize_split {text} {
|
| set ret
|
| }
|
|
|
| +#-------------------------------------------------------------------------
|
| +#
|
| +proc foreach_detail_mode {prefix script} {
|
| + set saved $::testprefix
|
| + foreach d [list full col none] {
|
| + set s [string map [list %DETAIL% $d] $script]
|
| + set ::detail $d
|
| + set ::testprefix "$prefix-$d"
|
| + reset_db
|
| + uplevel $s
|
| + unset ::detail
|
| + }
|
| + set ::testprefix $saved
|
| +}
|
| +
|
| +proc detail_check {} {
|
| + if {$::detail != "none" && $::detail!="full" && $::detail!="col"} {
|
| + error "not in foreach_detail_mode {...} block"
|
| + }
|
| +}
|
| +proc detail_is_none {} { detail_check ; expr {$::detail == "none"} }
|
| +proc detail_is_col {} { detail_check ; expr {$::detail == "col" } }
|
| +proc detail_is_full {} { detail_check ; expr {$::detail == "full"} }
|
| +
|
| +
|
| +#-------------------------------------------------------------------------
|
| +# Convert a poslist of the type returned by fts5_test_poslist() to a
|
| +# collist as returned by fts5_test_collist().
|
| +#
|
| +proc fts5_poslist2collist {poslist} {
|
| + set res [list]
|
| + foreach h $poslist {
|
| + regexp {(.*)\.[1234567890]+} $h -> cand
|
| + lappend res $cand
|
| + }
|
| + set res [lsort -command fts5_collist_elem_compare -unique $res]
|
| + return $res
|
| +}
|
| +
|
| +# Comparison function used by fts5_poslist2collist to sort collist entries.
|
| +proc fts5_collist_elem_compare {a b} {
|
| + foreach {a1 a2} [split $a .] {}
|
| + foreach {b1 b2} [split $b .] {}
|
| +
|
| + if {$a1==$b1} { return [expr $a2 - $b2] }
|
| + return [expr $a1 - $b1]
|
| +}
|
| +
|
| +
|
| +#--------------------------------------------------------------------------
|
| +# Construct and return a tcl list equivalent to that returned by the SQL
|
| +# query executed against database handle [db]:
|
| +#
|
| +# SELECT
|
| +# rowid,
|
| +# fts5_test_poslist($tbl),
|
| +# fts5_test_collist($tbl)
|
| +# FROM $tbl('$expr')
|
| +# ORDER BY rowid $order;
|
| +#
|
| +proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} {
|
| +
|
| + # Figure out the set of columns in the FTS5 table. This routine does
|
| + # not handle tables with UNINDEXED columns, but if it did, it would
|
| + # have to be here.
|
| + db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }
|
| +
|
| + set d ""
|
| + if {$aDictVar != ""} {
|
| + upvar $aDictVar aDict
|
| + set d aDict
|
| + }
|
| +
|
| + set cols ""
|
| + foreach e $lCols { append cols ", '$e'" }
|
| + set tclexpr [db one [subst -novar {
|
| + SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] )
|
| + }]]
|
| +
|
| + set res [list]
|
| + db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
|
| + set cols [list]
|
| + foreach col $lCols { lappend cols $x($col) }
|
| +
|
| + set ::pc 0
|
| + set rowdata [eval $tclexpr]
|
| + if {$rowdata != ""} {
|
| + lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata]
|
| + }
|
| + }
|
| +
|
| + set res
|
| +}
|
| +
|
| +#-------------------------------------------------------------------------
|
| +# Similar to [fts5_query_data], but omit the collist field.
|
| +#
|
| +proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} {
|
| + set res [list]
|
| +
|
| + if {$aDictVar!=""} {
|
| + upvar $aDictVar aDict
|
| + set dict aDict
|
| + } else {
|
| + set dict ""
|
| + }
|
| +
|
| + foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {
|
| + lappend res $rowid $poslist
|
| + }
|
| + set res
|
| +}
|
| +
|
| +proc fts5_collist_data {expr tbl {order ASC} {aDictVar ""}} {
|
| + set res [list]
|
| +
|
| + if {$aDictVar!=""} {
|
| + upvar $aDictVar aDict
|
| + set dict aDict
|
| + } else {
|
| + set dict ""
|
| + }
|
| +
|
| + foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {
|
| + lappend res $rowid $collist
|
| + }
|
| + set res
|
| +}
|
| +
|
| +#-------------------------------------------------------------------------
|
| +#
|
| +
|
| +# This command will only work inside a [foreach_detail_mode] block. It tests
|
| +# whether or not expression $expr run on FTS5 table $tbl is supported by
|
| +# the current mode. If so, 1 is returned. If not, 0.
|
| +#
|
| +# detail=full (all queries supported)
|
| +# detail=col (all but phrase queries and NEAR queries)
|
| +# detail=none (all but phrase queries, NEAR queries, and column filters)
|
| +#
|
| +proc fts5_expr_ok {expr tbl} {
|
| +
|
| + if {![detail_is_full]} {
|
| + set nearset "nearset_rc"
|
| + if {[detail_is_col]} { set nearset "nearset_rf" }
|
| +
|
| + set ::expr_not_ok 0
|
| + db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }
|
| +
|
| + set cols ""
|
| + foreach e $lCols { append cols ", '$e'" }
|
| + set ::pc 0
|
| + set tclexpr [db one [subst -novar {
|
| + SELECT fts5_expr_tcl( $expr, '[set nearset] $cols -pc ::pc' [set cols] )
|
| + }]]
|
| + eval $tclexpr
|
| + if {$::expr_not_ok} { return 0 }
|
| + }
|
| +
|
| + return 1
|
| +}
|
| +
|
| +# Helper for [fts5_expr_ok]
|
| +proc nearset_rf {aCol args} {
|
| + set idx [lsearch -exact $args --]
|
| + if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} {
|
| + set ::expr_not_ok 1
|
| + }
|
| + list
|
| +}
|
| +
|
| +# Helper for [fts5_expr_ok]
|
| +proc nearset_rc {aCol args} {
|
| + nearset_rf $aCol {*}$args
|
| + if {[lsearch $args -col]>=0} {
|
| + set ::expr_not_ok 1
|
| + }
|
| + list
|
| +}
|
| +
|
| +
|
| +#-------------------------------------------------------------------------
|
| +# Code for a simple Tcl tokenizer that supports synonyms at query time.
|
| +#
|
| +proc tclnum_tokenize {mode tflags text} {
|
| + foreach {w iStart iEnd} [fts5_tokenize_split $text] {
|
| + sqlite3_fts5_token $w $iStart $iEnd
|
| + if {$tflags == $mode && [info exists ::tclnum_syn($w)]} {
|
| + foreach s $::tclnum_syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd }
|
| + }
|
| + }
|
| +}
|
| +
|
| +proc tclnum_create {args} {
|
| + set mode query
|
| + if {[llength $args]} {
|
| + set mode [lindex $args 0]
|
| + }
|
| + if {$mode != "query" && $mode != "document"} { error "bad mode: $mode" }
|
| + return [list tclnum_tokenize $mode]
|
| +}
|
| +
|
| +proc fts5_tclnum_register {db} {
|
| + foreach SYNDICT {
|
| + {zero 0}
|
| + {one 1 i}
|
| + {two 2 ii}
|
| + {three 3 iii}
|
| + {four 4 iv}
|
| + {five 5 v}
|
| + {six 6 vi}
|
| + {seven 7 vii}
|
| + {eight 8 viii}
|
| + {nine 9 ix}
|
| +
|
| + {a1 a2 a3 a4 a5 a6 a7 a8 a9}
|
| + {b1 b2 b3 b4 b5 b6 b7 b8 b9}
|
| + {c1 c2 c3 c4 c5 c6 c7 c8 c9}
|
| + } {
|
| + foreach s $SYNDICT {
|
| + set o [list]
|
| + foreach x $SYNDICT {if {$x!=$s} {lappend o $x}}
|
| + set ::tclnum_syn($s) $o
|
| + }
|
| + }
|
| + sqlite3_fts5_create_tokenizer db tclnum tclnum_create
|
| +}
|
| +#
|
| +# End of tokenizer code.
|
| +#-------------------------------------------------------------------------
|
| +
|
|
|