Index: third_party/sqlite/src/ext/fts5/test/fts5_common.tcl |
diff --git a/third_party/sqlite/src/ext/fts5/test/fts5_common.tcl b/third_party/sqlite/src/ext/fts5/test/fts5_common.tcl |
index 2c64b3b9a4b42ca4718d86bdbd8cc6e5038b0e77..0f371dcfd9d90e2e61cc3908cd05ccf157c3b83e 100644 |
--- a/third_party/sqlite/src/ext/fts5/test/fts5_common.tcl |
+++ b/third_party/sqlite/src/ext/fts5/test/fts5_common.tcl |
@@ -15,6 +15,16 @@ if {![info exists testdir]} { |
} |
source $testdir/tester.tcl |
+ifcapable !fts5 { |
+ proc return_if_no_fts5 {} { |
+ finish_test |
+ return -code return |
+ } |
+ return |
+} else { |
+ proc return_if_no_fts5 {} {} |
+} |
+ |
catch { |
sqlite3_fts5_may_be_corrupt 0 |
reset_db |
@@ -28,6 +38,29 @@ proc fts5_test_poslist {cmd} { |
set res |
} |
+proc fts5_test_poslist2 {cmd} { |
+ set res [list] |
+ |
+ for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { |
+ $cmd xPhraseForeach $i c o { |
+ lappend res $i.$c.$o |
+ } |
+ } |
+ |
+ #set res |
+ sort_poslist $res |
+} |
+ |
+proc fts5_test_collist {cmd} { |
+ set res [list] |
+ |
+ for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { |
+ $cmd xPhraseColumnForeach $i c { lappend res $i.$c } |
+ } |
+ |
+ set res |
+} |
+ |
proc fts5_test_columnsize {cmd} { |
set res [list] |
for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { |
@@ -113,6 +146,8 @@ proc fts5_aux_test_functions {db} { |
fts5_test_columntext |
fts5_test_columntotalsize |
fts5_test_poslist |
+ fts5_test_poslist2 |
+ fts5_test_collist |
fts5_test_tokenize |
fts5_test_rowcount |
fts5_test_all |
@@ -124,6 +159,12 @@ proc fts5_aux_test_functions {db} { |
} |
} |
+proc fts5_segcount {tbl} { |
+ set N 0 |
+ foreach n [fts5_level_segs $tbl] { incr N $n } |
+ set N |
+} |
+ |
proc fts5_level_segs {tbl} { |
set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10" |
set ret [list] |
@@ -178,15 +219,24 @@ proc fts5_rnddoc {n} { |
# -near N (NEAR distance. Default 10) |
# -col C (List of column indexes to match against) |
# -pc VARNAME (variable in caller frame to use for phrase numbering) |
+# -dict VARNAME (array in caller frame to use for synonyms) |
# |
proc nearset {aCol args} { |
+ |
+ # Process the command line options. |
+ # |
set O(-near) 10 |
set O(-col) {} |
set O(-pc) "" |
+ set O(-dict) "" |
set nOpt [lsearch -exact $args --] |
if {$nOpt<0} { error "no -- option" } |
+ # Set $lPhrase to be a list of phrases. $nPhrase its length. |
+ set lPhrase [lrange $args [expr $nOpt+1] end] |
+ set nPhrase [llength $lPhrase] |
+ |
foreach {k v} [lrange $args 0 [expr $nOpt-1]] { |
if {[info exists O($k)]==0} { error "unrecognized option $k" } |
set O($k) $v |
@@ -198,9 +248,7 @@ proc nearset {aCol args} { |
upvar $O(-pc) counter |
} |
- # Set $phraselist to be a list of phrases. $nPhrase its length. |
- set phraselist [lrange $args [expr $nOpt+1] end] |
- set nPhrase [llength $phraselist] |
+ if {$O(-dict)!=""} { upvar $O(-dict) aDict } |
for {set j 0} {$j < [llength $aCol]} {incr j} { |
for {set i 0} {$i < $nPhrase} {incr i} { |
@@ -208,27 +256,54 @@ proc nearset {aCol args} { |
} |
} |
- set iCol -1 |
- foreach col $aCol { |
- incr iCol |
- if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue |
- set nToken [llength $col] |
+ # Loop through each column of the current row. |
+ for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { |
- set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] |
- for { } {$iFL < $nToken} {incr iFL} { |
- for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { |
- set B($iPhrase) [list] |
- } |
+ # If there is a column filter, test whether this column is excluded. If |
+ # so, skip to the next iteration of this loop. Otherwise, set zCol to the |
+ # column value and nToken to the number of tokens that comprise it. |
+ if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue |
+ set zCol [lindex $aCol $iCol] |
+ set nToken [llength $zCol] |
+ |
+ # Each iteration of the following loop searches a substring of the |
+ # column value for phrase matches. The last token of the substring |
+ # is token $iLast of the column value. The first token is: |
+ # |
+ # iFirst = ($iLast - $O(-near) - 1) |
+ # |
+ # where $sz is the length of the phrase being searched for. A phrase |
+ # counts as matching the substring if its first token lies on or before |
+ # $iLast and its last token on or after $iFirst. |
+ # |
+ # For example, if the query is "NEAR(a+b c, 2)" and the column value: |
+ # |
+ # "x x x x A B x x C x" |
+ # 0 1 2 3 4 5 6 7 8 9" |
+ # |
+ # when (iLast==8 && iFirst=5) the range will contain both phrases and |
+ # so both instances can be added to the output poslists. |
+ # |
+ set iLast [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] |
+ for { } {$iLast < $nToken} {incr iLast} { |
+ |
+ catch { array unset B } |
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { |
- set p [lindex $phraselist $iPhrase] |
+ set p [lindex $lPhrase $iPhrase] |
set nPm1 [expr {[llength $p] - 1}] |
- set iFirst [expr $iFL - $O(-near) - [llength $p]] |
- |
- for {set i $iFirst} {$i <= $iFL} {incr i} { |
- if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i } |
+ set iFirst [expr $iLast - $O(-near) - [llength $p]] |
+ |
+ for {set i $iFirst} {$i <= $iLast} {incr i} { |
+ set lCand [lrange $zCol $i [expr $i+$nPm1]] |
+ set bMatch 1 |
+ foreach tok $p term $lCand { |
+ if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break } |
+ } |
+ if {$bMatch} { lappend B($iPhrase) $i } |
} |
- if {[llength $B($iPhrase)] == 0} break |
+ |
+ if {![info exists B($iPhrase)]} break |
} |
if {$iPhrase==$nPhrase} { |
@@ -252,10 +327,22 @@ proc nearset {aCol args} { |
incr counter |
} |
- #puts $res |
+ #puts "$aCol -> $res" |
sort_poslist $res |
} |
+proc nearset_match {aDictVar tok term} { |
+ if {[string match $tok $term]} { return 1 } |
+ |
+ upvar $aDictVar aDict |
+ if {[info exists aDict($tok)]} { |
+ foreach s $aDict($tok) { |
+ if {[string match $s $term]} { return 1 } |
+ } |
+ } |
+ return 0; |
+} |
+ |
#------------------------------------------------------------------------- |
# Usage: |
# |
@@ -327,3 +414,234 @@ proc fts5_tokenize_split {text} { |
set ret |
} |
+#------------------------------------------------------------------------- |
+# |
+proc foreach_detail_mode {prefix script} { |
+ set saved $::testprefix |
+ foreach d [list full col none] { |
+ set s [string map [list %DETAIL% $d] $script] |
+ set ::detail $d |
+ set ::testprefix "$prefix-$d" |
+ reset_db |
+ uplevel $s |
+ unset ::detail |
+ } |
+ set ::testprefix $saved |
+} |
+ |
+proc detail_check {} { |
+ if {$::detail != "none" && $::detail!="full" && $::detail!="col"} { |
+ error "not in foreach_detail_mode {...} block" |
+ } |
+} |
+proc detail_is_none {} { detail_check ; expr {$::detail == "none"} } |
+proc detail_is_col {} { detail_check ; expr {$::detail == "col" } } |
+proc detail_is_full {} { detail_check ; expr {$::detail == "full"} } |
+ |
+ |
+#------------------------------------------------------------------------- |
+# Convert a poslist of the type returned by fts5_test_poslist() to a |
+# collist as returned by fts5_test_collist(). |
+# |
+proc fts5_poslist2collist {poslist} { |
+ set res [list] |
+ foreach h $poslist { |
+ regexp {(.*)\.[1234567890]+} $h -> cand |
+ lappend res $cand |
+ } |
+ set res [lsort -command fts5_collist_elem_compare -unique $res] |
+ return $res |
+} |
+ |
+# Comparison function used by fts5_poslist2collist to sort collist entries. |
+proc fts5_collist_elem_compare {a b} { |
+ foreach {a1 a2} [split $a .] {} |
+ foreach {b1 b2} [split $b .] {} |
+ |
+ if {$a1==$b1} { return [expr $a2 - $b2] } |
+ return [expr $a1 - $b1] |
+} |
+ |
+ |
+#-------------------------------------------------------------------------- |
+# Construct and return a tcl list equivalent to that returned by the SQL |
+# query executed against database handle [db]: |
+# |
+# SELECT |
+# rowid, |
+# fts5_test_poslist($tbl), |
+# fts5_test_collist($tbl) |
+# FROM $tbl('$expr') |
+# ORDER BY rowid $order; |
+# |
+proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} { |
+ |
+ # Figure out the set of columns in the FTS5 table. This routine does |
+ # not handle tables with UNINDEXED columns, but if it did, it would |
+ # have to be here. |
+ db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) } |
+ |
+ set d "" |
+ if {$aDictVar != ""} { |
+ upvar $aDictVar aDict |
+ set d aDict |
+ } |
+ |
+ set cols "" |
+ foreach e $lCols { append cols ", '$e'" } |
+ set tclexpr [db one [subst -novar { |
+ SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] ) |
+ }]] |
+ |
+ set res [list] |
+ db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x { |
+ set cols [list] |
+ foreach col $lCols { lappend cols $x($col) } |
+ |
+ set ::pc 0 |
+ set rowdata [eval $tclexpr] |
+ if {$rowdata != ""} { |
+ lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata] |
+ } |
+ } |
+ |
+ set res |
+} |
+ |
+#------------------------------------------------------------------------- |
+# Similar to [fts5_query_data], but omit the collist field. |
+# |
+proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} { |
+ set res [list] |
+ |
+ if {$aDictVar!=""} { |
+ upvar $aDictVar aDict |
+ set dict aDict |
+ } else { |
+ set dict "" |
+ } |
+ |
+ foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] { |
+ lappend res $rowid $poslist |
+ } |
+ set res |
+} |
+ |
+proc fts5_collist_data {expr tbl {order ASC} {aDictVar ""}} { |
+ set res [list] |
+ |
+ if {$aDictVar!=""} { |
+ upvar $aDictVar aDict |
+ set dict aDict |
+ } else { |
+ set dict "" |
+ } |
+ |
+ foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] { |
+ lappend res $rowid $collist |
+ } |
+ set res |
+} |
+ |
+#------------------------------------------------------------------------- |
+# |
+ |
+# This command will only work inside a [foreach_detail_mode] block. It tests |
+# whether or not expression $expr run on FTS5 table $tbl is supported by |
+# the current mode. If so, 1 is returned. If not, 0. |
+# |
+# detail=full (all queries supported) |
+# detail=col (all but phrase queries and NEAR queries) |
+# detail=none (all but phrase queries, NEAR queries, and column filters) |
+# |
+proc fts5_expr_ok {expr tbl} { |
+ |
+ if {![detail_is_full]} { |
+ set nearset "nearset_rc" |
+ if {[detail_is_col]} { set nearset "nearset_rf" } |
+ |
+ set ::expr_not_ok 0 |
+ db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) } |
+ |
+ set cols "" |
+ foreach e $lCols { append cols ", '$e'" } |
+ set ::pc 0 |
+ set tclexpr [db one [subst -novar { |
+ SELECT fts5_expr_tcl( $expr, '[set nearset] $cols -pc ::pc' [set cols] ) |
+ }]] |
+ eval $tclexpr |
+ if {$::expr_not_ok} { return 0 } |
+ } |
+ |
+ return 1 |
+} |
+ |
+# Helper for [fts5_expr_ok] |
+proc nearset_rf {aCol args} { |
+ set idx [lsearch -exact $args --] |
+ if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} { |
+ set ::expr_not_ok 1 |
+ } |
+ list |
+} |
+ |
+# Helper for [fts5_expr_ok] |
+proc nearset_rc {aCol args} { |
+ nearset_rf $aCol {*}$args |
+ if {[lsearch $args -col]>=0} { |
+ set ::expr_not_ok 1 |
+ } |
+ list |
+} |
+ |
+ |
+#------------------------------------------------------------------------- |
+# Code for a simple Tcl tokenizer that supports synonyms at query time. |
+# |
+proc tclnum_tokenize {mode tflags text} { |
+ foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
+ sqlite3_fts5_token $w $iStart $iEnd |
+ if {$tflags == $mode && [info exists ::tclnum_syn($w)]} { |
+ foreach s $::tclnum_syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } |
+ } |
+ } |
+} |
+ |
+proc tclnum_create {args} { |
+ set mode query |
+ if {[llength $args]} { |
+ set mode [lindex $args 0] |
+ } |
+ if {$mode != "query" && $mode != "document"} { error "bad mode: $mode" } |
+ return [list tclnum_tokenize $mode] |
+} |
+ |
+proc fts5_tclnum_register {db} { |
+ foreach SYNDICT { |
+ {zero 0} |
+ {one 1 i} |
+ {two 2 ii} |
+ {three 3 iii} |
+ {four 4 iv} |
+ {five 5 v} |
+ {six 6 vi} |
+ {seven 7 vii} |
+ {eight 8 viii} |
+ {nine 9 ix} |
+ |
+ {a1 a2 a3 a4 a5 a6 a7 a8 a9} |
+ {b1 b2 b3 b4 b5 b6 b7 b8 b9} |
+ {c1 c2 c3 c4 c5 c6 c7 c8 c9} |
+ } { |
+ foreach s $SYNDICT { |
+ set o [list] |
+ foreach x $SYNDICT {if {$x!=$s} {lappend o $x}} |
+ set ::tclnum_syn($s) $o |
+ } |
+ } |
+ sqlite3_fts5_create_tokenizer db tclnum tclnum_create |
+} |
+# |
+# End of tokenizer code. |
+#------------------------------------------------------------------------- |
+ |