Index: third_party/sqlite/src/test/fts3rnd.test |
diff --git a/third_party/sqlite/src/test/fts3rnd.test b/third_party/sqlite/src/test/fts3rnd.test |
new file mode 100644 |
index 0000000000000000000000000000000000000000..0909cee614d089491b2487099a4d62d5813a98fc |
--- /dev/null |
+++ b/third_party/sqlite/src/test/fts3rnd.test |
@@ -0,0 +1,434 @@ |
+# 2009 December 03 |
+# |
+# May you do good and not evil. |
+# May you find forgiveness for yourself and forgive others. |
+# May you share freely, never taking more than you give. |
+# |
+#*********************************************************************** |
+# |
+# Brute force (random data) tests for FTS3. |
+# |
+ |
+#------------------------------------------------------------------------- |
+# |
+# The FTS3 tests implemented in this file focus on testing that FTS3 |
+# returns the correct set of documents for various types of full-text |
+# query. This is done using pseudo-randomly generated data and queries. |
+# The expected result of each query is calculated using Tcl code. |
+# |
+# 1. The database is initialized to contain a single table with three |
+# columns. 100 rows are inserted into the table. Each of the three |
+# values in each row is a document consisting of between 0 and 100 |
+# terms. Terms are selected from a vocabulary of $G(nVocab) terms. |
+# |
+# 2. The following is performed 100 times: |
+# |
+# a. A row is inserted into the database. The row contents are |
+# generated as in step 1. The docid is a pseudo-randomly selected |
+# value between 0 and 1000000. |
+# |
+# b. A psuedo-randomly selected row is updated. One of its columns is |
+# set to contain a new document generated in the same way as the |
+# documents in step 1. |
+# |
+# c. A psuedo-randomly selected row is deleted. |
+# |
+# d. For each of several types of fts3 queries, 10 SELECT queries |
+# of the form: |
+# |
+# SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>' |
+# |
+# are evaluated. The results are compared to those calculated by |
+# Tcl code in this file. The patterns used for the different query |
+# types are: |
+# |
+# 1. query = <term> |
+# 2. query = <prefix> |
+# 3. query = "<term> <term>" |
+# 4. query = "<term> <term> <term>" |
+# 5. query = "<prefix> <prefix> <prefix>" |
+# 6. query = <term> NEAR <term> |
+# 7. query = <term> NEAR/11 <term> NEAR/11 <term> |
+# 8. query = <term> OR <term> |
+# 9. query = <term> NOT <term> |
+# 10. query = <term> AND <term> |
+# 11. query = <term> NEAR <term> OR <term> NEAR <term> |
+# 12. query = <term> NEAR <term> NOT <term> NEAR <term> |
+# 13. query = <term> NEAR <term> AND <term> NEAR <term> |
+# |
+# where <term> is a term psuedo-randomly selected from the vocabulary |
+# and prefix is the first 2 characters of such a term followed by |
+# a "*" character. |
+# |
+# Every second iteration, steps (a) through (d) above are performed |
+# within a single transaction. This forces the queries in (d) to |
+# read data from both the database and the in-memory hash table |
+# that caches the full-text index entries created by steps (a), (b) |
+# and (c) until the transaction is committed. |
+# |
+# The procedure above is run 5 times, using advisory fts3 node sizes of 50, |
+# 500, 1000 and 2000 bytes. |
+# |
+# After the test using an advisory node-size of 50, an OOM test is run using |
+# the database. This test is similar to step (d) above, except that it tests |
+# the effects of transient and persistent OOM conditions encountered while |
+# executing each query. |
+# |
+ |
+set testdir [file dirname $argv0] |
+source $testdir/tester.tcl |
+ |
+# If this build does not include FTS3, skip the tests in this file. |
+# |
+ifcapable !fts3 { finish_test ; return } |
+source $testdir/fts3_common.tcl |
+source $testdir/malloc_common.tcl |
+ |
+set G(nVocab) 100 |
+ |
+set nVocab 100 |
+set lVocab [list] |
+ |
+expr srand(0) |
+ |
+# Generate a vocabulary of nVocab words. Each word is 3 characters long. |
+# |
+set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z} |
+for {set i 0} {$i < $nVocab} {incr i} { |
+ set len [expr int(rand()*3)+2] |
+ set word [lindex $lChar [expr int(rand()*26)]] |
+ append word [lindex $lChar [expr int(rand()*26)]] |
+ if {$len>2} { append word [lindex $lChar [expr int(rand()*26)]] } |
+ if {$len>3} { append word [lindex $lChar [expr int(rand()*26)]] } |
+ lappend lVocab $word |
+} |
+ |
+proc random_term {} { |
+ lindex $::lVocab [expr {int(rand()*$::nVocab)}] |
+} |
+ |
+# Return a document consisting of $nWord arbitrarily selected terms |
+# from the $::lVocab list. |
+# |
+proc generate_doc {nWord} { |
+ set doc [list] |
+ for {set i 0} {$i < $nWord} {incr i} { |
+ lappend doc [random_term] |
+ } |
+ return $doc |
+} |
+ |
+ |
+ |
+# Primitives to update the table. |
+# |
+unset -nocomplain t1 |
+proc insert_row {rowid} { |
+ set a [generate_doc [expr int((rand()*100))]] |
+ set b [generate_doc [expr int((rand()*100))]] |
+ set c [generate_doc [expr int((rand()*100))]] |
+ execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) } |
+ set ::t1($rowid) [list $a $b $c] |
+} |
+proc delete_row {rowid} { |
+ execsql { DELETE FROM t1 WHERE rowid = $rowid } |
+ catch {unset ::t1($rowid)} |
+} |
+proc update_row {rowid} { |
+ set cols {a b c} |
+ set iCol [expr int(rand()*3)] |
+ set doc [generate_doc [expr int((rand()*100))]] |
+ lset ::t1($rowid) $iCol $doc |
+ execsql "UPDATE t1 SET [lindex $cols $iCol] = \$doc WHERE rowid = \$rowid" |
+} |
+ |
+proc simple_phrase {zPrefix} { |
+ set ret [list] |
+ |
+ set reg [string map {* {[^ ]*}} $zPrefix] |
+ set reg " $reg " |
+ |
+ foreach key [lsort -integer [array names ::t1]] { |
+ set value $::t1($key) |
+ set cnt [list] |
+ foreach col $value { |
+ if {[regexp $reg " $col "]} { lappend ret $key ; break } |
+ } |
+ } |
+ |
+ #lsort -uniq -integer $ret |
+ set ret |
+} |
+ |
+# This [proc] is used to test the FTS3 matchinfo() function. |
+# |
+proc simple_token_matchinfo {zToken} { |
+ |
+ set nDoc(0) 0 |
+ set nDoc(1) 0 |
+ set nDoc(2) 0 |
+ set nHit(0) 0 |
+ set nHit(1) 0 |
+ set nHit(2) 0 |
+ |
+ |
+ foreach key [array names ::t1] { |
+ set value $::t1($key) |
+ set a($key) [list] |
+ foreach i {0 1 2} col $value { |
+ set hit [llength [lsearch -all $col $zToken]] |
+ lappend a($key) $hit |
+ incr nHit($i) $hit |
+ if {$hit>0} { incr nDoc($i) } |
+ } |
+ } |
+ |
+ set ret [list] |
+ foreach docid [lsort -integer [array names a]] { |
+ if { [lindex [lsort -integer $a($docid)] end] } { |
+ set matchinfo [list 1 3] |
+ foreach i {0 1 2} hit $a($docid) { |
+ lappend matchinfo $hit $nHit($i) $nDoc($i) |
+ } |
+ lappend ret $docid $matchinfo |
+ } |
+ } |
+ |
+ set ret |
+} |
+ |
+proc simple_near {termlist nNear} { |
+ set ret [list] |
+ |
+ foreach {key value} [array get ::t1] { |
+ foreach v $value { |
+ |
+ set l [lsearch -exact -all $v [lindex $termlist 0]] |
+ foreach T [lrange $termlist 1 end] { |
+ set l2 [list] |
+ foreach i $l { |
+ set iStart [expr $i - $nNear - 1] |
+ set iEnd [expr $i + $nNear + 1] |
+ if {$iStart < 0} {set iStart 0} |
+ foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] { |
+ incr i2 $iStart |
+ if {$i2 != $i} { lappend l2 $i2 } |
+ } |
+ } |
+ set l [lsort -uniq -integer $l2] |
+ } |
+ |
+ if {[llength $l]} { |
+#puts "MATCH($key): $v" |
+ lappend ret $key |
+ } |
+ } |
+ } |
+ |
+ lsort -unique -integer $ret |
+} |
+ |
+# The following three procs: |
+# |
+# setup_not A B |
+# setup_or A B |
+# setup_and A B |
+# |
+# each take two arguments. Both arguments must be lists of integer values |
+# sorted by value. The return value is the list produced by evaluating |
+# the equivalent of "A op B", where op is the FTS3 operator NOT, OR or |
+# AND. |
+# |
+proc setop_not {A B} { |
+ foreach b $B { set n($b) {} } |
+ set ret [list] |
+ foreach a $A { if {![info exists n($a)]} {lappend ret $a} } |
+ return $ret |
+} |
+proc setop_or {A B} { |
+ lsort -integer -uniq [concat $A $B] |
+} |
+proc setop_and {A B} { |
+ foreach b $B { set n($b) {} } |
+ set ret [list] |
+ foreach a $A { if {[info exists n($a)]} {lappend ret $a} } |
+ return $ret |
+} |
+ |
+proc mit {blob} { |
+ set scan(littleEndian) i* |
+ set scan(bigEndian) I* |
+ binary scan $blob $scan($::tcl_platform(byteOrder)) r |
+ return $r |
+} |
+db func mit mit |
+ |
+set sqlite_fts3_enable_parentheses 1 |
+ |
+foreach nodesize {50 500 1000 2000} { |
+ catch { array unset ::t1 } |
+ |
+ # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows. |
+ # |
+ db transaction { |
+ catchsql { DROP TABLE t1 } |
+ execsql "CREATE VIRTUAL TABLE t1 USING fts3(a, b, c)" |
+ execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')" |
+ for {set i 0} {$i < 100} {incr i} { insert_row $i } |
+ } |
+ |
+ for {set iTest 1} {$iTest <= 100} {incr iTest} { |
+ catchsql COMMIT |
+ |
+ set DO_MALLOC_TEST 0 |
+ set nRep 10 |
+ if {$iTest==100 && $nodesize==50} { |
+ set DO_MALLOC_TEST 1 |
+ set nRep 2 |
+ } |
+ |
+ # Delete one row, update one row and insert one row. |
+ # |
+ set rows [array names ::t1] |
+ set nRow [llength $rows] |
+ set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]] |
+ set iDelete $iUpdate |
+ while {$iDelete == $iUpdate} { |
+ set iDelete [lindex $rows [expr {int(rand()*$nRow)}]] |
+ } |
+ set iInsert $iUpdate |
+ while {[info exists ::t1($iInsert)]} { |
+ set iInsert [expr {int(rand()*1000000)}] |
+ } |
+ execsql BEGIN |
+ insert_row $iInsert |
+ update_row $iUpdate |
+ delete_row $iDelete |
+ if {0==($iTest%2)} { execsql COMMIT } |
+ |
+ if {0==($iTest%2)} { |
+ do_test fts3rnd-1.$nodesize.$iTest.0 { fts3_integrity_check t1 } ok |
+ } |
+ |
+ # Pick 10 terms from the vocabulary. Check that the results of querying |
+ # the database for the set of documents containing each of these terms |
+ # is the same as the result obtained by scanning the contents of the Tcl |
+ # array for each term. |
+ # |
+ for {set i 0} {$i < 10} {incr i} { |
+ set term [random_term] |
+ do_select_test fts3rnd-1.$nodesize.$iTest.1.$i { |
+ SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term |
+ } [simple_token_matchinfo $term] |
+ } |
+ |
+ # This time, use the first two characters of each term as a term prefix |
+ # to query for. Test that querying the Tcl array produces the same results |
+ # as querying the FTS3 table for the prefix. |
+ # |
+ for {set i 0} {$i < $nRep} {incr i} { |
+ set prefix [string range [random_term] 0 end-1] |
+ set match "${prefix}*" |
+ do_select_test fts3rnd-1.$nodesize.$iTest.2.$i { |
+ SELECT docid FROM t1 WHERE t1 MATCH $match |
+ } [simple_phrase $match] |
+ } |
+ |
+ # Similar to the above, except for phrase queries. |
+ # |
+ for {set i 0} {$i < $nRep} {incr i} { |
+ set term [list [random_term] [random_term]] |
+ set match "\"$term\"" |
+ do_select_test fts3rnd-1.$nodesize.$iTest.3.$i { |
+ SELECT docid FROM t1 WHERE t1 MATCH $match |
+ } [simple_phrase $term] |
+ } |
+ |
+ # Three word phrases. |
+ # |
+ for {set i 0} {$i < $nRep} {incr i} { |
+ set term [list [random_term] [random_term] [random_term]] |
+ set match "\"$term\"" |
+ do_select_test fts3rnd-1.$nodesize.$iTest.4.$i { |
+ SELECT docid FROM t1 WHERE t1 MATCH $match |
+ } [simple_phrase $term] |
+ } |
+ |
+ # Three word phrases made up of term-prefixes. |
+ # |
+ for {set i 0} {$i < $nRep} {incr i} { |
+ set query "[string range [random_term] 0 end-1]* " |
+ append query "[string range [random_term] 0 end-1]* " |
+ append query "[string range [random_term] 0 end-1]*" |
+ |
+ set match "\"$query\"" |
+ do_select_test fts3rnd-1.$nodesize.$iTest.5.$i { |
+ SELECT docid FROM t1 WHERE t1 MATCH $match |
+ } [simple_phrase $query] |
+ } |
+ |
+ # A NEAR query with terms as the arguments. |
+ # |
+ for {set i 0} {$i < $nRep} {incr i} { |
+ set terms [list [random_term] [random_term]] |
+ set match [join $terms " NEAR "] |
+ do_select_test fts3rnd-1.$nodesize.$iTest.6.$i { |
+ SELECT docid FROM t1 WHERE t1 MATCH $match |
+ } [simple_near $terms 10] |
+ } |
+ |
+ # A 3-way NEAR query with terms as the arguments. |
+ # |
+ for {set i 0} {$i < $nRep} {incr i} { |
+ set terms [list [random_term] [random_term] [random_term]] |
+ set nNear 11 |
+ set match [join $terms " NEAR/$nNear "] |
+ do_select_test fts3rnd-1.$nodesize.$iTest.7.$i { |
+ SELECT docid FROM t1 WHERE t1 MATCH $match |
+ } [simple_near $terms $nNear] |
+ } |
+ |
+ # Set operations on simple term queries. |
+ # |
+ foreach {tn op proc} { |
+ 8 OR setop_or |
+ 9 NOT setop_not |
+ 10 AND setop_and |
+ } { |
+ for {set i 0} {$i < $nRep} {incr i} { |
+ set term1 [random_term] |
+ set term2 [random_term] |
+ set match "$term1 $op $term2" |
+ do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i { |
+ SELECT docid FROM t1 WHERE t1 MATCH $match |
+ } [$proc [simple_phrase $term1] [simple_phrase $term2]] |
+ } |
+ } |
+ |
+ # Set operations on NEAR queries. |
+ # |
+ foreach {tn op proc} { |
+ 8 OR setop_or |
+ 9 NOT setop_not |
+ 10 AND setop_and |
+ } { |
+ for {set i 0} {$i < $nRep} {incr i} { |
+ set term1 [random_term] |
+ set term2 [random_term] |
+ set term3 [random_term] |
+ set term4 [random_term] |
+ set match "$term1 NEAR $term2 $op $term3 NEAR $term4" |
+ do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i { |
+ SELECT docid FROM t1 WHERE t1 MATCH $match |
+ } [$proc \ |
+ [simple_near [list $term1 $term2] 10] \ |
+ [simple_near [list $term3 $term4] 10] |
+ ] |
+ } |
+ } |
+ |
+ catchsql COMMIT |
+ } |
+} |
+ |
+finish_test |