Index: third_party/sqlite/sqlite-src-3080704/test/fts3rnd.test |
diff --git a/third_party/sqlite/sqlite-src-3080704/test/fts3rnd.test b/third_party/sqlite/sqlite-src-3080704/test/fts3rnd.test |
deleted file mode 100644 |
index 97af54925f3953e866468ddac7cc3afa8736b471..0000000000000000000000000000000000000000 |
--- a/third_party/sqlite/sqlite-src-3080704/test/fts3rnd.test |
+++ /dev/null |
@@ -1,460 +0,0 @@ |
-# 2009 December 03 |
-# |
-# May you do good and not evil. |
-# May you find forgiveness for yourself and forgive others. |
-# May you share freely, never taking more than you give. |
-# |
-#*********************************************************************** |
-# |
-# Brute force (random data) tests for FTS3. |
-# |
- |
-#------------------------------------------------------------------------- |
-# |
-# The FTS3 tests implemented in this file focus on testing that FTS3 |
-# returns the correct set of documents for various types of full-text |
-# query. This is done using pseudo-randomly generated data and queries. |
-# The expected result of each query is calculated using Tcl code. |
-# |
-# 1. The database is initialized to contain a single table with three |
-# columns. 100 rows are inserted into the table. Each of the three |
-# values in each row is a document consisting of between 0 and 99 |
-# terms. Terms are selected from a vocabulary of $G(nVocab) terms. |
-# |
-# 2. The following is performed 100 times: |
-# |
-# a. A row is inserted into the database. The row contents are |
-# generated as in step 1. The docid is a pseudo-randomly selected |
-# value between 0 and 1000000. |
-# |
-# b. A pseudo-randomly selected row is updated. One of its columns is |
-# set to contain a new document generated in the same way as the |
-# documents in step 1. |
-# |
-# c. A pseudo-randomly selected row is deleted. |
-# |
-# d. For each of several types of fts3 queries, 10 SELECT queries |
-# of the form: |
-# |
-# SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>' |
-# |
-# are evaluated. The results are compared to those calculated by |
-# Tcl code in this file. The patterns used for the different query |
-# types are: |
-# |
-# 1. query = <term> |
-# 2. query = <prefix> |
-# 3. query = "<term> <term>" |
-# 4. query = "<term> <term> <term>" |
-# 5. query = "<prefix> <prefix> <prefix>" |
-# 6. query = <term> NEAR <term> |
-# 7. query = <term> NEAR/11 <term> NEAR/11 <term> |
-# 8. query = <term> OR <term> |
-# 9. query = <term> NOT <term> |
-# 10. query = <term> AND <term> |
-# 11. query = <term> NEAR <term> OR <term> NEAR <term> |
-# 12. query = <term> NEAR <term> NOT <term> NEAR <term> |
-# 13. query = <term> NEAR <term> AND <term> NEAR <term> |
-# |
-# where <term> is a term pseudo-randomly selected from the vocabulary |
-# and <prefix> is such a term with its final character removed, |
-# followed by a "*" character. |
-# |
-# Every second iteration, steps (a) through (d) above are performed |
-# within a single transaction. This forces the queries in (d) to |
-# read data from both the database and the in-memory hash table |
-# that caches the full-text index entries created by steps (a), (b) |
-# and (c) until the transaction is committed. |
-# |
-# The procedure above is run 5 times: twice with an advisory fts3 node size |
-# of 50 bytes (once with order=DESC and once with order=ASC), then once each |
-# with node sizes of 500, 1000 and 2000 bytes. |
-# |
-# After the test using an advisory node-size of 50, an OOM test is run using |
-# the database. This test is similar to step (d) above, except that it tests |
-# the effects of transient and persistent OOM conditions encountered while |
-# executing each query. |
-# |
- |
-# Locate the directory containing this script and load the shared test |
-# harness from it. |
-set testdir [file dirname $argv0] |
-source $testdir/tester.tcl |
- |
-# If this build does not include FTS3, skip the tests in this file. |
-# |
-ifcapable !fts3 { finish_test ; return } |
-# Helper scripts from the same directory. NOTE(review): presumably these |
-# supply do_select_test and the OOM-injection support used further down - |
-# confirm against fts3_common.tcl and malloc_common.tcl. |
-source $testdir/fts3_common.tcl |
-source $testdir/malloc_common.tcl |
- |
-# Size of the vocabulary. The header comment refers to $G(nVocab) while the |
-# code below reads $nVocab, so derive one from the other instead of |
-# repeating the constant. |
-set G(nVocab) 100 |
- |
-set nVocab $G(nVocab) |
-set lVocab [list] |
- |
-# Use a fixed seed so that every run generates the same data and queries. |
-expr {srand(0)} |
- |
-# Generate a vocabulary of nVocab words. Each word is between 2 and 4 |
-# lower-case letters long. |
-# |
-set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z} |
-for {set i 0} {$i < $nVocab} {incr i} { |
-  # rand() is consumed once for the word length and then once per letter. |
-  set len [expr {int(rand()*3)+2}] |
-  set word "" |
-  for {set j 0} {$j < $len} {incr j} { |
-    append word [lindex $lChar [expr {int(rand()*26)}]] |
-  } |
-  lappend lVocab $word |
-} |
- |
-# Return one term chosen pseudo-randomly from the global vocabulary list |
-# $::lVocab, which holds $::nVocab entries. |
-# |
-proc random_term {} { |
-  set iTerm [expr {int(rand() * $::nVocab)}] |
-  return [lindex $::lVocab $iTerm] |
-} |
- |
-# Build a document: a Tcl list holding $nWord terms, each one obtained from |
-# a separate call to [random_term]. An $nWord of 0 yields an empty list. |
-# |
-proc generate_doc {nWord} { |
-  set lTerm {} |
-  set nLeft $nWord |
-  while {$nLeft > 0} { |
-    lappend lTerm [random_term] |
-    incr nLeft -1 |
-  } |
-  return $lTerm |
-} |
- |
- |
- |
-# Primitives to update the table. |
-# |
-unset -nocomplain t1 |
- |
-# Insert a row with docid $rowid made of three freshly generated documents, |
-# and record the same three documents in the ::t1 array under key $rowid. |
-# |
-proc insert_row {rowid} { |
-  # Columns are generated in a, b, c order so that rand() is consumed in a |
-  # fixed sequence. The locals must be called a, b and c (and the parameter |
-  # rowid) because the SQL text below refers to them by those names. |
-  foreach zCol {a b c} { |
-    set $zCol [generate_doc [expr {int(rand()*100)}]] |
-  } |
-  execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) } |
-  set ::t1($rowid) [list $a $b $c] |
-} |
-# Delete the row with rowid $rowid from the FTS table and drop its entry |
-# from the ::t1 array. A missing array entry is not an error; the return |
-# value is the [catch] status of the unset (0 if the entry existed). |
-# |
-proc delete_row {rowid} { |
-  execsql { DELETE FROM t1 WHERE rowid = $rowid } |
-  set rc [catch { unset ::t1($rowid) }] |
-  return $rc |
-} |
-# Replace one pseudo-randomly chosen column (a, b or c) of row $rowid with |
-# a newly generated document, in both the ::t1 array and the FTS table. |
-# |
-proc update_row {rowid} { |
-  # The column is picked before the document is generated; keep that order |
-  # so the rand() sequence is unchanged. The local must be called doc |
-  # because the SQL text refers to it as $doc. |
-  set iCol [expr {int(rand()*3)}] |
-  set doc [generate_doc [expr {int(rand()*100)}]] |
-  set zCol [lindex {a b c} $iCol] |
-  lset ::t1($rowid) $iCol $doc |
-  execsql "UPDATE t1 SET $zCol = \$doc WHERE rowid = \$rowid" |
-} |
- |
-# Return the keys of ::t1, in ascending integer order, for which at least |
-# one column contains the phrase $zPrefix. |
-# |
-# $zPrefix is a space-separated sequence of terms. Each "*" in it is |
-# expanded to the regular expression {[^ ]*}, i.e. "the rest of this word", |
-# so "ab* cd" matches the word sequence "abx cd". Both the pattern and the |
-# column text are wrapped in single spaces so that a match can only begin |
-# and end on word boundaries. |
-# |
-proc simple_phrase {zPrefix} { |
-  set reg " [string map {* {[^ ]*}} $zPrefix] " |
- |
-  set ret [list] |
-  foreach key [lsort -integer [array names ::t1]] { |
-    foreach col $::t1($key) { |
-      # One matching column is enough; stop so each key is added only once. |
-      if {[regexp $reg " $col "]} { lappend ret $key ; break } |
-    } |
-  } |
- |
-  return $ret |
-} |
- |
-# Compute, from the ::t1 array, the result expected from a query of the |
-# form "SELECT docid, mit(matchinfo(t1)) ... MATCH $zToken". |
-# |
-# The return value is a flat list of alternating docid and matchinfo |
-# values, one pair for each row in which $zToken occurs in at least one |
-# column. Rows are in ascending docid order, or descending if $bDesc is |
-# true. Each matchinfo value is the list: |
-# |
-#   1 3  <hits col 0> <total hits col 0> <rows with a hit in col 0>  ... |
-# |
-# with the trailing triple repeated for columns 1 and 2. |
-# |
-proc simple_token_matchinfo {zToken bDesc} { |
- |
-  # Table-wide totals for each of the three columns. |
-  foreach iCol {0 1 2} { |
-    set nDoc($iCol) 0 |
-    set nHit($iCol) 0 |
-  } |
- |
-  set dir [expr {$bDesc ? "-dec" : "-inc"}] |
- |
-  # Pass 1: count the occurrences of the token in every column of every |
-  # row, accumulating the totals as we go. |
-  foreach {docid lCol} [array get ::t1] { |
-    set aHit($docid) [list] |
-    foreach iCol {0 1 2} zText $lCol { |
-      set n [llength [lsearch -all $zText $zToken]] |
-      lappend aHit($docid) $n |
-      incr nHit($iCol) $n |
-      if {$n > 0} { incr nDoc($iCol) } |
-    } |
-  } |
- |
-  # Pass 2: emit one entry for each row whose largest per-column count is |
-  # non-zero. |
-  set ret [list] |
-  foreach docid [lsort -integer $dir [array names aHit]] { |
-    set nMax [lindex [lsort -integer $aHit($docid)] end] |
-    if {!$nMax} continue |
-    set matchinfo [list 1 3] |
-    foreach iCol {0 1 2} n $aHit($docid) { |
-      lappend matchinfo $n $nHit($iCol) $nDoc($iCol) |
-    } |
-    lappend ret $docid $matchinfo |
-  } |
- |
-  return $ret |
-} |
- |
-# Return the keys of ::t1, as a list of unique integers in ascending order, |
-# for which at least one column satisfies a NEAR query made of the terms in |
-# $termlist, using a distance of $nNear. |
-# |
-# Each column is checked on its own. Starting with the positions of the |
-# first term, every following term must occur within ($nNear + 1) word |
-# positions, on either side, of a position retained for the previous term. |
-# |
-proc simple_near {termlist nNear} { |
- set ret [list] |
- |
- foreach {key value} [array get ::t1] { |
- foreach v $value { |
- |
- # $l holds the candidate positions in $v of the term handled last. |
- set l [lsearch -exact -all $v [lindex $termlist 0]] |
- foreach T [lrange $termlist 1 end] { |
- set l2 [list] |
- foreach i $l { |
- # Window of word positions around $i to search, clamped at 0. |
- set iStart [expr $i - $nNear - 1] |
- set iEnd [expr $i + $nNear + 1] |
- if {$iStart < 0} {set iStart 0} |
- foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] { |
- # lsearch indexes are relative to the window; convert to absolute. |
- incr i2 $iStart |
- # A term is never treated as being near itself at the same position. |
- if {$i2 != $i} { lappend l2 $i2 } |
- } |
- } |
- # The positions of $T that qualified become the candidates for the |
- # next term in the list. |
- set l [lsort -uniq -integer $l2] |
- } |
- |
- # Any surviving position means this column, and so this row, matches. |
- if {[llength $l]} { |
-#puts "MATCH($key): $v" |
- lappend ret $key |
- } |
- } |
- } |
- |
- # A row may have matched in more than one column; remove duplicates. |
- lsort -unique -integer $ret |
-} |
- |
-# The following three procs: |
-# |
-# setop_not A B |
-# setop_or A B |
-# setop_and A B |
-# |
-# each take two arguments. Both arguments must be lists of integer values |
-# sorted by value. The return value is the list produced by evaluating |
-# the equivalent of "A op B", where op is the FTS3 operator NOT, OR or |
-# AND. |
-# |
-# Return the elements of list A that do not appear in list B, in the |
-# order in which they occur in A. |
-proc setop_not {A B} { |
-  array set exclude {} |
-  foreach b $B { set exclude($b) {} } |
- |
-  set ret {} |
-  foreach a $A { |
-    if {[info exists exclude($a)]} continue |
-    lappend ret $a |
-  } |
-  return $ret |
-} |
-# Return the union of lists A and B: every distinct integer from either |
-# list, in ascending order. |
-proc setop_or {A B} { |
-  set all [concat $A $B] |
-  return [lsort -integer -uniq $all] |
-} |
-# Return the elements of list A that also appear in list B, in the order |
-# in which they occur in A. |
-proc setop_and {A B} { |
-  array set wanted {} |
-  foreach b $B { set wanted($b) {} } |
- |
-  set ret {} |
-  foreach a $A { |
-    if {![info exists wanted($a)]} continue |
-    lappend ret $a |
-  } |
-  return $ret |
-} |
- |
-# Decode $blob into a Tcl list of 32-bit integers, reading them in the byte |
-# order of the machine the test is running on. |
-proc mit {blob} { |
-  switch -exact -- $::tcl_platform(byteOrder) { |
-    littleEndian { set fmt i* } |
-    bigEndian    { set fmt I* } |
-  } |
-  binary scan $blob $fmt r |
-  return $r |
-} |
-# Expose the [mit] proc to SQL as the function mit(), so that queries can |
-# wrap matchinfo() output in it. |
-db func mit mit |
-# NOTE(review): presumably this switches FTS3 to the query syntax that |
-# accepts parentheses and the AND/NOT keywords used by the set-operation |
-# tests in this file - confirm against the test harness. |
-set sqlite_fts3_enable_parentheses 1 |
- |
-# Run query $sql twice in the caller's scope via do_select_test: once as |
-# test $tn.asc with "ORDER BY docid ASC" appended, expecting $res, and once |
-# as test $tn.desc with "ORDER BY docid DESC" appended, expecting $res |
-# sorted into descending integer order. |
-proc do_orderbydocid_test {tn sql res} { |
-  set resDesc [lsort -int -dec $res] |
-  uplevel [list do_select_test $tn.asc "$sql ORDER BY docid ASC" $res] |
-  uplevel [list do_select_test $tn.desc "$sql ORDER BY docid DESC" $resDesc] |
-} |
- |
-# Number of insert/update/delete + query rounds run for each table |
-# configuration below. |
-set NUM_TRIALS 100 |
- |
-# Main driver. The body is run once for each {nodesize order} pair: the |
-# table is rebuilt with that advisory node size and docid order, loaded |
-# with 100 rows, and then put through $NUM_TRIALS rounds of modification |
-# and querying. |
-foreach {nodesize order} { |
-  50 DESC |
-  50 ASC |
-  500 ASC |
-  1000 DESC |
-  2000 ASC |
-} { |
-  catch { array unset ::t1 } |
-  set testname "$nodesize/$order" |
- |
-  # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows. |
-  # |
-  db transaction { |
-    catchsql { DROP TABLE t1 } |
-    execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)" |
-    execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')" |
-    for {set i 0} {$i < 100} {incr i} { insert_row $i } |
-  } |
- |
-  for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} { |
-    # Close any transaction left open by the previous round. |
-    catchsql COMMIT |
- |
-    # The final round of each nodesize=50 run sets DO_MALLOC_TEST and uses |
-    # fewer repetitions. The round is identified by $NUM_TRIALS rather than |
-    # a literal so that changing NUM_TRIALS cannot silently disable it. |
-    # NOTE(review): DO_MALLOC_TEST is not read in this file; presumably |
-    # do_select_test consults it - confirm. |
-    set DO_MALLOC_TEST 0 |
-    set nRep 10 |
-    if {$iTest==$NUM_TRIALS && $nodesize==50} { |
-      set DO_MALLOC_TEST 1 |
-      set nRep 2 |
-    } |
- |
-    set ::testprefix fts3rnd-1.$testname.$iTest |
- |
-    # Delete one row, update one row and insert one row. |
-    # |
-    set rows [array names ::t1] |
-    set nRow [llength $rows] |
-    set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]] |
-    # The row to delete must differ from the row to update. |
-    set iDelete $iUpdate |
-    while {$iDelete == $iUpdate} { |
-      set iDelete [lindex $rows [expr {int(rand()*$nRow)}]] |
-    } |
-    # The new docid must not already be in use. |
-    set iInsert $iUpdate |
-    while {[info exists ::t1($iInsert)]} { |
-      set iInsert [expr {int(rand()*1000000)}] |
-    } |
-    # On even-numbered rounds the changes are committed before querying. |
-    # On odd-numbered rounds the transaction stays open until the COMMIT |
-    # at the bottom of the loop, so the queries run inside it. |
-    execsql BEGIN |
-    insert_row $iInsert |
-    update_row $iUpdate |
-    delete_row $iDelete |
-    if {0==($iTest%2)} { execsql COMMIT } |
- |
-    if {0==($iTest%2)} { |
-      #do_test 0 { fts3_integrity_check t1 } ok |
-    } |
- |
-    # Pick 10 terms from the vocabulary. Check that the results of querying |
-    # the database for the set of documents containing each of these terms |
-    # is the same as the result obtained by scanning the contents of the Tcl |
-    # array for each term. |
-    # |
-    for {set i 0} {$i < 10} {incr i} { |
-      set term [random_term] |
-      do_select_test 1.$i.asc { |
-        SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term |
-        ORDER BY docid ASC |
-      } [simple_token_matchinfo $term 0] |
-      do_select_test 1.$i.desc { |
-        SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term |
-        ORDER BY docid DESC |
-      } [simple_token_matchinfo $term 1] |
-    } |
- |
-    # This time, use each term with its final character removed as a term |
-    # prefix to query for. Test that querying the Tcl array produces the |
-    # same results as querying the FTS3 table for the prefix. |
-    # |
-    for {set i 0} {$i < $nRep} {incr i} { |
-      set prefix [string range [random_term] 0 end-1] |
-      set match "${prefix}*" |
-      do_orderbydocid_test 2.$i { |
-        SELECT docid FROM t1 WHERE t1 MATCH $match |
-      } [simple_phrase $match] |
-    } |
- |
-    # Similar to the above, except for phrase queries. |
-    # |
-    for {set i 0} {$i < $nRep} {incr i} { |
-      set term [list [random_term] [random_term]] |
-      set match "\"$term\"" |
-      do_orderbydocid_test 3.$i { |
-        SELECT docid FROM t1 WHERE t1 MATCH $match |
-      } [simple_phrase $term] |
-    } |
- |
-    # Three word phrases. |
-    # |
-    for {set i 0} {$i < $nRep} {incr i} { |
-      set term [list [random_term] [random_term] [random_term]] |
-      set match "\"$term\"" |
-      do_orderbydocid_test 4.$i { |
-        SELECT docid FROM t1 WHERE t1 MATCH $match |
-      } [simple_phrase $term] |
-    } |
- |
-    # Three word phrases made up of term-prefixes. |
-    # |
-    for {set i 0} {$i < $nRep} {incr i} { |
-      set query "[string range [random_term] 0 end-1]* " |
-      append query "[string range [random_term] 0 end-1]* " |
-      append query "[string range [random_term] 0 end-1]*" |
- |
-      set match "\"$query\"" |
-      do_orderbydocid_test 5.$i { |
-        SELECT docid FROM t1 WHERE t1 MATCH $match |
-      } [simple_phrase $query] |
-    } |
- |
-    # A NEAR query with terms as the arguments: |
-    # |
-    # ... MATCH '$term1 NEAR $term2' ... |
-    # |
-    # The expected result is computed with a distance of 10, since the |
-    # query does not give one. |
-    # |
-    for {set i 0} {$i < $nRep} {incr i} { |
-      set terms [list [random_term] [random_term]] |
-      set match [join $terms " NEAR "] |
-      do_orderbydocid_test 6.$i { |
-        SELECT docid FROM t1 WHERE t1 MATCH $match |
-      } [simple_near $terms 10] |
-    } |
- |
-    # A 3-way NEAR query with terms as the arguments. |
-    # |
-    for {set i 0} {$i < $nRep} {incr i} { |
-      set terms [list [random_term] [random_term] [random_term]] |
-      set nNear 11 |
-      set match [join $terms " NEAR/$nNear "] |
-      do_orderbydocid_test 7.$i { |
-        SELECT docid FROM t1 WHERE t1 MATCH $match |
-      } [simple_near $terms $nNear] |
-    } |
- |
-    # Set operations on simple term queries. |
-    # |
-    foreach {tn op proc} { |
-      8 OR setop_or |
-      9 NOT setop_not |
-      10 AND setop_and |
-    } { |
-      for {set i 0} {$i < $nRep} {incr i} { |
-        set term1 [random_term] |
-        set term2 [random_term] |
-        set match "$term1 $op $term2" |
-        do_orderbydocid_test $tn.$i { |
-          SELECT docid FROM t1 WHERE t1 MATCH $match |
-        } [$proc [simple_phrase $term1] [simple_phrase $term2]] |
-      } |
-    } |
- |
-    # Set operations on NEAR queries. |
-    # |
-    foreach {tn op proc} { |
-      11 OR setop_or |
-      12 NOT setop_not |
-      13 AND setop_and |
-    } { |
-      for {set i 0} {$i < $nRep} {incr i} { |
-        set term1 [random_term] |
-        set term2 [random_term] |
-        set term3 [random_term] |
-        set term4 [random_term] |
-        set match "$term1 NEAR $term2 $op $term3 NEAR $term4" |
-        do_orderbydocid_test $tn.$i { |
-          SELECT docid FROM t1 WHERE t1 MATCH $match |
-        } [$proc \ |
-          [simple_near [list $term1 $term2] 10] \ |
-          [simple_near [list $term3 $term4] 10] |
-        ] |
-      } |
-    } |
- |
-    # Commit the transaction left open on odd-numbered rounds. |
-    catchsql COMMIT |
-  } |
-} |
- |
-finish_test |