third_party/sqlite/src/test/fts3rnd.test - Issue 6990047: Import SQLite 3.7.6.3.

Unified Diff: third_party/sqlite/src/test/fts3rnd.test

Issue 6990047: Import SQLite 3.7.6.3. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 9 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/sqlite/src/test/fts3rnd.test

diff --git a/third_party/sqlite/src/test/fts3rnd.test b/third_party/sqlite/src/test/fts3rnd.test

new file mode 100644

index 0000000000000000000000000000000000000000..0909cee614d089491b2487099a4d62d5813a98fc

--- /dev/null

+++ b/third_party/sqlite/src/test/fts3rnd.test

@@ -0,0 +1,434 @@

+# 2009 December 03

+# May you do good and not evil.

+# May you find forgiveness for yourself and forgive others.

+# May you share freely, never taking more than you give.

+#***********************************************************************

+# Brute force (random data) tests for FTS3.

+#-------------------------------------------------------------------------

+# The FTS3 tests implemented in this file focus on testing that FTS3

+# returns the correct set of documents for various types of full-text

+# query. This is done using pseudo-randomly generated data and queries.

+# The expected result of each query is calculated using Tcl code.

+# 1. The database is initialized to contain a single table with three

+# columns. 100 rows are inserted into the table. Each of the three

+# values in each row is a document consisting of between 0 and 100

+# terms. Terms are selected from a vocabulary of $G(nVocab) terms.

+# 2. The following is performed 100 times:

+# a. A row is inserted into the database. The row contents are

+# generated as in step 1. The docid is a pseudo-randomly selected

+# value between 0 and 1000000.

+# b. A psuedo-randomly selected row is updated. One of its columns is

+# set to contain a new document generated in the same way as the

+# documents in step 1.

+# c. A psuedo-randomly selected row is deleted.

+# d. For each of several types of fts3 queries, 10 SELECT queries

+# of the form:

+# SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>'

+# are evaluated. The results are compared to those calculated by

+# Tcl code in this file. The patterns used for the different query

+# types are:

+# 1. query = <term>

+# 2. query = <prefix>

+# 3. query = "<term> <term>"

+# 4. query = "<term> <term> <term>"

+# 5. query = "<prefix> <prefix> <prefix>"

+# 6. query = <term> NEAR <term>

+# 7. query = <term> NEAR/11 <term> NEAR/11 <term>

+# 8. query = <term> OR <term>

+# 9. query = <term> NOT <term>

+# 10. query = <term> AND <term>

+# 11. query = <term> NEAR <term> OR <term> NEAR <term>

+# 12. query = <term> NEAR <term> NOT <term> NEAR <term>

+# 13. query = <term> NEAR <term> AND <term> NEAR <term>

+# where <term> is a term psuedo-randomly selected from the vocabulary

+# and prefix is the first 2 characters of such a term followed by

+# a "*" character.

+# Every second iteration, steps (a) through (d) above are performed

+# within a single transaction. This forces the queries in (d) to

+# read data from both the database and the in-memory hash table

+# that caches the full-text index entries created by steps (a), (b)

+# and (c) until the transaction is committed.

+# The procedure above is run 5 times, using advisory fts3 node sizes of 50,

+# 500, 1000 and 2000 bytes.

+# After the test using an advisory node-size of 50, an OOM test is run using

+# the database. This test is similar to step (d) above, except that it tests

+# the effects of transient and persistent OOM conditions encountered while

+# executing each query.

+set testdir [file dirname $argv0]

+source $testdir/tester.tcl

+# If this build does not include FTS3, skip the tests in this file.

+ifcapable !fts3 { finish_test ; return }

+source $testdir/fts3_common.tcl

+source $testdir/malloc_common.tcl

+set G(nVocab) 100

+set nVocab 100

+set lVocab [list]

+expr srand(0)

+# Generate a vocabulary of nVocab words. Each word is 3 characters long.

+set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}

+for {set i 0} {$i < $nVocab} {incr i} {

+ set len [expr int(rand()*3)+2]

+ set word [lindex $lChar [expr int(rand()*26)]]

+ append word [lindex $lChar [expr int(rand()*26)]]

+ if {$len>2} { append word [lindex $lChar [expr int(rand()*26)]] }

+ if {$len>3} { append word [lindex $lChar [expr int(rand()*26)]] }

+ lappend lVocab $word

+proc random_term {} {

+ lindex $::lVocab [expr {int(rand()*$::nVocab)}]

+# Return a document consisting of $nWord arbitrarily selected terms

+# from the $::lVocab list.

+proc generate_doc {nWord} {

+ set doc [list]

+ for {set i 0} {$i < $nWord} {incr i} {

+ lappend doc [random_term]

+ }

+ return $doc

+# Primitives to update the table.

+unset -nocomplain t1

+proc insert_row {rowid} {

+ set a [generate_doc [expr int((rand()*100))]]

+ set b [generate_doc [expr int((rand()*100))]]

+ set c [generate_doc [expr int((rand()*100))]]

+ execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }

+ set ::t1($rowid) [list $a $b $c]

+proc delete_row {rowid} {

+ execsql { DELETE FROM t1 WHERE rowid = $rowid }

+ catch {unset ::t1($rowid)}

+proc update_row {rowid} {

+ set cols {a b c}

+ set iCol [expr int(rand()*3)]

+ set doc [generate_doc [expr int((rand()*100))]]

+ lset ::t1($rowid) $iCol $doc

+ execsql "UPDATE t1 SET [lindex $cols $iCol] = \$doc WHERE rowid = \$rowid"

+proc simple_phrase {zPrefix} {

+ set ret [list]

+ set reg [string map {* {[^ ]*}} $zPrefix]

+ set reg " $reg "

+ foreach key [lsort -integer [array names ::t1]] {

+ set value $::t1($key)

+ set cnt [list]

+ foreach col $value {

+ if {[regexp $reg " $col "]} { lappend ret $key ; break }

+ }

+ #lsort -uniq -integer $ret

+ set ret

+# This [proc] is used to test the FTS3 matchinfo() function.

+proc simple_token_matchinfo {zToken} {

+ set nDoc(0) 0

+ set nDoc(1) 0

+ set nDoc(2) 0

+ set nHit(0) 0

+ set nHit(1) 0

+ set nHit(2) 0

+ foreach key [array names ::t1] {

+ set value $::t1($key)

+ set a($key) [list]

+ foreach i {0 1 2} col $value {

+ set hit [llength [lsearch -all $col $zToken]]

+ lappend a($key) $hit

+ incr nHit($i) $hit

+ if {$hit>0} { incr nDoc($i) }

+ }

+ set ret [list]

+ foreach docid [lsort -integer [array names a]] {

+ if { [lindex [lsort -integer $a($docid)] end] } {

+ set matchinfo [list 1 3]

+ foreach i {0 1 2} hit $a($docid) {

+ lappend matchinfo $hit $nHit($i) $nDoc($i)

+ }

+ lappend ret $docid $matchinfo

+ }

+ set ret

+proc simple_near {termlist nNear} {

+ set ret [list]

+ foreach {key value} [array get ::t1] {

+ foreach v $value {

+ set l [lsearch -exact -all $v [lindex $termlist 0]]

+ foreach T [lrange $termlist 1 end] {

+ set l2 [list]

+ foreach i $l {

+ set iStart [expr $i - $nNear - 1]

+ set iEnd [expr $i + $nNear + 1]

+ if {$iStart < 0} {set iStart 0}

+ foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] {

+ incr i2 $iStart

+ if {$i2 != $i} { lappend l2 $i2 }

+ }

+ set l [lsort -uniq -integer $l2]

+ }

+ if {[llength $l]} {

+#puts "MATCH($key): $v"

+ lappend ret $key

+ }

+ lsort -unique -integer $ret

+# The following three procs:

+# setup_not A B

+# setup_or A B

+# setup_and A B

+# each take two arguments. Both arguments must be lists of integer values

+# sorted by value. The return value is the list produced by evaluating

+# the equivalent of "A op B", where op is the FTS3 operator NOT, OR or

+# AND.

+proc setop_not {A B} {

+ foreach b $B { set n($b) {} }

+ set ret [list]

+ foreach a $A { if {![info exists n($a)]} {lappend ret $a} }

+ return $ret

+proc setop_or {A B} {

+ lsort -integer -uniq [concat $A $B]

+proc setop_and {A B} {

+ foreach b $B { set n($b) {} }

+ set ret [list]

+ foreach a $A { if {[info exists n($a)]} {lappend ret $a} }

+ return $ret

+proc mit {blob} {

+ set scan(littleEndian) i*

+ set scan(bigEndian) I*

+ binary scan $blob $scan($::tcl_platform(byteOrder)) r

+ return $r

+db func mit mit

+set sqlite_fts3_enable_parentheses 1

+foreach nodesize {50 500 1000 2000} {

+ catch { array unset ::t1 }

+ # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows.

+ #

+ db transaction {

+ catchsql { DROP TABLE t1 }

+ execsql "CREATE VIRTUAL TABLE t1 USING fts3(a, b, c)"

+ execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"

+ for {set i 0} {$i < 100} {incr i} { insert_row $i }

+ }

+ for {set iTest 1} {$iTest <= 100} {incr iTest} {

+ catchsql COMMIT

+ set DO_MALLOC_TEST 0

+ set nRep 10

+ if {$iTest==100 && $nodesize==50} {

+ set DO_MALLOC_TEST 1

+ set nRep 2

+ }

+ # Delete one row, update one row and insert one row.

+ #

+ set rows [array names ::t1]

+ set nRow [llength $rows]

+ set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]

+ set iDelete $iUpdate

+ while {$iDelete == $iUpdate} {

+ set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]

+ }

+ set iInsert $iUpdate

+ while {[info exists ::t1($iInsert)]} {

+ set iInsert [expr {int(rand()*1000000)}]

+ }

+ execsql BEGIN

+ insert_row $iInsert

+ update_row $iUpdate

+ delete_row $iDelete

+ if {0==($iTest%2)} { execsql COMMIT }

+ if {0==($iTest%2)} {

+ do_test fts3rnd-1.$nodesize.$iTest.0 { fts3_integrity_check t1 } ok

+ }

+ # Pick 10 terms from the vocabulary. Check that the results of querying

+ # the database for the set of documents containing each of these terms

+ # is the same as the result obtained by scanning the contents of the Tcl

+ # array for each term.

+ #

+ for {set i 0} {$i < 10} {incr i} {

+ set term [random_term]

+ do_select_test fts3rnd-1.$nodesize.$iTest.1.$i {

+ SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term

+ } [simple_token_matchinfo $term]

+ }

+ # This time, use the first two characters of each term as a term prefix

+ # to query for. Test that querying the Tcl array produces the same results

+ # as querying the FTS3 table for the prefix.

+ #

+ for {set i 0} {$i < $nRep} {incr i} {

+ set prefix [string range [random_term] 0 end-1]

+ set match "${prefix}*"

+ do_select_test fts3rnd-1.$nodesize.$iTest.2.$i {

+ SELECT docid FROM t1 WHERE t1 MATCH $match

+ } [simple_phrase $match]

+ }

+ # Similar to the above, except for phrase queries.

+ #

+ for {set i 0} {$i < $nRep} {incr i} {

+ set term [list [random_term] [random_term]]

+ set match "\"$term\""

+ do_select_test fts3rnd-1.$nodesize.$iTest.3.$i {

+ SELECT docid FROM t1 WHERE t1 MATCH $match

+ } [simple_phrase $term]

+ }

+ # Three word phrases.

+ #

+ for {set i 0} {$i < $nRep} {incr i} {

+ set term [list [random_term] [random_term] [random_term]]

+ set match "\"$term\""

+ do_select_test fts3rnd-1.$nodesize.$iTest.4.$i {

+ SELECT docid FROM t1 WHERE t1 MATCH $match

+ } [simple_phrase $term]

+ }

+ # Three word phrases made up of term-prefixes.

+ #

+ for {set i 0} {$i < $nRep} {incr i} {

+ set query "[string range [random_term] 0 end-1]* "

+ append query "[string range [random_term] 0 end-1]* "

+ append query "[string range [random_term] 0 end-1]*"

+ set match "\"$query\""

+ do_select_test fts3rnd-1.$nodesize.$iTest.5.$i {

+ SELECT docid FROM t1 WHERE t1 MATCH $match

+ } [simple_phrase $query]

+ }

+ # A NEAR query with terms as the arguments.

+ #

+ for {set i 0} {$i < $nRep} {incr i} {

+ set terms [list [random_term] [random_term]]

+ set match [join $terms " NEAR "]

+ do_select_test fts3rnd-1.$nodesize.$iTest.6.$i {

+ SELECT docid FROM t1 WHERE t1 MATCH $match

+ } [simple_near $terms 10]

+ }

+ # A 3-way NEAR query with terms as the arguments.

+ #

+ for {set i 0} {$i < $nRep} {incr i} {

+ set terms [list [random_term] [random_term] [random_term]]

+ set nNear 11

+ set match [join $terms " NEAR/$nNear "]

+ do_select_test fts3rnd-1.$nodesize.$iTest.7.$i {

+ SELECT docid FROM t1 WHERE t1 MATCH $match

+ } [simple_near $terms $nNear]

+ }

+ # Set operations on simple term queries.

+ #

+ foreach {tn op proc} {

+ 8 OR setop_or

+ 9 NOT setop_not

+ 10 AND setop_and

+ } {

+ for {set i 0} {$i < $nRep} {incr i} {

+ set term1 [random_term]

+ set term2 [random_term]

+ set match "$term1 $op $term2"

+ do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i {

+ SELECT docid FROM t1 WHERE t1 MATCH $match

+ } [$proc [simple_phrase $term1] [simple_phrase $term2]]

+ }

+ # Set operations on NEAR queries.

+ #

+ foreach {tn op proc} {

+ 8 OR setop_or

+ 9 NOT setop_not

+ 10 AND setop_and

+ } {

+ for {set i 0} {$i < $nRep} {incr i} {

+ set term1 [random_term]

+ set term2 [random_term]

+ set term3 [random_term]

+ set term4 [random_term]

+ set match "$term1 NEAR $term2 $op $term3 NEAR $term4"

+ do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i {

+ SELECT docid FROM t1 WHERE t1 MATCH $match

+ } [$proc \

+ [simple_near [list $term1 $term2] 10] \

+ [simple_near [list $term3 $term4] 10]

+ ]

+ }

+ catchsql COMMIT

+ }

+finish_test

« no previous file with comments | « third_party/sqlite/src/test/fts3query.test ('k') | third_party/sqlite/src/test/fts3shared.test » ('j') | no next file with comments »