third_party/sqlite/src/test/fts3rnd.test - Issue 6990047: Import SQLite 3.7.6.3.

Side by Side Diff: third_party/sqlite/src/test/fts3rnd.test

Issue 6990047: Import SQLite 3.7.6.3. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 9 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # 2009 December 03

	2 #

	3 # May you do good and not evil.

	4 # May you find forgiveness for yourself and forgive others.

	5 # May you share freely, never taking more than you give.

	6 #

	7 #***********************************************************************

	8 #

	9 # Brute force (random data) tests for FTS3.

	10 #

	11

	12 #-------------------------------------------------------------------------

	13 #

	14 # The FTS3 tests implemented in this file focus on testing that FTS3

	15 # returns the correct set of documents for various types of full-text

	16 # query. This is done using pseudo-randomly generated data and queries.

	17 # The expected result of each query is calculated using Tcl code.

	18 #

	19 # 1. The database is initialized to contain a single table with three

	20 # columns. 100 rows are inserted into the table. Each of the three

	21 # values in each row is a document consisting of between 0 and 100

	22 # terms. Terms are selected from a vocabulary of $G(nVocab) terms.

	23 #

	24 # 2. The following is performed 100 times:

	25 #

	26 # a. A row is inserted into the database. The row contents are

	27 # generated as in step 1. The docid is a pseudo-randomly selected

	28 # value between 0 and 1000000.

	29 #

	30 # b. A pseudo-randomly selected row is updated. One of its columns is

	31 # set to contain a new document generated in the same way as the

	32 # documents in step 1.

	33 #

	34 # c. A pseudo-randomly selected row is deleted.

	35 #

	36 # d. For each of several types of fts3 queries, 10 SELECT queries

	37 # of the form:

	38 #

	39 # SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>'

	40 #

	41 # are evaluated. The results are compared to those calculated by

	42 # Tcl code in this file. The patterns used for the different query

	43 # types are:

	44 #

	45 # 1. query = <term>

	46 # 2. query = <prefix>

	47 # 3. query = "<term> <term>"

	48 # 4. query = "<term> <term> <term>"

	49 # 5. query = "<prefix> <prefix> <prefix>"

	50 # 6. query = <term> NEAR <term>

	51 # 7. query = <term> NEAR/11 <term> NEAR/11 <term>

	52 # 8. query = <term> OR <term>

	53 # 9. query = <term> NOT <term>

	54 # 10. query = <term> AND <term>

	55 # 11. query = <term> NEAR <term> OR <term> NEAR <term>

	56 # 12. query = <term> NEAR <term> NOT <term> NEAR <term>

	57 # 13. query = <term> NEAR <term> AND <term> NEAR <term>

	58 #

	59 # where <term> is a term pseudo-randomly selected from the vocabulary

	60 # and <prefix> is such a term with its last character removed, followed by

	61 # a "*" character.

	62 #

	63 # Every second iteration, steps (a) through (d) above are performed

	64 # within a single transaction. This forces the queries in (d) to

	65 # read data from both the database and the in-memory hash table

	66 # that caches the full-text index entries created by steps (a), (b)

	67 # and (c) until the transaction is committed.

	68 #

	69 # The procedure above is run 4 times, using advisory fts3 node sizes of 50,

	70 # 500, 1000 and 2000 bytes.

	71 #

	72 # After the test using an advisory node-size of 50, an OOM test is run using

	73 # the database. This test is similar to step (d) above, except that it tests

	74 # the effects of transient and persistent OOM conditions encountered while

	75 # executing each query.

	76 #

	77

	78 set testdir [file dirname $argv0]

	79 source $testdir/tester.tcl

	80

	81 # If this build does not include FTS3, skip the tests in this file.

	82 #

	83 ifcapable !fts3 { finish_test ; return }

	84 source $testdir/fts3_common.tcl

	85 source $testdir/malloc_common.tcl

	86

	# Vocabulary configuration. G(nVocab) mirrors nVocab; only nVocab is read
	# by the code below.
	set G(nVocab) 100
	set nVocab    100
	set lVocab    {}

	# Seed the Tcl random number generator with a fixed value so that every
	# run of this script draws the same sequence of rand() values.
	expr {srand(0)}

	# Build a vocabulary of $nVocab words. Each word is made of lower-case
	# letters and is between 2 and 4 characters long: the length is drawn
	# first, then one letter is drawn per character.
	#
	set lChar [split "abcdefghijklmnopqrstuvwxyz" ""]
	for {set i 0} {$i < $nVocab} {incr i} {
	  set len  [expr {int(rand()*3) + 2}]
	  set word ""
	  while {[string length $word] < $len} {
	    append word [lindex $lChar [expr {int(rand()*26)}]]
	  }
	  lappend lVocab $word
	}

	105

	# Return one element of the global vocabulary list ::lVocab, chosen with
	# rand() from the first $::nVocab entries.
	#
	proc random_term {} {
	  global lVocab nVocab
	  set idx [expr {int(rand() * $nVocab)}]
	  return [lindex $lVocab $idx]
	}

	109

	# Build a document: a Tcl list holding $nWord terms, each obtained from a
	# separate call to [random_term]. An nWord of 0 yields an empty list.
	#
	proc generate_doc {nWord} {
	  set words {}
	  for {set remaining $nWord} {$remaining > 0} {incr remaining -1} {
	    lappend words [random_term]
	  }
	  return $words
	}

	120

	121

	122

	# Primitives to update the table. Each one applies the same change to the
	# FTS3 table t1 and to the global Tcl array ::t1, which maps a docid to a
	# three-element list holding the documents stored in columns a, b and c.
	#
	unset -nocomplain t1

	# Insert a row with docid $rowid. Each of the three columns receives a
	# freshly generated document of int(rand()*100) terms.
	#
	proc insert_row {rowid} {
	  # The SQL statement below refers to $a, $b and $c, so the documents
	  # must live in local variables with exactly those names.
	  foreach col {a b c} {
	    set $col [generate_doc [expr {int(rand()*100)}]]
	  }
	  execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }
	  set ::t1($rowid) [list $a $b $c]
	}

	# Delete the row identified by $rowid from table t1 and drop the matching
	# entry from the global array ::t1. A missing array entry is tolerated:
	# the error from [unset] is caught and its status code is returned.
	#
	proc delete_row {rowid} {
	  upvar #0 t1 shadow
	  execsql { DELETE FROM t1 WHERE rowid = $rowid }
	  return [catch { unset shadow($rowid) }]
	}

	# Replace one column of row $rowid with a newly generated document. The
	# column (a, b or c) is picked with rand(); the entry in ::t1 is updated
	# first, then the same change is written to table t1.
	#
	proc update_row {rowid} {
	  upvar #0 t1 shadow

	  # Draw the column index before the document so that rand() is consumed
	  # in a fixed order.
	  set which  [expr {int(rand() * 3)}]
	  set column [lindex {a b c} $which]
	  set doc    [generate_doc [expr {int(rand() * 100)}]]

	  lset shadow($rowid) $which $doc

	  # Only the column name is substituted into the SQL text. $doc and $rowid
	  # stay as variable references for the database interface to bind.
	  set sql "UPDATE t1 SET $column = \$doc WHERE rowid = \$rowid"
	  execsql $sql
	}

	144

	# Compute, from the global array ::t1, the sorted list of docids whose
	# row contains the phrase $zPrefix in at least one column.
	#
	# $zPrefix is a space-separated sequence of tokens. Every "*" in it is
	# turned into the regular expression "[^ ]*", so "ab*" matches any token
	# that starts with "ab". The phrase and each column are both wrapped in
	# single spaces so that only whole tokens can match.
	#
	proc simple_phrase {zPrefix} {
	  upvar #0 t1 shadow

	  set pattern " [string map {* {[^ ]*}} $zPrefix] "

	  set hits {}
	  foreach docid [lsort -integer [array names shadow]] {
	    foreach col $shadow($docid) {
	      if {![regexp $pattern " $col "]} continue
	      # One matching column is enough; report the docid only once.
	      lappend hits $docid
	      break
	    }
	  }

	  return $hits
	}

	162

	# This [proc] is used to test the FTS3 matchinfo() function.
	#
	# Using the global array ::t1, it returns a flat list of alternating
	# docid and matchinfo values, ordered by docid, for every row where
	# $zToken occurs in at least one of the three columns. Each matchinfo
	# value is a list that starts with "1 3", followed by three integers for
	# each column:
	#
	#   * occurrences of $zToken in that column of this row,
	#   * occurrences of $zToken in that column over all rows,
	#   * number of rows whose value for that column contains $zToken.
	#
	proc simple_token_matchinfo {zToken} {
	  upvar #0 t1 shadow

	  foreach i {0 1 2} {
	    set nDoc($i) 0
	    set nHit($i) 0
	  }

	  # First pass: count hits per row and column, and build column totals.
	  foreach docid [array names shadow] {
	    set perDoc($docid) {}
	    foreach i {0 1 2} col $shadow($docid) {
	      set n [llength [lsearch -all $col $zToken]]
	      lappend perDoc($docid) $n
	      incr nHit($i) $n
	      if {$n > 0} { incr nDoc($i) }
	    }
	  }

	  # Second pass: emit a matchinfo list for each row with at least one hit.
	  set result {}
	  foreach docid [lsort -integer [array names perDoc]] {
	    set matched 0
	    foreach n $perDoc($docid) {
	      if {$n > 0} { set matched 1 ; break }
	    }
	    if {!$matched} continue

	    set matchinfo [list 1 3]
	    foreach i {0 1 2} n $perDoc($docid) {
	      lappend matchinfo $n $nHit($i) $nDoc($i)
	    }
	    lappend result $docid $matchinfo
	  }

	  return $result
	}

	199

	# Compute, from the global array ::t1, the sorted list of distinct docids
	# that have a column matching a NEAR chain.
	#
	#   termlist  list of terms; term k+1 must lie close to a surviving
	#             position of term k, within the same column
	#   nNear     a later term matches when its position differs from the
	#             earlier term's position by at most nNear+1 and is not the
	#             same position
	#
	proc simple_near {termlist nNear} {
	  upvar #0 t1 shadow

	  set first  [lindex $termlist 0]
	  set others [lrange $termlist 1 end]

	  set matches {}
	  foreach docid [array names shadow] {
	    foreach column $shadow($docid) {

	      # Start from every position of the first term. For each later term
	      # keep only its positions that fall in the window around one of
	      # the positions kept so far.
	      set positions [lsearch -exact -all $column $first]
	      foreach term $others {
	        set next {}
	        foreach pos $positions {
	          set lo [expr {$pos - $nNear - 1}]
	          if {$lo < 0} { set lo 0 }
	          set hi [expr {$pos + $nNear + 1}]
	          foreach off [lsearch -exact -all [lrange $column $lo $hi] $term] {
	            # lsearch indexes into the window; shift back to the column.
	            set cand [expr {$off + $lo}]
	            if {$cand != $pos} { lappend next $cand }
	          }
	        }
	        set positions [lsort -unique -integer $next]
	      }

	      if {[llength $positions] > 0} { lappend matches $docid }
	    }
	  }

	  return [lsort -unique -integer $matches]
	}

	230

	231 # The following three procs:

	232 #

	233 # setop_not A B

	234 # setop_or A B

	235 # setop_and A B

	236 #

	237 # each take two arguments. Both arguments must be lists of integer values

	238 # sorted by value. The return value is the list produced by evaluating

	239 # the equivalent of "A op B", where op is the FTS3 operator NOT, OR or

	240 # AND.

	241 #

	# Return the elements of list A that do not appear in list B. The order
	# of A is preserved.
	#
	proc setop_not {A B} {
	  array set excluded {}
	  foreach b $B { set excluded($b) 1 }

	  set kept {}
	  foreach a $A {
	    if {[info exists excluded($a)]} continue
	    lappend kept $a
	  }
	  return $kept
	}

	# Return the union of integer lists A and B, sorted in ascending order
	# with duplicates removed.
	#
	proc setop_or {A B} {
	  set merged $A
	  foreach b $B { lappend merged $b }
	  return [lsort -unique -integer $merged]
	}

	# Return the elements of list A that also appear in list B. The order of
	# A is preserved.
	#
	proc setop_and {A B} {
	  array set inB {}
	  foreach b $B { set inB($b) 1 }

	  set common {}
	  foreach a $A {
	    if {![info exists inB($a)]} continue
	    lappend common $a
	  }
	  return $common
	}

	257

	# Decode a binary blob into a list of 32-bit integers, reading them in
	# the byte order reported by ::tcl_platform(byteOrder).
	#
	proc mit {blob} {
	  # [binary scan] format for each possible platform byte order.
	  array set fmt {littleEndian i* bigEndian I*}
	  binary scan $blob $fmt($::tcl_platform(byteOrder)) ints
	  return $ints
	}

	# Expose the Tcl proc [mit] as the SQL function mit(), so that the blob
	# returned by matchinfo() can be compared as a list of integers.
	db func mit mit

	set sqlite_fts3_enable_parentheses 1

	# Main test driver. For each advisory node size the table is rebuilt with
	# 100 rows, then 100 iterations of insert/update/delete plus query checks
	# are run against it.
	#
	# Test names have the form fts3rnd-1.<nodesize>.<iTest>.<tn>.<i>, where
	# <tn> is the query-pattern number:
	#
	#    1: term (with matchinfo)     2: prefix
	#    3: two-word phrase           4: three-word phrase
	#    5: three-prefix phrase       6: NEAR
	#    7: three-way NEAR/11         8/9/10: OR / NOT / AND on terms
	#   11/12/13: OR / NOT / AND on NEAR expressions
	#
	foreach nodesize {50 500 1000 2000} {
	  catch { array unset ::t1 }

	  # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows.
	  #
	  db transaction {
	    catchsql { DROP TABLE t1 }
	    execsql "CREATE VIRTUAL TABLE t1 USING fts3(a, b, c)"
	    execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"
	    for {set i 0} {$i < 100} {incr i} { insert_row $i }
	  }

	  for {set iTest 1} {$iTest <= 100} {incr iTest} {
	    # Close any transaction left open by the previous iteration.
	    catchsql COMMIT

	    # The last iteration for node size 50 sets DO_MALLOC_TEST and uses
	    # fewer repetitions of each query pattern.
	    set DO_MALLOC_TEST 0
	    set nRep 10
	    if {$iTest==100 && $nodesize==50} {
	      set DO_MALLOC_TEST 1
	      set nRep 2
	    }

	    # Delete one row, update one row and insert one row. The deleted and
	    # updated rows are distinct existing rows; the inserted docid is one
	    # that is not currently in use.
	    #
	    set rows [array names ::t1]
	    set nRow [llength $rows]
	    set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]
	    set iDelete $iUpdate
	    while {$iDelete == $iUpdate} {
	      set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
	    }
	    set iInsert $iUpdate
	    while {[info exists ::t1($iInsert)]} {
	      set iInsert [expr {int(rand()*1000000)}]
	    }
	    execsql BEGIN
	    insert_row $iInsert
	    update_row $iUpdate
	    delete_row $iDelete

	    # On even iterations commit now, so the queries below run outside the
	    # transaction. On odd iterations the transaction stays open until the
	    # COMMIT at the end of the iteration.
	    if {0==($iTest%2)} { execsql COMMIT }

	    if {0==($iTest%2)} {
	      do_test fts3rnd-1.$nodesize.$iTest.0 { fts3_integrity_check t1 } ok
	    }

	    # Pick 10 terms from the vocabulary. Check that the results of querying
	    # the database for the set of documents containing each of these terms
	    # is the same as the result obtained by scanning the contents of the Tcl
	    # array for each term.
	    #
	    for {set i 0} {$i < 10} {incr i} {
	      set term [random_term]
	      do_select_test fts3rnd-1.$nodesize.$iTest.1.$i {
	        SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
	      } [simple_token_matchinfo $term]
	    }

	    # This time, use each term with its last character removed as a term
	    # prefix to query for. Test that querying the Tcl array produces the
	    # same results as querying the FTS3 table for the prefix.
	    #
	    for {set i 0} {$i < $nRep} {incr i} {
	      set prefix [string range [random_term] 0 end-1]
	      set match "${prefix}*"
	      do_select_test fts3rnd-1.$nodesize.$iTest.2.$i {
	        SELECT docid FROM t1 WHERE t1 MATCH $match
	      } [simple_phrase $match]
	    }

	    # Similar to the above, except for phrase queries.
	    #
	    for {set i 0} {$i < $nRep} {incr i} {
	      set term [list [random_term] [random_term]]
	      set match "\"$term\""
	      do_select_test fts3rnd-1.$nodesize.$iTest.3.$i {
	        SELECT docid FROM t1 WHERE t1 MATCH $match
	      } [simple_phrase $term]
	    }

	    # Three word phrases.
	    #
	    for {set i 0} {$i < $nRep} {incr i} {
	      set term [list [random_term] [random_term] [random_term]]
	      set match "\"$term\""
	      do_select_test fts3rnd-1.$nodesize.$iTest.4.$i {
	        SELECT docid FROM t1 WHERE t1 MATCH $match
	      } [simple_phrase $term]
	    }

	    # Three word phrases made up of term-prefixes.
	    #
	    for {set i 0} {$i < $nRep} {incr i} {
	      set query "[string range [random_term] 0 end-1]* "
	      append query "[string range [random_term] 0 end-1]* "
	      append query "[string range [random_term] 0 end-1]*"

	      set match "\"$query\""
	      do_select_test fts3rnd-1.$nodesize.$iTest.5.$i {
	        SELECT docid FROM t1 WHERE t1 MATCH $match
	      } [simple_phrase $query]
	    }

	    # A NEAR query with terms as the arguments. The expected result is
	    # computed with a distance of 10 for a bare NEAR.
	    #
	    for {set i 0} {$i < $nRep} {incr i} {
	      set terms [list [random_term] [random_term]]
	      set match [join $terms " NEAR "]
	      do_select_test fts3rnd-1.$nodesize.$iTest.6.$i {
	        SELECT docid FROM t1 WHERE t1 MATCH $match
	      } [simple_near $terms 10]
	    }

	    # A 3-way NEAR query with terms as the arguments.
	    #
	    for {set i 0} {$i < $nRep} {incr i} {
	      set terms [list [random_term] [random_term] [random_term]]
	      set nNear 11
	      set match [join $terms " NEAR/$nNear "]
	      do_select_test fts3rnd-1.$nodesize.$iTest.7.$i {
	        SELECT docid FROM t1 WHERE t1 MATCH $match
	      } [simple_near $terms $nNear]
	    }

	    # Set operations on simple term queries.
	    #
	    foreach {tn op proc} {
	      8  OR  setop_or
	      9  NOT setop_not
	      10 AND setop_and
	    } {
	      for {set i 0} {$i < $nRep} {incr i} {
	        set term1 [random_term]
	        set term2 [random_term]
	        set match "$term1 $op $term2"
	        do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i {
	          SELECT docid FROM t1 WHERE t1 MATCH $match
	        } [$proc [simple_phrase $term1] [simple_phrase $term2]]
	      }
	    }

	    # Set operations on NEAR queries. These are numbered 11, 12 and 13 so
	    # that their test names do not collide with tests 8, 9 and 10 above.
	    #
	    foreach {tn op proc} {
	      11 OR  setop_or
	      12 NOT setop_not
	      13 AND setop_and
	    } {
	      for {set i 0} {$i < $nRep} {incr i} {
	        set term1 [random_term]
	        set term2 [random_term]
	        set term3 [random_term]
	        set term4 [random_term]
	        set match "$term1 NEAR $term2 $op $term3 NEAR $term4"
	        do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i {
	          SELECT docid FROM t1 WHERE t1 MATCH $match
	        } [$proc                                  \
	            [simple_near [list $term1 $term2] 10] \
	            [simple_near [list $term3 $term4] 10]
	        ]
	      }
	    }

	    # Commit the transaction left open on odd iterations. On even
	    # iterations there is nothing to commit and the error is ignored.
	    catchsql COMMIT
	  }
	}

	finish_test

OLD	NEW

« no previous file with comments | « third_party/sqlite/src/test/fts3query.test ('k') | third_party/sqlite/src/test/fts3shared.test » ('j') | no next file with comments »