| Index: third_party/sqlite/src/test/fts3rnd.test
 | 
| diff --git a/third_party/sqlite/src/test/fts3rnd.test b/third_party/sqlite/src/test/fts3rnd.test
 | 
| new file mode 100644
 | 
| index 0000000000000000000000000000000000000000..fe1ea5e72ea00cbd1c3b3a8149ad7e1b1c920dfe
 | 
| --- /dev/null
 | 
| +++ b/third_party/sqlite/src/test/fts3rnd.test
 | 
| @@ -0,0 +1,431 @@
 | 
| +# 2009 December 03
 | 
| +#
 | 
| +#    May you do good and not evil.
 | 
| +#    May you find forgiveness for yourself and forgive others.
 | 
| +#    May you share freely, never taking more than you give.
 | 
| +#
 | 
| +#***********************************************************************
 | 
| +#
 | 
| +# Brute force (random data) tests for FTS3.
 | 
| +#
 | 
| +
 | 
| +#-------------------------------------------------------------------------
 | 
| +#
 | 
| +# The FTS3 tests implemented in this file focus on testing that FTS3
 | 
| +# returns the correct set of documents for various types of full-text
 | 
| +# query. This is done using pseudo-randomly generated data and queries.
 | 
| +# The expected result of each query is calculated using Tcl code.
 | 
| +#
 | 
| +#   1. The database is initialized to contain a single table with three
 | 
| +#      columns. 100 rows are inserted into the table. Each of the three
 | 
| +#      values in each row is a document consisting of between 0 and 100
 | 
| +#      terms. Terms are selected from a vocabulary of $G(nVocab) terms.
 | 
| +#
 | 
| +#   2. The following is performed 100 times:
 | 
| +#
 | 
| +#      a. A row is inserted into the database. The row contents are 
 | 
| +#         generated as in step 1. The docid is a pseudo-randomly selected
 | 
| +#         value between 0 and 1000000.
 | 
| +# 
 | 
| +#      b. A psuedo-randomly selected row is updated. One of its columns is
 | 
| +#         set to contain a new document generated in the same way as the
 | 
| +#         documents in step 1.
 | 
| +# 
 | 
| +#      c. A psuedo-randomly selected row is deleted.
 | 
| +# 
 | 
| +#      d. For each of several types of fts3 queries, 10 SELECT queries
 | 
| +#         of the form:
 | 
| +# 
 | 
| +#           SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>'
 | 
| +# 
 | 
| +#         are evaluated. The results are compared to those calculated by
 | 
| +#         Tcl code in this file. The patterns used for the different query
 | 
| +#         types are:
 | 
| +# 
 | 
| +#           1.  query = <term>
 | 
| +#           2.  query = <prefix>
 | 
| +#           3.  query = "<term> <term>"
 | 
| +#           4.  query = "<term> <term> <term>"
 | 
| +#           5.  query = "<prefix> <prefix> <prefix>"
 | 
| +#           6.  query = <term> NEAR <term>
 | 
| +#           7.  query = <term> NEAR/11 <term> NEAR/11 <term>
 | 
| +#           8.  query = <term> OR <term>
 | 
| +#           9.  query = <term> NOT <term>
 | 
| +#           10. query = <term> AND <term>
 | 
| +#           11. query = <term> NEAR <term> OR <term> NEAR <term>
 | 
| +#           12. query = <term> NEAR <term> NOT <term> NEAR <term>
 | 
| +#           13. query = <term> NEAR <term> AND <term> NEAR <term>
 | 
| +# 
 | 
| +#         where <term> is a term psuedo-randomly selected from the vocabulary
 | 
| +#         and prefix is the first 2 characters of such a term followed by
 | 
| +#         a "*" character.
 | 
| +#     
 | 
| +#      Every second iteration, steps (a) through (d) above are performed
 | 
| +#      within a single transaction. This forces the queries in (d) to
 | 
| +#      read data from both the database and the in-memory hash table
 | 
| +#      that caches the full-text index entries created by steps (a), (b)
 | 
| +#      and (c) until the transaction is committed.
 | 
| +#
 | 
| +# The procedure above is run 5 times, using advisory fts3 node sizes of 50,
 | 
| +# 500, 1000 and 2000 bytes.
 | 
| +#
 | 
| +# After the test using an advisory node-size of 50, an OOM test is run using
 | 
| +# the database. This test is similar to step (d) above, except that it tests
 | 
| +# the effects of transient and persistent OOM conditions encountered while
 | 
| +# executing each query.
 | 
| +#
 | 
| +
 | 
| +set testdir [file dirname $argv0]
 | 
| +source $testdir/tester.tcl
 | 
| +
 | 
| +# If this build does not include FTS3, skip the tests in this file.
 | 
| +#
 | 
| +ifcapable !fts3 { finish_test ; return }
 | 
| +source $testdir/fts3_common.tcl
 | 
| +source $testdir/malloc_common.tcl
 | 
| +
 | 
| +set G(nVocab) 100
 | 
| +
 | 
| +set nVocab 100
 | 
| +set lVocab [list]
 | 
| +
 | 
| +expr srand(0)
 | 
| +
 | 
| +# Generate a vocabulary of nVocab words. Each word is 3 characters long.
 | 
| +#
 | 
| +set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}
 | 
| +for {set i 0} {$i < $nVocab} {incr i} {
 | 
| +  set len [expr int(rand()*3)+2]
 | 
| +  set    word [lindex $lChar [expr int(rand()*26)]]
 | 
| +  append word [lindex $lChar [expr int(rand()*26)]]
 | 
| +  if {$len>2} { append word [lindex $lChar [expr int(rand()*26)]] }
 | 
| +  if {$len>3} { append word [lindex $lChar [expr int(rand()*26)]] }
 | 
| +  lappend lVocab $word
 | 
| +}
 | 
| +
 | 
| +proc random_term {} {
 | 
| +  lindex $::lVocab [expr {int(rand()*$::nVocab)}]
 | 
| +}
 | 
| +
 | 
| +# Return a document consisting of $nWord arbitrarily selected terms
 | 
| +# from the $::lVocab list.
 | 
| +#
 | 
| +proc generate_doc {nWord} {
 | 
| +  set doc [list]
 | 
| +  for {set i 0} {$i < $nWord} {incr i} {
 | 
| +    lappend doc [random_term]
 | 
| +  }
 | 
| +  return $doc
 | 
| +}
 | 
| +
 | 
| +
 | 
| +
 | 
| +# Primitives to update the table.
 | 
| +#
 | 
| +unset -nocomplain t1
 | 
| +proc insert_row {rowid} {
 | 
| +  set a [generate_doc [expr int((rand()*100))]]
 | 
| +  set b [generate_doc [expr int((rand()*100))]]
 | 
| +  set c [generate_doc [expr int((rand()*100))]]
 | 
| +  execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }
 | 
| +  set ::t1($rowid) [list $a $b $c]
 | 
| +}
 | 
| +proc delete_row {rowid} {
 | 
| +  execsql { DELETE FROM t1 WHERE rowid = $rowid }
 | 
| +  catch {unset ::t1($rowid)}
 | 
| +}
 | 
| +proc update_row {rowid} {
 | 
| +  set cols {a b c}
 | 
| +  set iCol [expr int(rand()*3)]
 | 
| +  set doc  [generate_doc [expr int((rand()*100))]]
 | 
| +  lset ::t1($rowid) $iCol $doc
 | 
| +  execsql "UPDATE t1 SET [lindex $cols $iCol] = \$doc WHERE rowid = \$rowid"
 | 
| +}
 | 
| +
 | 
| +proc simple_phrase {zPrefix} {
 | 
| +  set ret [list]
 | 
| +
 | 
| +  set reg [string map {* {[^ ]*}} $zPrefix]
 | 
| +  set reg " $reg "
 | 
| +
 | 
| +  foreach key [lsort -integer [array names ::t1]] {
 | 
| +    set value $::t1($key)
 | 
| +    set cnt [list]
 | 
| +    foreach col $value {
 | 
| +      if {[regexp $reg " $col "]} { lappend ret $key ; break }
 | 
| +    }
 | 
| +  }
 | 
| +
 | 
| +  #lsort -uniq -integer $ret
 | 
| +  set ret
 | 
| +}
 | 
| +
 | 
| +# This [proc] is used to test the FTS3 matchinfo() function.
 | 
| +# 
 | 
| +proc simple_token_matchinfo {zToken} {
 | 
| +
 | 
| +  set nDoc(0) 0
 | 
| +  set nDoc(1) 0
 | 
| +  set nDoc(2) 0
 | 
| +  set nHit(0) 0
 | 
| +  set nHit(1) 0
 | 
| +  set nHit(2) 0
 | 
| +
 | 
| +
 | 
| +  foreach key [array names ::t1] {
 | 
| +    set value $::t1($key)
 | 
| +    set a($key) [list]
 | 
| +    foreach i {0 1 2} col $value {
 | 
| +      set hit [llength [lsearch -all $col $zToken]]
 | 
| +      lappend a($key) $hit
 | 
| +      incr nHit($i) $hit
 | 
| +      if {$hit>0} { incr nDoc($i) }
 | 
| +    }
 | 
| +  }
 | 
| +
 | 
| +  set ret [list]
 | 
| +  foreach docid [lsort -integer [array names a]] {
 | 
| +    if { [lindex [lsort -integer $a($docid)] end] } {
 | 
| +      set matchinfo [list 1 3]
 | 
| +      foreach i {0 1 2} hit $a($docid) {
 | 
| +        lappend matchinfo $hit $nHit($i) $nDoc($i)
 | 
| +      }
 | 
| +      lappend ret $docid $matchinfo
 | 
| +    }
 | 
| +  }
 | 
| +
 | 
| +  set ret
 | 
| +} 
 | 
| +
 | 
| +proc simple_near {termlist nNear} {
 | 
| +  set ret [list]
 | 
| +
 | 
| +  foreach {key value} [array get ::t1] {
 | 
| +    foreach v $value {
 | 
| +
 | 
| +      set l [lsearch -exact -all $v [lindex $termlist 0]]
 | 
| +      foreach T [lrange $termlist 1 end] {
 | 
| +        set l2 [list]
 | 
| +        foreach i $l {
 | 
| +          set iStart [expr $i - $nNear - 1]
 | 
| +          set iEnd [expr $i + $nNear + 1]
 | 
| +          if {$iStart < 0} {set iStart 0}
 | 
| +          foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] {
 | 
| +            incr i2 $iStart
 | 
| +            if {$i2 != $i} { lappend l2 $i2 } 
 | 
| +          }
 | 
| +        }
 | 
| +        set l [lsort -uniq -integer $l2]
 | 
| +      }
 | 
| +
 | 
| +      if {[llength $l]} {
 | 
| +#puts "MATCH($key): $v"
 | 
| +        lappend ret $key
 | 
| +      } 
 | 
| +    }
 | 
| +  }
 | 
| +
 | 
| +  lsort -unique -integer $ret
 | 
| +}
 | 
| +
 | 
| +# The following three procs:
 | 
| +# 
 | 
| +#   setup_not A B
 | 
| +#   setup_or  A B
 | 
| +#   setup_and A B
 | 
| +#
 | 
| +# each take two arguments. Both arguments must be lists of integer values
 | 
| +# sorted by value. The return value is the list produced by evaluating
 | 
| +# the equivalent of "A op B", where op is the FTS3 operator NOT, OR or
 | 
| +# AND.
 | 
| +#
 | 
| +proc setop_not {A B} {
 | 
| +  foreach b $B { set n($b) {} }
 | 
| +  set ret [list]
 | 
| +  foreach a $A { if {![info exists n($a)]} {lappend ret $a} }
 | 
| +  return $ret
 | 
| +}
 | 
| +proc setop_or {A B} {
 | 
| +  lsort -integer -uniq [concat $A $B]
 | 
| +}
 | 
| +proc setop_and {A B} {
 | 
| +  foreach b $B { set n($b) {} }
 | 
| +  set ret [list]
 | 
| +  foreach a $A { if {[info exists n($a)]} {lappend ret $a} }
 | 
| +  return $ret
 | 
| +}
 | 
| +
 | 
| +proc mit {blob} {
 | 
| +  set scan(littleEndian) i*
 | 
| +  set scan(bigEndian) I*
 | 
| +  binary scan $blob $scan($::tcl_platform(byteOrder)) r
 | 
| +  return $r
 | 
| +}
 | 
| +db func mit mit
 | 
| +
 | 
| +set sqlite_fts3_enable_parentheses 1
 | 
| +
 | 
| +foreach nodesize {50 500 1000 2000} {
 | 
| +  catch { array unset ::t1 }
 | 
| +
 | 
| +  # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows.
 | 
| +  #
 | 
| +  db transaction {
 | 
| +    catchsql { DROP TABLE t1 }
 | 
| +    execsql "CREATE VIRTUAL TABLE t1 USING fts3(a, b, c)"
 | 
| +    execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"
 | 
| +    for {set i 0} {$i < 100} {incr i} { insert_row $i }
 | 
| +  }
 | 
| +  
 | 
| +  for {set iTest 1} {$iTest <= 100} {incr iTest} {
 | 
| +    catchsql COMMIT
 | 
| +
 | 
| +    set DO_MALLOC_TEST 0
 | 
| +    set nRep 10
 | 
| +    if {$iTest==100 && $nodesize==50} { 
 | 
| +      set DO_MALLOC_TEST 1 
 | 
| +      set nRep 2
 | 
| +    }
 | 
| +  
 | 
| +    # Delete one row, update one row and insert one row.
 | 
| +    #
 | 
| +    set rows [array names ::t1]
 | 
| +    set nRow [llength $rows]
 | 
| +    set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]
 | 
| +    set iDelete $iUpdate
 | 
| +    while {$iDelete == $iUpdate} {
 | 
| +      set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
 | 
| +    }
 | 
| +    set iInsert $iUpdate
 | 
| +    while {[info exists ::t1($iInsert)]} {
 | 
| +      set iInsert [expr {int(rand()*1000000)}]
 | 
| +    }
 | 
| +    execsql BEGIN
 | 
| +      insert_row $iInsert
 | 
| +      update_row $iUpdate
 | 
| +      delete_row $iDelete
 | 
| +    if {0==($iTest%2)} { execsql COMMIT }
 | 
| +
 | 
| +    # Pick 10 terms from the vocabulary. Check that the results of querying
 | 
| +    # the database for the set of documents containing each of these terms
 | 
| +    # is the same as the result obtained by scanning the contents of the Tcl 
 | 
| +    # array for each term.
 | 
| +    #
 | 
| +    for {set i 0} {$i < 10} {incr i} {
 | 
| +      set term [random_term]
 | 
| +      do_select_test fts3rnd-1.$nodesize.$iTest.1.$i {
 | 
| +        SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
 | 
| +      } [simple_token_matchinfo $term]
 | 
| +    }
 | 
| +
 | 
| +    # This time, use the first two characters of each term as a term prefix
 | 
| +    # to query for. Test that querying the Tcl array produces the same results
 | 
| +    # as querying the FTS3 table for the prefix.
 | 
| +    #
 | 
| +    for {set i 0} {$i < $nRep} {incr i} {
 | 
| +      set prefix [string range [random_term] 0 end-1]
 | 
| +      set match "${prefix}*"
 | 
| +      do_select_test fts3rnd-1.$nodesize.$iTest.2.$i {
 | 
| +        SELECT docid FROM t1 WHERE t1 MATCH $match
 | 
| +      } [simple_phrase $match]
 | 
| +    }
 | 
| +
 | 
| +    # Similar to the above, except for phrase queries.
 | 
| +    #
 | 
| +    for {set i 0} {$i < $nRep} {incr i} {
 | 
| +      set term [list [random_term] [random_term]]
 | 
| +      set match "\"$term\""
 | 
| +      do_select_test fts3rnd-1.$nodesize.$iTest.3.$i {
 | 
| +        SELECT docid FROM t1 WHERE t1 MATCH $match
 | 
| +      } [simple_phrase $term]
 | 
| +    }
 | 
| +
 | 
| +    # Three word phrases.
 | 
| +    #
 | 
| +    for {set i 0} {$i < $nRep} {incr i} {
 | 
| +      set term [list [random_term] [random_term] [random_term]]
 | 
| +      set match "\"$term\""
 | 
| +      do_select_test fts3rnd-1.$nodesize.$iTest.4.$i {
 | 
| +        SELECT docid FROM t1 WHERE t1 MATCH $match
 | 
| +      } [simple_phrase $term]
 | 
| +    }
 | 
| +
 | 
| +    # Three word phrases made up of term-prefixes.
 | 
| +    #
 | 
| +    for {set i 0} {$i < $nRep} {incr i} {
 | 
| +      set    query "[string range [random_term] 0 end-1]* "
 | 
| +      append query "[string range [random_term] 0 end-1]* "
 | 
| +      append query "[string range [random_term] 0 end-1]*"
 | 
| +
 | 
| +      set match "\"$query\""
 | 
| +      do_select_test fts3rnd-1.$nodesize.$iTest.5.$i {
 | 
| +        SELECT docid FROM t1 WHERE t1 MATCH $match
 | 
| +      } [simple_phrase $query]
 | 
| +    }
 | 
| +
 | 
| +    # A NEAR query with terms as the arguments.
 | 
| +    #
 | 
| +    for {set i 0} {$i < $nRep} {incr i} {
 | 
| +      set terms [list [random_term] [random_term]]
 | 
| +      set match [join $terms " NEAR "]
 | 
| +      do_select_test fts3rnd-1.$nodesize.$iTest.6.$i {
 | 
| +        SELECT docid FROM t1 WHERE t1 MATCH $match 
 | 
| +      } [simple_near $terms 10]
 | 
| +    }
 | 
| +
 | 
| +    # A 3-way NEAR query with terms as the arguments.
 | 
| +    #
 | 
| +    for {set i 0} {$i < $nRep} {incr i} {
 | 
| +      set terms [list [random_term] [random_term] [random_term]]
 | 
| +      set nNear 11
 | 
| +      set match [join $terms " NEAR/$nNear "]
 | 
| +      set fts3 [execsql { SELECT docid FROM t1 WHERE t1 MATCH $match }]
 | 
| +      do_select_test fts3rnd-1.$nodesize.$iTest.7.$i {
 | 
| +        SELECT docid FROM t1 WHERE t1 MATCH $match
 | 
| +      } [simple_near $terms $nNear]
 | 
| +    }
 | 
| +    
 | 
| +    # Set operations on simple term queries.
 | 
| +    #
 | 
| +    foreach {tn op proc} {
 | 
| +      8  OR  setop_or
 | 
| +      9  NOT setop_not
 | 
| +      10 AND setop_and
 | 
| +    } {
 | 
| +      for {set i 0} {$i < $nRep} {incr i} {
 | 
| +        set term1 [random_term]
 | 
| +        set term2 [random_term]
 | 
| +        set match "$term1 $op $term2"
 | 
| +        do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i {
 | 
| +          SELECT docid FROM t1 WHERE t1 MATCH $match
 | 
| +        } [$proc [simple_phrase $term1] [simple_phrase $term2]]
 | 
| +      }
 | 
| +    }
 | 
| + 
 | 
| +    # Set operations on NEAR queries.
 | 
| +    #
 | 
| +    foreach {tn op proc} {
 | 
| +      8  OR  setop_or
 | 
| +      9  NOT setop_not
 | 
| +      10 AND setop_and
 | 
| +    } {
 | 
| +      for {set i 0} {$i < $nRep} {incr i} {
 | 
| +        set term1 [random_term]
 | 
| +        set term2 [random_term]
 | 
| +        set term3 [random_term]
 | 
| +        set term4 [random_term]
 | 
| +        set match "$term1 NEAR $term2 $op $term3 NEAR $term4"
 | 
| +        do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i {
 | 
| +          SELECT docid FROM t1 WHERE t1 MATCH $match
 | 
| +        } [$proc                                  \
 | 
| +            [simple_near [list $term1 $term2] 10] \
 | 
| +            [simple_near [list $term3 $term4] 10]
 | 
| +          ]
 | 
| +      }
 | 
| +    }
 | 
| +
 | 
| +    catchsql COMMIT
 | 
| +  }
 | 
| +}
 | 
| +
 | 
| +finish_test
 | 
| 
 |