third_party/sqlite/sqlite-src-3080704/test/fts3rnd.test - Issue 2363173002: [sqlite] Remove obsolete reference version 3.8.7.4.

Side by Side Diff: third_party/sqlite/sqlite-src-3080704/test/fts3rnd.test

Issue 2363173002: [sqlite] Remove obsolete reference version 3.8.7.4. (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 # 2009 December 03

2 #

3 # May you do good and not evil.

4 # May you find forgiveness for yourself and forgive others.

5 # May you share freely, never taking more than you give.

6 #

7 #***********************************************************************

8 #

9 # Brute force (random data) tests for FTS3.

10 #

11

12 #-------------------------------------------------------------------------

13 #

14 # The FTS3 tests implemented in this file focus on testing that FTS3

15 # returns the correct set of documents for various types of full-text

16 # query. This is done using pseudo-randomly generated data and queries.

17 # The expected result of each query is calculated using Tcl code.

18 #

19 # 1. The database is initialized to contain a single table with three

20 # columns. 100 rows are inserted into the table. Each of the three

21 # values in each row is a document consisting of between 0 and 100

22 # terms. Terms are selected from a vocabulary of $G(nVocab) terms.

23 #

24 # 2. The following is performed 100 times:

25 #

26 # a. A row is inserted into the database. The row contents are

27 # generated as in step 1. The docid is a pseudo-randomly selected

28 # value between 0 and 1000000.

29 #

30 # b. A psuedo-randomly selected row is updated. One of its columns is

31 # set to contain a new document generated in the same way as the

32 # documents in step 1.

33 #

34 # c. A psuedo-randomly selected row is deleted.

35 #

36 # d. For each of several types of fts3 queries, 10 SELECT queries

37 # of the form:

38 #

39 # SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>'

40 #

41 # are evaluated. The results are compared to those calculated by

42 # Tcl code in this file. The patterns used for the different query

43 # types are:

44 #

45 # 1. query = <term>

46 # 2. query = <prefix>

47 # 3. query = "<term> <term>"

48 # 4. query = "<term> <term> <term>"

49 # 5. query = "<prefix> <prefix> <prefix>"

50 # 6. query = <term> NEAR <term>

51 # 7. query = <term> NEAR/11 <term> NEAR/11 <term>

52 # 8. query = <term> OR <term>

53 # 9. query = <term> NOT <term>

54 # 10. query = <term> AND <term>

55 # 11. query = <term> NEAR <term> OR <term> NEAR <term>

56 # 12. query = <term> NEAR <term> NOT <term> NEAR <term>

57 # 13. query = <term> NEAR <term> AND <term> NEAR <term>

58 #

59 # where <term> is a term psuedo-randomly selected from the vocabulary

60 # and prefix is the first 2 characters of such a term followed by

61 # a "*" character.

62 #

63 # Every second iteration, steps (a) through (d) above are performed

64 # within a single transaction. This forces the queries in (d) to

65 # read data from both the database and the in-memory hash table

66 # that caches the full-text index entries created by steps (a), (b)

67 # and (c) until the transaction is committed.

68 #

69 # The procedure above is run 5 times, using advisory fts3 node sizes of 50,

70 # 500, 1000 and 2000 bytes.

71 #

72 # After the test using an advisory node-size of 50, an OOM test is run using

73 # the database. This test is similar to step (d) above, except that it tests

74 # the effects of transient and persistent OOM conditions encountered while

75 # executing each query.

76 #

77

78 set testdir [file dirname $argv0]

79 source $testdir/tester.tcl

80

81 # If this build does not include FTS3, skip the tests in this file.

82 #

83 ifcapable !fts3 { finish_test ; return }

84 source $testdir/fts3_common.tcl

85 source $testdir/malloc_common.tcl

86

87 set G(nVocab) 100

88

89 set nVocab 100

90 set lVocab [list]

91

92 expr srand(0)

93

94 # Generate a vocabulary of nVocab words. Each word is 3 characters long.

95 #

96 set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}

97 for {set i 0} {$i < $nVocab} {incr i} {

98 set len [expr int(rand()*3)+2]

99 set word [lindex $lChar [expr int(rand()*26)]]

100 append word [lindex $lChar [expr int(rand()*26)]]

101 if {$len>2} { append word [lindex $lChar [expr int(rand()*26)]] }

102 if {$len>3} { append word [lindex $lChar [expr int(rand()*26)]] }

103 lappend lVocab $word

104 }

105

106 proc random_term {} {

107 lindex $::lVocab [expr {int(rand()*$::nVocab)}]

108 }

109

110 # Return a document consisting of $nWord arbitrarily selected terms

111 # from the $::lVocab list.

112 #

113 proc generate_doc {nWord} {

114 set doc [list]

115 for {set i 0} {$i < $nWord} {incr i} {

116 lappend doc [random_term]

117 }

118 return $doc

119 }

120

121

122

123 # Primitives to update the table.

124 #

125 unset -nocomplain t1

126 proc insert_row {rowid} {

127 set a [generate_doc [expr int((rand()*100))]]

128 set b [generate_doc [expr int((rand()*100))]]

129 set c [generate_doc [expr int((rand()*100))]]

130 execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }

131 set ::t1($rowid) [list $a $b $c]

132 }

133 proc delete_row {rowid} {

134 execsql { DELETE FROM t1 WHERE rowid = $rowid }

135 catch {unset ::t1($rowid)}

136 }

137 proc update_row {rowid} {

138 set cols {a b c}

139 set iCol [expr int(rand()*3)]

140 set doc [generate_doc [expr int((rand()*100))]]

141 lset ::t1($rowid) $iCol $doc

142 execsql "UPDATE t1 SET [lindex $cols $iCol] = \$doc WHERE rowid = \$rowid"

143 }

144

145 proc simple_phrase {zPrefix} {

146 set ret [list]

147

148 set reg [string map {* {[^ ]*}} $zPrefix]

149 set reg " $reg "

150

151 foreach key [lsort -integer [array names ::t1]] {

152 set value $::t1($key)

153 set cnt [list]

154 foreach col $value {

155 if {[regexp $reg " $col "]} { lappend ret $key ; break }

156 }

157 }

158

159 #lsort -uniq -integer $ret

160 set ret

161 }

162

163 # This [proc] is used to test the FTS3 matchinfo() function.

164 #

165 proc simple_token_matchinfo {zToken bDesc} {

166

167 set nDoc(0) 0

168 set nDoc(1) 0

169 set nDoc(2) 0

170 set nHit(0) 0

171 set nHit(1) 0

172 set nHit(2) 0

173

174 set dir -inc

175 if {$bDesc} { set dir -dec }

176

177 foreach key [array names ::t1] {

178 set value $::t1($key)

179 set a($key) [list]

180 foreach i {0 1 2} col $value {

181 set hit [llength [lsearch -all $col $zToken]]

182 lappend a($key) $hit

183 incr nHit($i) $hit

184 if {$hit>0} { incr nDoc($i) }

185 }

186 }

187

188 set ret [list]

189 foreach docid [lsort -integer $dir [array names a]] {

190 if { [lindex [lsort -integer $a($docid)] end] } {

191 set matchinfo [list 1 3]

192 foreach i {0 1 2} hit $a($docid) {

193 lappend matchinfo $hit $nHit($i) $nDoc($i)

194 }

195 lappend ret $docid $matchinfo

196 }

197 }

198

199 set ret

200 }

201

202 proc simple_near {termlist nNear} {

203 set ret [list]

204

205 foreach {key value} [array get ::t1] {

206 foreach v $value {

207

208 set l [lsearch -exact -all $v [lindex $termlist 0]]

209 foreach T [lrange $termlist 1 end] {

210 set l2 [list]

211 foreach i $l {

212 set iStart [expr $i - $nNear - 1]

213 set iEnd [expr $i + $nNear + 1]

214 if {$iStart < 0} {set iStart 0}

215 foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] {

216 incr i2 $iStart

217 if {$i2 != $i} { lappend l2 $i2 }

218 }

219 }

220 set l [lsort -uniq -integer $l2]

221 }

222

223 if {[llength $l]} {

224 #puts "MATCH($key): $v"

225 lappend ret $key

226 }

227 }

228 }

229

230 lsort -unique -integer $ret

231 }

232

233 # The following three procs:

234 #

235 # setup_not A B

236 # setup_or A B

237 # setup_and A B

238 #

239 # each take two arguments. Both arguments must be lists of integer values

240 # sorted by value. The return value is the list produced by evaluating

241 # the equivalent of "A op B", where op is the FTS3 operator NOT, OR or

242 # AND.

243 #

244 proc setop_not {A B} {

245 foreach b $B { set n($b) {} }

246 set ret [list]

247 foreach a $A { if {![info exists n($a)]} {lappend ret $a} }

248 return $ret

249 }

250 proc setop_or {A B} {

251 lsort -integer -uniq [concat $A $B]

252 }

253 proc setop_and {A B} {

254 foreach b $B { set n($b) {} }

255 set ret [list]

256 foreach a $A { if {[info exists n($a)]} {lappend ret $a} }

257 return $ret

258 }

259

260 proc mit {blob} {

261 set scan(littleEndian) i*

262 set scan(bigEndian) I*

263 binary scan $blob $scan($::tcl_platform(byteOrder)) r

264 return $r

265 }

266 db func mit mit

267 set sqlite_fts3_enable_parentheses 1

268

269 proc do_orderbydocid_test {tn sql res} {

270 uplevel [list do_select_test $tn.asc "$sql ORDER BY docid ASC" $res]

271 uplevel [list do_select_test $tn.desc "$sql ORDER BY docid DESC" \

272 [lsort -int -dec $res]

273 ]

274 }

275

276 set NUM_TRIALS 100

277

278 foreach {nodesize order} {

279 50 DESC

280 50 ASC

281 500 ASC

282 1000 DESC

283 2000 ASC

284 } {

285 catch { array unset ::t1 }

286 set testname "$nodesize/$order"

287

288 # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows.

289 #

290 db transaction {

291 catchsql { DROP TABLE t1 }

292 execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)"

293 execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"

294 for {set i 0} {$i < 100} {incr i} { insert_row $i }

295 }

296

297 for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} {

298 catchsql COMMIT

299

300 set DO_MALLOC_TEST 0

301 set nRep 10

302 if {$iTest==100 && $nodesize==50} {

303 set DO_MALLOC_TEST 1

304 set nRep 2

305 }

306

307 set ::testprefix fts3rnd-1.$testname.$iTest

308

309 # Delete one row, update one row and insert one row.

310 #

311 set rows [array names ::t1]

312 set nRow [llength $rows]

313 set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]

314 set iDelete $iUpdate

315 while {$iDelete == $iUpdate} {

316 set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]

317 }

318 set iInsert $iUpdate

319 while {[info exists ::t1($iInsert)]} {

320 set iInsert [expr {int(rand()*1000000)}]

321 }

322 execsql BEGIN

323 insert_row $iInsert

324 update_row $iUpdate

325 delete_row $iDelete

326 if {0==($iTest%2)} { execsql COMMIT }

327

328 if {0==($iTest%2)} {

329 #do_test 0 { fts3_integrity_check t1 } ok

330 }

331

332 # Pick 10 terms from the vocabulary. Check that the results of querying

333 # the database for the set of documents containing each of these terms

334 # is the same as the result obtained by scanning the contents of the Tcl

335 # array for each term.

336 #

337 for {set i 0} {$i < 10} {incr i} {

338 set term [random_term]

339 do_select_test 1.$i.asc {

340 SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term

341 ORDER BY docid ASC

342 } [simple_token_matchinfo $term 0]

343 do_select_test 1.$i.desc {

344 SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term

345 ORDER BY docid DESC

346 } [simple_token_matchinfo $term 1]

347 }

348

349 # This time, use the first two characters of each term as a term prefix

350 # to query for. Test that querying the Tcl array produces the same results

351 # as querying the FTS3 table for the prefix.

352 #

353 for {set i 0} {$i < $nRep} {incr i} {

354 set prefix [string range [random_term] 0 end-1]

355 set match "${prefix}*"

356 do_orderbydocid_test 2.$i {

357 SELECT docid FROM t1 WHERE t1 MATCH $match

358 } [simple_phrase $match]

359 }

360

361 # Similar to the above, except for phrase queries.

362 #

363 for {set i 0} {$i < $nRep} {incr i} {

364 set term [list [random_term] [random_term]]

365 set match "\"$term\""

366 do_orderbydocid_test 3.$i {

367 SELECT docid FROM t1 WHERE t1 MATCH $match

368 } [simple_phrase $term]

369 }

370

371 # Three word phrases.

372 #

373 for {set i 0} {$i < $nRep} {incr i} {

374 set term [list [random_term] [random_term] [random_term]]

375 set match "\"$term\""

376 do_orderbydocid_test 4.$i {

377 SELECT docid FROM t1 WHERE t1 MATCH $match

378 } [simple_phrase $term]

379 }

380

381 # Three word phrases made up of term-prefixes.

382 #

383 for {set i 0} {$i < $nRep} {incr i} {

384 set query "[string range [random_term] 0 end-1]* "

385 append query "[string range [random_term] 0 end-1]* "

386 append query "[string range [random_term] 0 end-1]*"

387

388 set match "\"$query\""

389 do_orderbydocid_test 5.$i {

390 SELECT docid FROM t1 WHERE t1 MATCH $match

391 } [simple_phrase $query]

392 }

393

394 # A NEAR query with terms as the arguments:

395 #

396 # ... MATCH '$term1 NEAR $term2' ...

397 #

398 for {set i 0} {$i < $nRep} {incr i} {

399 set terms [list [random_term] [random_term]]

400 set match [join $terms " NEAR "]

401 do_orderbydocid_test 6.$i {

402 SELECT docid FROM t1 WHERE t1 MATCH $match

403 } [simple_near $terms 10]

404 }

405

406 # A 3-way NEAR query with terms as the arguments.

407 #

408 for {set i 0} {$i < $nRep} {incr i} {

409 set terms [list [random_term] [random_term] [random_term]]

410 set nNear 11

411 set match [join $terms " NEAR/$nNear "]

412 do_orderbydocid_test 7.$i {

413 SELECT docid FROM t1 WHERE t1 MATCH $match

414 } [simple_near $terms $nNear]

415 }

416

417 # Set operations on simple term queries.

418 #

419 foreach {tn op proc} {

420 8 OR setop_or

421 9 NOT setop_not

422 10 AND setop_and

423 } {

424 for {set i 0} {$i < $nRep} {incr i} {

425 set term1 [random_term]

426 set term2 [random_term]

427 set match "$term1 $op $term2"

428 do_orderbydocid_test $tn.$i {

429 SELECT docid FROM t1 WHERE t1 MATCH $match

430 } [$proc [simple_phrase $term1] [simple_phrase $term2]]

431 }

432 }

433

434 # Set operations on NEAR queries.

435 #

436 foreach {tn op proc} {

437 11 OR setop_or

438 12 NOT setop_not

439 13 AND setop_and

440 } {

441 for {set i 0} {$i < $nRep} {incr i} {

442 set term1 [random_term]

443 set term2 [random_term]

444 set term3 [random_term]

445 set term4 [random_term]

446 set match "$term1 NEAR $term2 $op $term3 NEAR $term4"

447 do_orderbydocid_test $tn.$i {

448 SELECT docid FROM t1 WHERE t1 MATCH $match

449 } [$proc \

450 [simple_near [list $term1 $term2] 10] \

451 [simple_near [list $term3 $term4] 10]

452 ]

453 }

454 }

455

456 catchsql COMMIT

457 }

458 }

459

460 finish_test

OLD	NEW