OLD | NEW |
| (Empty) |
1 # 2010 November 02 | |
2 # | |
3 # The author disclaims copyright to this source code. In place of | |
4 # a legal notice, here is a blessing: | |
5 # | |
6 # May you do good and not evil. | |
7 # May you find forgiveness for yourself and forgive others. | |
8 # May you share freely, never taking more than you give. | |
9 # | |
10 #*********************************************************************** | |
11 # This file implements regression tests for the FTS3 module. The focus | |
12 # of this file is tables created with the "matchinfo=fts3" option. | |
13 # | |
14 | |
15 set testdir [file dirname $argv0] | |
16 source $testdir/tester.tcl | |
17 | |
18 # If SQLITE_ENABLE_FTS3 is not defined, omit this file. | |
19 ifcapable !fts3 { finish_test ; return } | |
20 | |
21 set testprefix fts3matchinfo | |
22 set sqlite_fts3_enable_parentheses 0 | |
23 | |
24 proc mit {blob} { | |
25 set scan(littleEndian) i* | |
26 set scan(bigEndian) I* | |
27 binary scan $blob $scan($::tcl_platform(byteOrder)) r | |
28 return $r | |
29 } | |
30 db func mit mit | |
31 | |
32 do_execsql_test 1.0 { | |
33 CREATE VIRTUAL TABLE t1 USING fts4(matchinfo=fts3); | |
34 SELECT name FROM sqlite_master WHERE type = 'table'; | |
35 } {t1 t1_content t1_segments t1_segdir t1_stat} | |
36 | |
37 do_execsql_test 1.1 { | |
38 INSERT INTO t1(content) VALUES('I wandered lonely as a cloud'); | |
39 INSERT INTO t1(content) VALUES('That floats on high o''er vales and hills,'); | |
40 INSERT INTO t1(content) VALUES('When all at once I saw a crowd,'); | |
41 INSERT INTO t1(content) VALUES('A host, of golden daffodils,'); | |
42 SELECT mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'I'; | |
43 } {{1 1 1 2 2} {1 1 1 2 2}} | |
44 | |
45 # Now create an FTS4 table that does not specify matchinfo=fts3. | |
46 # | |
47 do_execsql_test 1.2 { | |
48 CREATE VIRTUAL TABLE t2 USING fts4; | |
49 INSERT INTO t2 SELECT * FROM t1; | |
50 SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'I'; | |
51 } {{1 1 1 2 2} {1 1 1 2 2}} | |
52 | |
53 # Test some syntax-error handling. | |
54 # | |
55 do_catchsql_test 2.0 { | |
56 CREATE VIRTUAL TABLE x1 USING fts4(matchinfo=fs3); | |
57 } {1 {unrecognized matchinfo: fs3}} | |
58 do_catchsql_test 2.1 { | |
59 CREATE VIRTUAL TABLE x2 USING fts4(mtchinfo=fts3); | |
60 } {1 {unrecognized parameter: mtchinfo=fts3}} | |
61 do_catchsql_test 2.2 { | |
62 CREATE VIRTUAL TABLE x2 USING fts4(matchinfo=fts5); | |
63 } {1 {unrecognized matchinfo: fts5}} | |
64 | |
65 # Check that with fts3, the "=" character is permitted in column definitions. | |
66 # | |
67 do_execsql_test 3.1 { | |
68 CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3); | |
69 INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees'); | |
70 SELECT mtchinfo FROM t3; | |
71 } {{Beside the lake, beneath the trees}} | |
72 | |
73 do_execsql_test 3.2 { | |
74 CREATE VIRTUAL TABLE xx USING FTS4; | |
75 } | |
76 do_execsql_test 3.3 { | |
77 SELECT * FROM xx WHERE xx MATCH 'abc'; | |
78 } | |
79 do_execsql_test 3.4 { | |
80 SELECT * FROM xx WHERE xx MATCH 'a b c'; | |
81 } | |
82 | |
83 | |
84 #-------------------------------------------------------------------------- | |
85 # Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function. | |
86 # | |
87 # The first argument - $tn - is a test identifier. This may be either a | |
88 # full identifier (e.g. "fts3matchinfo-1.1") or, if global var $testprefix | |
89 # is set, just the numeric component (e.g. "1.1"). | |
90 # | |
91 # The second argument is the name of an FTSX table. The third is the | |
92 # full text of a WHERE/MATCH expression to query the table for | |
93 # (e.g. "t1 MATCH 'abc'"). The final argument - $results - should be a | |
94 # key-value list (serialized array) with matchinfo() format specifiers | |
95 # as keys and, as values, the results of executing the statement: | |
96 # | |
97 # SELECT matchinfo($tbl, '$key') FROM $tbl WHERE $expr | |
98 # | |
99 # For example: | |
100 # | |
101 # CREATE VIRTUAL TABLE t1 USING fts4; | |
102 # INSERT INTO t1 VALUES('abc'); | |
103 # INSERT INTO t1 VALUES('def'); | |
104 # INSERT INTO t1 VALUES('abc abc'); | |
105 # | |
106 # do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" { | |
107 # n {3 3} | |
108 # p {1 1} | |
109 # c {1 1} | |
110 # x {{1 3 2} {2 3 2}} | |
111 # } | |
112 # | |
113 # If the $results list contains keys mapped to "-" instead of a matchinfo() | |
114 # result, then this command computes the expected results based on other | |
115 # mappings to test the matchinfo() function. For example, the command above | |
116 # could be changed to: | |
117 # | |
118 # do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" { | |
119 # n {3 3} p {1 1} c {1 1} x {{1 3 2} {2 3 2}} | |
120 # pcx - | |
121 # } | |
122 # | |
123 # And this command would compute the expected results for matchinfo(t1, 'pcx') | |
124 # based on the results of matchinfo(t1, 'p'), matchinfo(t1, 'c') and | |
125 # matchinfo(t1, 'x') in order to test 'pcx'. | |
126 # | |
127 proc do_matchinfo_test {tn tbl expr results} { | |
128 | |
129 foreach {fmt res} $results { | |
130 if {$res == "-"} continue | |
131 set resarray($fmt) $res | |
132 } | |
133 | |
134 set nRow 0 | |
135 foreach {fmt res} [array get resarray] { | |
136 if {[llength $res]>$nRow} { set nRow [llength $res] } | |
137 } | |
138 | |
139 # Construct expected results for any formats for which the caller | |
140 # supplied result is "-". | |
141 # | |
142 foreach {fmt res} $results { | |
143 if {$res == "-"} { | |
144 set res [list] | |
145 for {set iRow 0} {$iRow<$nRow} {incr iRow} { | |
146 set rowres [list] | |
147 foreach c [split $fmt ""] { | |
148 set rowres [concat $rowres [lindex $resarray($c) $iRow]] | |
149 } | |
150 lappend res $rowres | |
151 } | |
152 set resarray($fmt) $res | |
153 } | |
154 } | |
155 | |
156 # Test each matchinfo() request individually. | |
157 # | |
158 foreach {fmt res} [array get resarray] { | |
159 set sql "SELECT mit(matchinfo($tbl, '$fmt')) FROM $tbl WHERE $expr" | |
160 do_execsql_test $tn.$fmt $sql [normalize2 $res] | |
161 } | |
162 | |
163 # Test them all executed together (multiple invocations of matchinfo()). | |
164 # | |
165 set exprlist [list] | |
166 foreach {format res} [array get resarray] { | |
167 lappend exprlist "mit(matchinfo($tbl, '$format'))" | |
168 } | |
169 set allres [list] | |
170 for {set iRow 0} {$iRow<$nRow} {incr iRow} { | |
171 foreach {format res} [array get resarray] { | |
172 lappend allres [lindex $res $iRow] | |
173 } | |
174 } | |
175 set sql "SELECT [join $exprlist ,] FROM $tbl WHERE $expr" | |
176 do_execsql_test $tn.multi $sql [normalize2 $allres] | |
177 } | |
178 proc normalize2 {list_of_lists} { | |
179 set res [list] | |
180 foreach elem $list_of_lists { | |
181 lappend res [list {*}$elem] | |
182 } | |
183 return $res | |
184 } | |
185 | |
186 | |
187 do_execsql_test 4.1.0 { | |
188 CREATE VIRTUAL TABLE t4 USING fts4(x, y); | |
189 INSERT INTO t4 VALUES('a b c d e', 'f g h i j'); | |
190 INSERT INTO t4 VALUES('f g h i j', 'a b c d e'); | |
191 } | |
192 | |
193 do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} { | |
194 p {3 3} | |
195 c {2 2} | |
196 x { | |
197 {1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1} | |
198 {0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1} | |
199 } | |
200 n {2 2} | |
201 l {{5 5} {5 5}} | |
202 a {{5 5} {5 5}} | |
203 | |
204 s {{3 0} {0 3}} | |
205 | |
206 xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - | |
207 xpxsscplax - | |
208 } | |
209 | |
210 do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} { | |
211 p {1 1} | |
212 c {2 2} | |
213 x { | |
214 {0 1 1 1 1 1} | |
215 {1 1 1 0 1 1} | |
216 } | |
217 n {2 2} | |
218 l {{5 5} {5 5}} | |
219 a {{5 5} {5 5}} | |
220 | |
221 s {{0 1} {1 0}} | |
222 | |
223 xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - | |
224 sxsxs - | |
225 } | |
226 | |
227 do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'} { s {{2 0} {0 2}} } | |
228 do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} } | |
229 do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} } | |
230 do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'} { s {{1 0} {0 1}} } | |
231 do_matchinfo_test 4.1.7 t4 {t4 MATCH 'f OR abcd'} { | |
232 x { | |
233 {0 1 1 1 1 1 0 0 0 0 0 0} | |
234 {1 1 1 0 1 1 0 0 0 0 0 0} | |
235 } | |
236 } | |
237 do_matchinfo_test 4.1.8 t4 {t4 MATCH 'f -abcd'} { | |
238 x { | |
239 {0 1 1 1 1 1} | |
240 {1 1 1 0 1 1} | |
241 } | |
242 } | |
243 | |
244 do_execsql_test 4.2.0 { | |
245 CREATE VIRTUAL TABLE t5 USING fts4; | |
246 INSERT INTO t5 VALUES('a a a a a'); | |
247 INSERT INTO t5 VALUES('a b a b a'); | |
248 INSERT INTO t5 VALUES('c b c b c'); | |
249 INSERT INTO t5 VALUES('x x x x x'); | |
250 } | |
251 do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'} { | |
252 x {{5 8 2 5 8 2} {3 8 2 3 8 2}} | |
253 s {2 1} | |
254 } | |
255 do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'} { s {2} } | |
256 do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} } | |
257 do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} } | |
258 do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } | |
259 do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} } | |
260 | |
261 do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"; | |
262 | |
263 # It used to be that the second 'a' token would be deferred. That doesn't | |
264 # work any longer, so the following test is disabled with "if 0". | |
265 if 0 { | |
266 do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { | |
267 x {{5 8 2 5 5 5} {3 8 2 3 5 5}} | |
268 s {2 1} | |
269 } | |
270 } | |
271 | |
272 do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} } | |
273 do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} } | |
274 do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} } | |
275 do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } | |
276 do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} } | |
277 | |
278 do_execsql_test 4.4.0.1 { INSERT INTO t5(t5) VALUES('optimize') } | |
279 | |
280 ifcapable fts4_deferred { | |
281 do_execsql_test 4.4.0.2 { | |
282 UPDATE t5_segments | |
283 SET block = zeroblob(length(block)) | |
284 WHERE length(block)>10000; | |
285 } | |
286 } | |
287 | |
288 do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} } | |
289 do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'} { s {2 1} } | |
290 do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} } | |
291 do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'} { s {3} } | |
292 do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'} { s {3 1} } | |
293 do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } | |
294 | |
295 do_execsql_test 4.5.0 { | |
296 CREATE VIRTUAL TABLE t6 USING fts4(a, b, c); | |
297 INSERT INTO t6 VALUES('a', 'b', 'c'); | |
298 } | |
299 do_matchinfo_test 4.5.1 t6 {t6 MATCH 'a b c'} { s {{1 1 1}} } | |
300 | |
301 | |
302 #------------------------------------------------------------------------- | |
303 # Check the following restrictions: | |
304 # | |
305 # + Matchinfo flags 'a', 'l' and 'n' can only be used with fts4, not fts3. | |
306 # + Matchinfo flag 'l' cannot be used with matchinfo=fts3. | |
307 # | |
308 do_execsql_test 5.1 { | |
309 CREATE VIRTUAL TABLE t7 USING fts3(a, b); | |
310 INSERT INTO t7 VALUES('u v w', 'x y z'); | |
311 | |
312 CREATE VIRTUAL TABLE t8 USING fts4(a, b, matchinfo=fts3); | |
313 INSERT INTO t8 VALUES('u v w', 'x y z'); | |
314 } | |
315 | |
316 do_catchsql_test 5.2.1 { | |
317 SELECT matchinfo(t7, 'a') FROM t7 WHERE t7 MATCH 'x y' | |
318 } {1 {unrecognized matchinfo request: a}} | |
319 do_catchsql_test 5.2.2 { | |
320 SELECT matchinfo(t7, 'l') FROM t7 WHERE t7 MATCH 'x y' | |
321 } {1 {unrecognized matchinfo request: l}} | |
322 do_catchsql_test 5.2.3 { | |
323 SELECT matchinfo(t7, 'n') FROM t7 WHERE t7 MATCH 'x y' | |
324 } {1 {unrecognized matchinfo request: n}} | |
325 | |
326 do_catchsql_test 5.3.1 { | |
327 SELECT matchinfo(t8, 'l') FROM t8 WHERE t8 MATCH 'x y' | |
328 } {1 {unrecognized matchinfo request: l}} | |
329 | |
330 #------------------------------------------------------------------------- | |
331 # Test that the offsets() function handles corruption in the %_content | |
332 # table correctly. | |
333 # | |
334 do_execsql_test 6.1 { | |
335 CREATE VIRTUAL TABLE t9 USING fts4; | |
336 INSERT INTO t9 VALUES( | |
337 'this record is used to try to dectect corruption' | |
338 ); | |
339 SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to'; | |
340 } {{0 0 20 2 0 0 27 2}} | |
341 | |
342 do_catchsql_test 6.2 { | |
343 UPDATE t9_content SET c0content = 'this record is used to'; | |
344 SELECT offsets(t9) FROM t9 WHERE t9 MATCH 'to'; | |
345 } {1 {database disk image is malformed}} | |
346 | |
347 #------------------------------------------------------------------------- | |
348 # Test the outcome of matchinfo() when used within a query that does not | |
349 # use the full-text index (i.e. lookup by rowid or full-table scan). | |
350 # | |
351 do_execsql_test 7.1 { | |
352 CREATE VIRTUAL TABLE t10 USING fts4; | |
353 INSERT INTO t10 VALUES('first record'); | |
354 INSERT INTO t10 VALUES('second record'); | |
355 } | |
356 do_execsql_test 7.2 { | |
357 SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10; | |
358 } {blob 0 blob 0} | |
359 do_execsql_test 7.3 { | |
360 SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10 WHERE docid=1; | |
361 } {blob 0} | |
362 do_execsql_test 7.4 { | |
363 SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) | |
364 FROM t10 WHERE t10 MATCH 'record' | |
365 } {blob 20 blob 20} | |
366 | |
367 #------------------------------------------------------------------------- | |
368 # Test a special case - matchinfo('nxa') with many zero-length documents. | |
369 # Special because "x" internally uses a statement used by both "n" and "a". | |
370 # This was causing a problem at one point in the obscure case where the | |
371 # total number of bytes of data stored in an fts4 table was less than | |
372 # the number of rows, i.e. when the following query returns true: | |
373 # | |
374 # SELECT sum(length(content)) < count(*) FROM fts4table; | |
375 # | |
376 do_execsql_test 8.1 { | |
377 CREATE VIRTUAL TABLE t11 USING fts4; | |
378 INSERT INTO t11(t11) VALUES('nodesize=24'); | |
379 INSERT INTO t11 VALUES('quitealongstringoftext'); | |
380 INSERT INTO t11 VALUES('anotherquitealongstringoftext'); | |
381 INSERT INTO t11 VALUES('athirdlongstringoftext'); | |
382 INSERT INTO t11 VALUES('andonemoreforgoodluck'); | |
383 } | |
384 do_test 8.2 { | |
385 for {set i 0} {$i < 200} {incr i} { | |
386 execsql { INSERT INTO t11 VALUES('') } | |
387 } | |
388 execsql { INSERT INTO t11(t11) VALUES('optimize') } | |
389 } {} | |
390 do_execsql_test 8.3 { | |
391 SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*' | |
392 } {{204 1 3 3 0} {204 1 3 3 0} {204 1 3 3 0}} | |
393 | |
394 # Corruption-related tests: invalid %_stat values must be reported as corruption. | |
395 do_execsql_test 8.4.1.1 { UPDATE t11_stat SET value = X'0000'; } | |
396 do_catchsql_test 8.5.1.2 { | |
397 SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*' | |
398 } {1 {database disk image is malformed}} | |
399 | |
400 do_execsql_test 8.4.2.1 { UPDATE t11_stat SET value = X'00'; } | |
401 do_catchsql_test 8.5.2.2 { | |
402 SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*' | |
403 } {1 {database disk image is malformed}} | |
404 | |
405 do_execsql_test 8.4.3.1 { UPDATE t11_stat SET value = NULL; } | |
406 do_catchsql_test 8.5.3.2 { | |
407 SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*' | |
408 } {1 {database disk image is malformed}} | |
409 | |
410 #------------------------------------------------------------------------- | |
411 do_execsql_test 8.1 { | |
412 CREATE VIRTUAL TABLE t12 USING fts4; | |
413 INSERT INTO t12 VALUES('a b c d'); | |
414 SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'a NEAR/1 d OR a'; | |
415 } {{0 0 0 0 0 0 1 1 1}} | |
416 do_execsql_test 8.2 { | |
417 INSERT INTO t12 VALUES('a d c d'); | |
418 SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'a NEAR/1 d OR a'; | |
419 } { | |
420 {0 1 1 0 1 1 1 2 2} {1 1 1 1 1 1 1 2 2} | |
421 } | |
422 do_execsql_test 8.3 { | |
423 INSERT INTO t12 VALUES('a d d a'); | |
424 SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'a NEAR/1 d OR a'; | |
425 } { | |
426 {0 3 2 0 3 2 1 4 3} {1 3 2 1 3 2 1 4 3} {2 3 2 2 3 2 2 4 3} | |
427 } | |
428 | |
429 do_execsql_test 9.1 { | |
430 CREATE VIRTUAL TABLE ft2 USING fts4; | |
431 INSERT INTO ft2 VALUES('a b c d e'); | |
432 INSERT INTO ft2 VALUES('f a b c d'); | |
433 SELECT snippet(ft2, '[', ']', '', -1, 1) FROM ft2 WHERE ft2 MATCH 'c'; | |
434 } {{[c]} {[c]}} | |
435 | |
436 #--------------------------------------------------------------------------- | |
437 # Test for a memory leak | |
438 # | |
439 do_execsql_test 10.1 { | |
440 DROP TABLE t10; | |
441 CREATE VIRTUAL TABLE t10 USING fts4(idx, value); | |
442 INSERT INTO t10 values (1, 'one'),(2, 'two'),(3, 'three'); | |
443 SELECT docId, t10.* | |
444 FROM t10 | |
445 JOIN (SELECT 1 AS idx UNION SELECT 2 UNION SELECT 3) AS x | |
446 WHERE t10 MATCH x.idx | |
447 AND matchinfo(t10) not null | |
448 GROUP BY docId | |
449 ORDER BY 1; | |
450 } {1 1 one 2 2 two 3 3 three} | |
451 | |
452 | |
453 finish_test | |
OLD | NEW |