OLD | NEW |
(Empty) | |
| 1 # 2015 Jan 13 |
| 2 # |
| 3 # The author disclaims copyright to this source code. In place of |
| 4 # a legal notice, here is a blessing: |
| 5 # |
| 6 # May you do good and not evil. |
| 7 # May you find forgiveness for yourself and forgive others. |
| 8 # May you share freely, never taking more than you give. |
| 9 # |
| 10 #*********************************************************************** |
| 11 # |
| 12 # This file contains tests focused on prefix indexes. |
| 13 # |
| 14 |
| 15 source [file join [file dirname [info script]] fts5_common.tcl] |
| 16 set testprefix fts5prefix |
| 17 |
| 18 # If SQLITE_ENABLE_FTS5 is defined, omit this file. |
| 19 ifcapable !fts5 { |
| 20 finish_test |
| 21 return |
| 22 } |
| 23 |
| 24 do_execsql_test 1.0 { |
| 25 CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1); |
| 26 INSERT INTO xx VALUES('one two three'); |
| 27 INSERT INTO xx VALUES('four five six'); |
| 28 INSERT INTO xx VALUES('seven eight nine ten'); |
| 29 } |
| 30 |
| 31 do_execsql_test 1.1 { |
| 32 SELECT rowid FROM xx WHERE xx MATCH 't*' |
| 33 } {1 3} |
| 34 |
| 35 |
| 36 #------------------------------------------------------------------------- |
| 37 # Check that prefix indexes really do index n-character prefixes, not |
| 38 # n-byte prefixes. Use the ascii tokenizer so as not to be confused by |
| 39 # diacritic removal. |
| 40 # |
| 41 do_execsql_test 2.0 { |
| 42 CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2) |
| 43 } |
| 44 |
| 45 do_test 2.1 { |
| 46 foreach {rowid string} { |
| 47 1 "\xCA\xCB\xCC\xCD" |
| 48 2 "\u1234\u5678\u4321\u8765" |
| 49 } { |
| 50 execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) } |
| 51 } |
| 52 } {} |
| 53 |
| 54 do_execsql_test 2.2 { |
| 55 INSERT INTO t1(t1) VALUES('integrity-check'); |
| 56 } |
| 57 |
| 58 foreach {tn q res} { |
| 59 1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1 |
| 60 2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2 |
| 61 } { |
| 62 do_execsql_test 2.3.$tn $q $res |
| 63 } |
| 64 |
| 65 #------------------------------------------------------------------------- |
| 66 # Check that prefix queries with: |
| 67 # |
| 68 # * a column filter, and |
| 69 # * no prefix index. |
| 70 # |
| 71 # work Ok. |
| 72 # |
| 73 do_execsql_test 3.0 { |
| 74 CREATE VIRTUAL TABLE t3 USING fts5(a, b, c); |
| 75 INSERT INTO t3(t3, rank) VALUES('pgsz', 32); |
| 76 BEGIN; |
| 77 INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1 |
| 78 INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2 |
| 79 INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3 |
| 80 INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4 |
| 81 INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5 |
| 82 INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6 |
| 83 INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7 |
| 84 INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8 |
| 85 INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9 |
| 86 COMMIT; |
| 87 } |
| 88 |
| 89 foreach {tn match res} { |
| 90 1 "a : c*" {1 2 4 6 7 9} |
| 91 2 "b : c*" {1 3 4 5 6 8 9} |
| 92 3 "c : c*" {1 2 4 6 7 8 9} |
| 93 4 "a : b*" {1 3 5 6 7 8 9} |
| 94 5 "b : b*" {1 2 3 5 7 9} |
| 95 6 "c : b*" {1 3 7 9} |
| 96 7 "a : a*" {1 3 4 5 6 8} |
| 97 8 "b : a*" {2 3 4 6 7 8} |
| 98 9 "c : a*" {2 3 4 5 6 7 8 9} |
| 99 } { |
| 100 do_execsql_test 3.1.$tn { |
| 101 SELECT rowid FROM t3($match) |
| 102 } $res |
| 103 } |
| 104 |
| 105 do_test 3.2 { |
| 106 expr srand(0) |
| 107 execsql { DELETE FROM t3 } |
| 108 for {set i 0} {$i < 1000} {incr i} { |
| 109 set a [fts5_rnddoc 3] |
| 110 set b [fts5_rnddoc 8] |
| 111 set c [fts5_rnddoc 20] |
| 112 execsql { INSERT INTO t3 VALUES($a, $b, $c) } |
| 113 } |
| 114 execsql { INSERT INTO t3(t3) VALUES('integrity-check') } |
| 115 } {} |
| 116 |
| 117 proc gmatch {col pattern} { |
| 118 expr {[lsearch -glob $col $pattern]>=0} |
| 119 } |
| 120 db func gmatch gmatch |
| 121 |
| 122 proc ghl {col pattern} { |
| 123 foreach t $col { |
| 124 if {[string match $pattern $t]} { |
| 125 lappend res "*$t*" |
| 126 } else { |
| 127 lappend res $t |
| 128 } |
| 129 } |
| 130 set res |
| 131 } |
| 132 db func ghl ghl |
| 133 |
| 134 set COLS(a) 0 |
| 135 set COLS(b) 1 |
| 136 set COLS(c) 2 |
| 137 |
| 138 for {set x 0} {$x<2} {incr x} { |
| 139 foreach {tn pattern} { |
| 140 1 {xa*} |
| 141 2 {xb*} |
| 142 3 {xc*} |
| 143 4 {xd*} |
| 144 5 {xe*} |
| 145 6 {xf*} |
| 146 7 {xg*} |
| 147 8 {xh*} |
| 148 9 {xi*} |
| 149 10 {xj*} |
| 150 } { |
| 151 foreach col {a b c} { |
| 152 |
| 153 # Check that the list of returned rowids is correct. |
| 154 # |
| 155 set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"] |
| 156 set query "$col : $pattern" |
| 157 do_execsql_test 3.3.$x.$tn.$col.rowid { |
| 158 SELECT rowid FROM t3($query); |
| 159 } $res |
| 160 |
| 161 # Check that the highlight() function works. |
| 162 # |
| 163 set res [db eval \ |
| 164 "SELECT ghl($col, '$pattern') FROM t3 WHERE gmatch($col, '$pattern')" |
| 165 ] |
| 166 set idx $COLS($col) |
| 167 do_execsql_test 3.3.$x.$tn.$col.highlight { |
| 168 SELECT highlight(t3, $idx, '*', '*') FROM t3($query); |
| 169 } $res |
| 170 } |
| 171 |
| 172 foreach colset {{a b} {b c} {c a} {a c} {b a}} { |
| 173 # Check that the list of returned rowids is correct. |
| 174 # |
| 175 foreach {col1 col2} $colset {} |
| 176 set expr "gmatch($col1, '$pattern') OR gmatch($col2, '$pattern')" |
| 177 set res [db eval "SELECT rowid FROM t3 WHERE $expr"] |
| 178 set query "{$colset} : $pattern" |
| 179 do_execsql_test 3.3.$x.$tn.{$colset}.rowid { |
| 180 SELECT rowid FROM t3($query); |
| 181 } $res |
| 182 |
| 183 set resq "SELECT ghl($col1, '$pattern'), ghl($col2, '$pattern')" |
| 184 append resq " FROM t3 WHERE $expr" |
| 185 set res [db eval $resq] |
| 186 set idx1 $COLS($col1) |
| 187 set idx2 $COLS($col2) |
| 188 do_execsql_test 3.3.$x.$tn.{$colset}.highlight { |
| 189 SELECT highlight(t3, $idx1, '*', '*'), highlight(t3, $idx2, '*', '*') |
| 190 FROM t3($query) |
| 191 } $res |
| 192 } |
| 193 } |
| 194 execsql { INSERT INTO t3(t3) VALUES('optimize') } |
| 195 execsql { INSERT INTO t3(t3) VALUES('integrity-check') } |
| 196 } |
| 197 |
| 198 #------------------------------------------------------------------------- |
| 199 # |
| 200 reset_db |
| 201 do_execsql_test 4.0 { |
| 202 CREATE VIRTUAL TABLE t2 USING fts5(c1, c2); |
| 203 INSERT INTO t2 VALUES('xa xb', 'xb xa'); |
| 204 |
| 205 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2 |
| 206 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4 |
| 207 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 8 |
| 208 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 16 |
| 209 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 32 |
| 210 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 64 |
| 211 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 128 |
| 212 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 256 |
| 213 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 512 |
| 214 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 1024 |
| 215 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2048 |
| 216 INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4096 |
| 217 |
| 218 SELECT count(*) FROM t2('x*'); |
| 219 } {4096} |
| 220 |
| 221 do_execsql_test 4.1 { |
| 222 UPDATE t2 SET c2 = 'ya yb'; |
| 223 SELECT count(*) FROM t2('c1:x*'); |
| 224 SELECT count(*) FROM t2('c2:x*'); |
| 225 } {4096 0} |
| 226 |
| 227 do_execsql_test 4.2 { |
| 228 UPDATE t2 SET c2 = 'xa'; |
| 229 SELECT count(*) FROM t2('c1:x*'); |
| 230 SELECT count(*) FROM t2('c2:x*'); |
| 231 } {4096 4096} |
| 232 |
| 233 #------------------------------------------------------------------------- |
| 234 # |
| 235 reset_db |
| 236 proc rnddoc {n} { |
| 237 set map [list a b c d] |
| 238 set doc [list] |
| 239 for {set i 0} {$i < $n} {incr i} { |
| 240 lappend doc "x[lindex $map [expr int(rand()*4)]]" |
| 241 } |
| 242 set doc |
| 243 } |
| 244 set cols [list] |
| 245 for {set i 1} {$i<250} {incr i} { |
| 246 lappend cols "c$i" |
| 247 lappend vals "'[rnddoc 10]'" |
| 248 } |
| 249 |
| 250 do_test 5.0 { |
| 251 execsql "CREATE VIRTUAL TABLE t4 USING fts5([join $cols ,])" |
| 252 execsql {INSERT INTO t4(t4, rank) VALUES('pgsz', 32)} |
| 253 execsql "INSERT INTO t4 VALUES([join $vals ,])" |
| 254 execsql "INSERT INTO t4 VALUES([join $vals ,])" |
| 255 execsql "INSERT INTO t4 VALUES([join $vals ,])" |
| 256 execsql "INSERT INTO t4 VALUES([join $vals ,])" |
| 257 } {} |
| 258 |
| 259 proc gmatch {col pattern} { |
| 260 expr {[lsearch -glob $col $pattern]>=0} |
| 261 } |
| 262 db func gmatch gmatch |
| 263 foreach {tn col pattern} { |
| 264 1 c100 {xa*} |
| 265 2 c200 {xb*} |
| 266 } { |
| 267 set res [db eval "SELECT rowid FROM t4 WHERE gmatch($col, \$pattern)"] |
| 268 set query "$col : $pattern" |
| 269 do_execsql_test 5.$tn { SELECT rowid FROM t4($query) } $res |
| 270 } |
| 271 |
| 272 reset_db |
| 273 db func fts5_rnddoc fts5_rnddoc |
| 274 do_test 6.0 { |
| 275 execsql { |
| 276 CREATE VIRTUAL TABLE t5 USING fts5(x, y); |
| 277 INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) ); |
| 278 INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) ); |
| 279 INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) ); |
| 280 INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) ); |
| 281 } |
| 282 } {} |
| 283 |
| 284 proc gmatch {col pattern} { |
| 285 expr {[lsearch -glob $col $pattern]>=0} |
| 286 } |
| 287 db func gmatch gmatch |
| 288 foreach {tn col pattern} { |
| 289 1 y {xa*} |
| 290 2 y {xb*} |
| 291 3 y {xc*} |
| 292 4 x {xa*} |
| 293 5 x {xb*} |
| 294 6 x {xc*} |
| 295 } { |
| 296 set res [db eval "SELECT rowid FROM t5 WHERE gmatch($col, \$pattern)"] |
| 297 set query "$col : $pattern" |
| 298 do_execsql_test 6.$tn { SELECT rowid FROM t5($query) } $res |
| 299 } |
| 300 |
| 301 #------------------------------------------------------------------------- |
| 302 # Check that the various ways of creating prefix indexes produce the |
| 303 # same database on disk. |
| 304 # |
| 305 save_prng_state |
| 306 foreach {tn create} { |
| 307 1 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1,2,3") } |
| 308 2 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2 3") } |
| 309 3 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix=1, prefix=2, prefix=3) } |
| 310 4 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2", prefix=3) } |
| 311 } { |
| 312 execsql { DROP TABLE IF EXISTS tt } |
| 313 restore_prng_state |
| 314 execsql $create |
| 315 execsql { |
| 316 INSERT INTO tt VALUES('cc b ggg ccc aa eee hh', 'aa g b hh a e'); |
| 317 INSERT INTO tt VALUES('cc bb cc gg j g cc', 'ii jjj ggg jjj cc cc'); |
| 318 INSERT INTO tt VALUES('h eee cc h iii', 'aaa iii dd iii dd'); |
| 319 INSERT INTO tt VALUES('jjj hh eee c e b gg', 'j bbb jj ddd jj'); |
| 320 INSERT INTO tt VALUES('ii hhh aaa ff c hhh iii', 'j cc hh bb e'); |
| 321 INSERT INTO tt VALUES('e fff hhh i aaa', 'g b aa gg c aa dd'); |
| 322 INSERT INTO tt VALUES('i aaa ccc gg hhh aa h', 'j bbb bbb d ff'); |
| 323 INSERT INTO tt VALUES('g f gg ff ff jjj d', 'jjj d j fff fff ee j'); |
| 324 INSERT INTO tt VALUES('a cc e ccc jjj c', 'ccc iii d bb a eee g'); |
| 325 INSERT INTO tt VALUES('jj hh hh bb bbb gg', 'j c jjj bb iii f'); |
| 326 INSERT INTO tt VALUES('a ggg g cc ccc aa', 'jjj j j aaa c'); |
| 327 INSERT INTO tt VALUES('ddd j dd b i', 'aaa bbb iii ggg ff ccc ddd'); |
| 328 INSERT INTO tt VALUES('jj ii hh c ii h gg', 'hhh bbb ddd bbb hh g ggg'); |
| 329 INSERT INTO tt VALUES('aa hhh ccc h ggg ccc', 'iii d jj a ff ii'); |
| 330 } |
| 331 |
| 332 #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM tt_data} {puts $r} |
| 333 |
| 334 if {$tn==1} { |
| 335 set ::checksum [execsql {SELECT md5sum(id, block) FROM tt_data}] |
| 336 } else { |
| 337 do_execsql_test 7.$tn { |
| 338 SELECT md5sum(id, block) FROM tt_data |
| 339 } [list $::checksum] |
| 340 } |
| 341 } |
| 342 |
| 343 finish_test |
| 344 |
| 345 |
OLD | NEW |