OLD | NEW |
(Empty) | |
| 1 # 2014 Dec 20 |
| 2 # |
| 3 # The author disclaims copyright to this source code. In place of |
| 4 # a legal notice, here is a blessing: |
| 5 # |
| 6 # May you do good and not evil. |
| 7 # May you find forgiveness for yourself and forgive others. |
| 8 # May you share freely, never taking more than you give. |
| 9 # |
| 10 #*********************************************************************** |
| 11 # |
| 12 # Tests focusing on custom tokenizers that support synonyms. |
| 13 # |
| 14 |
| 15 source [file join [file dirname [info script]] fts5_common.tcl] |
| 16 set testprefix fts5synonym |
| 17 |
| 18 # If SQLITE_ENABLE_FTS5 is not defined, omit this file. |
| 19 ifcapable !fts5 { |
| 20 finish_test |
| 21 return |
| 22 } |
| 23 |
| 24 foreach S { |
| 25 {zero 0} |
| 26 {one 1 i} |
| 27 {two 2 ii} |
| 28 {three 3 iii} |
| 29 {four 4 iv} |
| 30 {five 5 v} |
| 31 {six 6 vi} |
| 32 {seven 7 vii} |
| 33 {eight 8 viii} |
| 34 {nine 9 ix} |
| 35 } { |
| 36 foreach s $S { |
| 37 set o [list] |
| 38 foreach x $S {if {$x!=$s} {lappend o $x}} |
| 39 set ::syn($s) $o |
| 40 } |
| 41 } |
| 42 |
| 43 proc tcl_tokenize {tflags text} { |
| 44 foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| 45 sqlite3_fts5_token $w $iStart $iEnd |
| 46 } |
| 47 } |
| 48 |
| 49 proc tcl_create {args} { |
| 50 return "tcl_tokenize" |
| 51 } |
| 52 |
| 53 sqlite3_fts5_create_tokenizer db tcl tcl_create |
| 54 |
| 55 #------------------------------------------------------------------------- |
| 56 # Warm body test for the code in fts5_tcl.c. |
| 57 # |
| 58 do_execsql_test 1.0 { |
| 59 CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); |
| 60 INSERT INTO ft VALUES('abc def ghi'); |
| 61 INSERT INTO ft VALUES('jkl mno pqr'); |
| 62 SELECT rowid, x FROM ft WHERE ft MATCH 'def'; |
| 63 SELECT x, rowid FROM ft WHERE ft MATCH 'pqr'; |
| 64 } {1 {abc def ghi} {jkl mno pqr} 2} |
| 65 |
| 66 #------------------------------------------------------------------------- |
| 67 # Test a tokenizer that supports synonyms by adding extra entries to the |
| 68 # FTS index. |
| 69 # |
| 70 |
| 71 proc tcl_tokenize {tflags text} { |
| 72 foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| 73 sqlite3_fts5_token $w $iStart $iEnd |
| 74 if {$tflags=="document" && [info exists ::syn($w)]} { |
| 75 foreach s $::syn($w) { |
| 76 sqlite3_fts5_token -colo $s $iStart $iEnd |
| 77 } |
| 78 } |
| 79 } |
| 80 } |
| 81 reset_db |
| 82 sqlite3_fts5_create_tokenizer db tcl tcl_create |
| 83 |
| 84 do_execsql_test 2.0 { |
| 85 CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); |
| 86 INSERT INTO ft VALUES('one two three'); |
| 87 INSERT INTO ft VALUES('four five six'); |
| 88 INSERT INTO ft VALUES('eight nine ten'); |
| 89 } {} |
| 90 |
| 91 foreach {tn expr res} { |
| 92 1 "3" 1 |
| 93 2 "eight OR 8 OR 5" {2 3} |
| 94 3 "10" {} |
| 95 4 "1*" {1} |
| 96 5 "1 + 2" {1} |
| 97 } { |
| 98 do_execsql_test 2.1.$tn { |
| 99 SELECT rowid FROM ft WHERE ft MATCH $expr |
| 100 } $res |
| 101 } |
| 102 |
| 103 #------------------------------------------------------------------------- |
| 104 # Test some broken tokenizers: |
| 105 # |
| 106 # 3.1.*: A tokenizer that declares the very first token to be colocated. |
| 107 # |
| 108 # 3.2.*: A tokenizer that reports two identical tokens at the same position. |
| 109 # This is allowed. |
| 110 # |
| 111 reset_db |
| 112 sqlite3_fts5_create_tokenizer db tcl tcl_create |
| 113 proc tcl_tokenize {tflags text} { |
| 114 set bColo 1 |
| 115 foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| 116 if {$bColo} { |
| 117 sqlite3_fts5_token -colo $w $iStart $iEnd |
| 118 set bColo 0 |
| 119 } { |
| 120 sqlite3_fts5_token $w $iStart $iEnd |
| 121 } |
| 122 } |
| 123 } |
| 124 do_execsql_test 3.1.0 { |
| 125 CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); |
| 126 INSERT INTO ft VALUES('one two three'); |
| 127 CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row); |
| 128 SELECT * FROM vv; |
| 129 } { |
| 130 one 1 1 three 1 1 two 1 1 |
| 131 } |
| 132 |
| 133 do_execsql_test 3.1.1 { |
| 134 INSERT INTO ft(ft) VALUES('integrity-check'); |
| 135 } {} |
| 136 |
| 137 proc tcl_tokenize {tflags text} { |
| 138 foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| 139 sqlite3_fts5_token $w $iStart $iEnd |
| 140 } |
| 141 } |
| 142 |
| 143 do_execsql_test 3.1.2 { |
| 144 SELECT rowid FROM ft WHERE ft MATCH 'one two three' |
| 145 } {1} |
| 146 |
| 147 reset_db |
| 148 sqlite3_fts5_create_tokenizer db tcl tcl_create |
| 149 proc tcl_tokenize {tflags text} { |
| 150 foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| 151 sqlite3_fts5_token $w $iStart $iEnd |
| 152 sqlite3_fts5_token -colo $w $iStart $iEnd |
| 153 } |
| 154 } |
| 155 do_execsql_test 3.2.0 { |
| 156 CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl); |
| 157 INSERT INTO ft VALUES('one one two three'); |
| 158 CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row); |
| 159 SELECT * FROM vv; |
| 160 } { |
| 161 one 1 4 three 1 2 two 1 2 |
| 162 } |
| 163 do_execsql_test 3.2.1 { |
| 164 SELECT rowid FROM ft WHERE ft MATCH 'one'; |
| 165 } {1} |
| 166 do_execsql_test 3.2.2 { |
| 167 SELECT rowid FROM ft WHERE ft MATCH 'one two three'; |
| 168 } {1} |
| 169 do_execsql_test 3.2.3 { |
| 170 SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three'; |
| 171 } {1} |
| 172 do_execsql_test 3.2.4 { |
| 173 SELECT rowid FROM ft WHERE ft MATCH 'one two two three'; |
| 174 } {1} |
| 175 do_execsql_test 3.2.5 { |
| 176 SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three'; |
| 177 } {} |
| 178 |
| 179 #------------------------------------------------------------------------- |
| 180 # Check that expressions with synonyms can be parsed and executed. |
| 181 # |
| 182 reset_db |
| 183 sqlite3_fts5_create_tokenizer db tcl tcl_create |
| 184 proc tcl_tokenize {tflags text} { |
| 185 foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| 186 sqlite3_fts5_token $w $iStart $iEnd |
| 187 if {$tflags=="query" && [info exists ::syn($w)]} { |
| 188 foreach s $::syn($w) { |
| 189 sqlite3_fts5_token -colo $s $iStart $iEnd |
| 190 } |
| 191 } |
| 192 } |
| 193 } |
| 194 |
| 195 foreach {tn expr res} { |
| 196 1 {abc} {"abc"} |
| 197 2 {one} {"one"|"i"|"1"} |
| 198 3 {3} {"3"|"iii"|"three"} |
| 199 4 {3*} {"3"|"iii"|"three" *} |
| 200 } { |
| 201 do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res] |
| 202 } |
| 203 |
| 204 do_execsql_test 4.2.1 { |
| 205 CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl); |
| 206 INSERT INTO xx VALUES('one two'); |
| 207 INSERT INTO xx VALUES('three four'); |
| 208 } |
| 209 |
| 210 do_execsql_test 4.2.2 { |
| 211 SELECT rowid FROM xx WHERE xx MATCH '2' |
| 212 } {1} |
| 213 |
| 214 do_execsql_test 4.2.3 { |
| 215 SELECT rowid FROM xx WHERE xx MATCH '3' |
| 216 } {2} |
| 217 |
| 218 do_test 5.0 { |
| 219 execsql { |
| 220 CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl) |
| 221 } |
| 222 foreach {rowid a b} { |
| 223 1 {four v 4 i three} {1 3 five five 4 one} |
| 224 2 {5 1 3 4 i} {2 2 v two 4} |
| 225 3 {5 i 5 2 four 4 1} {iii ii five two 1} |
| 226 4 {ii four 4 one 5 three five} {one 5 1 iii 4 3} |
| 227 5 {three i v i four 4 1} {ii five five five iii} |
| 228 6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv} |
| 229 7 {ii ii two three 2 5} {iii i ii iii iii one one} |
| 230 8 {2 ii i two 3 three 2} {two iv v iii 3 five} |
| 231 9 {i 2 iv 3 five four v} {iii 4 three i three ii 1} |
| 232 } { |
| 233 execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) } |
| 234 } |
| 235 } {} |
| 236 |
| 237 |
| 238 foreach {tn q res} { |
| 239 1 {one} { |
| 240 1 {four v 4 [i] three} {[1] 3 five five 4 [one]} |
| 241 2 {5 [1] 3 4 [i]} {2 2 v two 4} |
| 242 3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]} |
| 243 4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3} |
| 244 5 {three [i] v [i] four 4 [1]} {ii five five five iii} |
| 245 6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv} |
| 246 7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]} |
| 247 8 {2 ii [i] two 3 three 2} {two iv v iii 3 five} |
| 248 9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]} |
| 249 } |
| 250 2 {five four} { |
| 251 1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one} |
| 252 2 {[5] 1 3 [4] i} {2 2 [v] two [4]} |
| 253 3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1} |
| 254 4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3} |
| 255 5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii} |
| 256 8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]} |
| 257 9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1} |
| 258 } |
| 259 3 {one OR two OR iii OR 4 OR v} { |
| 260 1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]} |
| 261 2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]} |
| 262 3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]} |
| 263 4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]} |
| 264 5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]} |
| 265 6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]} |
| 266 7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]} |
| 267 8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]} |
| 268 9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]} |
| 269 } |
| 270 |
| 271 4 {5 + 1} { |
| 272 2 {[5 1] 3 4 i} {2 2 v two 4} |
| 273 3 {[5 i] 5 2 four 4 1} {iii ii five two 1} |
| 274 4 {ii four 4 one 5 three five} {one [5 1] iii 4 3} |
| 275 5 {three i [v i] four 4 1} {ii five five five iii} |
| 276 } |
| 277 |
| 278 5 {one + two + three} { |
| 279 7 {ii ii two three 2 5} {iii [i ii iii] iii one one} |
| 280 8 {2 ii [i two 3] three 2} {two iv v iii 3 five} |
| 281 } |
| 282 |
| 283 6 {"v v"} { |
| 284 1 {four v 4 i three} {1 3 [five five] 4 one} |
| 285 5 {three i v i four 4 1} {ii [five five five] iii} |
| 286 } |
| 287 } { |
| 288 do_execsql_test 5.1.$tn { |
| 289 SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']') |
| 290 FROM t1 WHERE t1 MATCH $q |
| 291 } $res |
| 292 } |
| 293 |
| 294 # Test that the xQueryPhrase() API works with synonyms. |
| 295 # |
| 296 proc mit {blob} { |
| 297 set scan(littleEndian) i* |
| 298 set scan(bigEndian) I* |
| 299 binary scan $blob $scan($::tcl_platform(byteOrder)) r |
| 300 return $r |
| 301 } |
| 302 db func mit mit |
| 303 sqlite3_fts5_register_matchinfo db |
| 304 |
| 305 foreach {tn q res} { |
| 306 1 {one} { |
| 307 1 {1 11 7 2 12 6} 2 {2 11 7 0 12 6} |
| 308 3 {2 11 7 1 12 6} 4 {1 11 7 2 12 6} |
| 309 5 {3 11 7 0 12 6} 6 {0 11 7 2 12 6} |
| 310 7 {0 11 7 3 12 6} 8 {1 11 7 0 12 6} |
| 311 9 {1 11 7 2 12 6} |
| 312 } |
| 313 } { |
| 314 do_execsql_test 5.2.$tn { |
| 315 SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q |
| 316 } $res |
| 317 } |
| 318 |
| 319 |
| 320 #------------------------------------------------------------------------- |
| 321 # Test terms with more than 4 synonyms. |
| 322 # |
| 323 reset_db |
| 324 sqlite3_fts5_create_tokenizer db tcl tcl_create |
| 325 proc tcl_tokenize {tflags text} { |
| 326 foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| 327 sqlite3_fts5_token $w $iStart $iEnd |
| 328 if {$tflags=="query" && [string length $w]==1} { |
| 329 for {set i 2} {$i<=10} {incr i} { |
| 330 sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd |
| 331 } |
| 332 } |
| 333 } |
| 334 } |
| 335 |
| 336 do_execsql_test 6.0.1 { |
| 337 CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl); |
| 338 INSERT INTO t1 VALUES('yy xx qq'); |
| 339 INSERT INTO t1 VALUES('yy xx xx'); |
| 340 } |
| 341 do_execsql_test 6.0.2 { |
| 342 SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)'; |
| 343 } {{yy xx qq}} |
| 344 |
| 345 do_test 6.0.3 { |
| 346 execsql { |
| 347 CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl) |
| 348 } |
| 349 foreach {rowid a b} { |
| 350 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa} |
| 351 2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq} |
| 352 3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj} |
| 353 4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii} |
| 354 5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n} |
| 355 6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq} |
| 356 7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu} |
| 357 8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss} |
| 358 9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg} |
| 359 } { |
| 360 execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) } |
| 361 } |
| 362 } {} |
| 363 |
| 364 foreach {tn q res} { |
| 365 1 {a} { |
| 366 1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]} |
| 367 3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj} |
| 368 4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii} |
| 369 6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq} |
| 370 } |
| 371 |
| 372 2 {a AND q} { |
| 373 1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]} |
| 374 6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]} |
| 375 } |
| 376 |
| 377 3 {o OR (q AND a)} { |
| 378 1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]} |
| 379 2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq} |
| 380 5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n} |
| 381 6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]} |
| 382 9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg} |
| 383 } |
| 384 |
| 385 4 {NEAR(q y, 20)} { |
| 386 1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa} |
| 387 2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]} |
| 388 } |
| 389 } { |
| 390 do_execsql_test 6.1.$tn.asc { |
| 391 SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']') |
| 392 FROM t2 WHERE t2 MATCH $q |
| 393 } $res |
| 394 |
| 395 set res2 [list] |
| 396 foreach {rowid a b} $res { |
| 397 set res2 [concat [list $rowid $a $b] $res2] |
| 398 } |
| 399 |
| 400 do_execsql_test 6.1.$tn.desc { |
| 401 SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']') |
| 402 FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC |
| 403 } $res2 |
| 404 } |
| 405 |
| 406 do_execsql_test 6.2.1 { |
| 407 INSERT INTO t2(rowid, a, b) VALUES(13, |
| 408 'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy' |
| 409 ); |
| 410 SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')') |
| 411 FROM t2 WHERE t2 MATCH 'x OR y' |
| 412 } { |
| 413 1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa} |
| 414 2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq} |
| 415 4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii} |
| 416 13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>} |
| 417 {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)} |
| 418 } |
| 419 |
| 420 #------------------------------------------------------------------------- |
| 421 # Test that the xColumnSize() API is not confused by colocated tokens. |
| 422 # |
| 423 reset_db |
| 424 sqlite3_fts5_create_tokenizer db tcl tcl_create |
| 425 fts5_aux_test_functions db |
| 426 proc tcl_tokenize {tflags text} { |
| 427 foreach {w iStart iEnd} [fts5_tokenize_split $text] { |
| 428 sqlite3_fts5_token $w $iStart $iEnd |
| 429 if {[string length $w]==1} { |
| 430 for {set i 2} {$i<=10} {incr i} { |
| 431 sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd |
| 432 } |
| 433 } |
| 434 } |
| 435 } |
| 436 |
| 437 do_execsql_test 7.0.1 { |
| 438 CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl); |
| 439 INSERT INTO t1 VALUES('0 2 3', '4 5 6 7'); |
| 440 INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0'); |
| 441 SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0'; |
| 442 } {{3 4} {2 10}} |
| 443 |
| 444 do_execsql_test 7.0.2 { |
| 445 INSERT INTO t1(t1) VALUES('integrity-check'); |
| 446 } |
| 447 |
| 448 do_execsql_test 7.1.1 { |
| 449 CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl); |
| 450 INSERT INTO t2 VALUES('0 2 3', '4 5 6 7'); |
| 451 INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0'); |
| 452 SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0'; |
| 453 } {{3 4} {2 10}} |
| 454 |
| 455 do_execsql_test 7.1.2 { |
| 456 INSERT INTO t2(t2) VALUES('integrity-check'); |
| 457 } |
| 458 |
| 459 finish_test |
| 460 |
OLD | NEW |