OLD | NEW |
(Empty) | |
| 1 # 2007 April 26 |
| 2 # |
| 3 # The author disclaims copyright to this source code. |
| 4 # |
| 5 #************************************************************************* |
| 6 # This file implements tests for prefix-searching in the fts2 |
| 7 # component of the SQLite library. |
| 8 # |
| 9 # $Id: fts2n.test,v 1.2 2007/12/13 21:54:11 drh Exp $ |
| 10 # |
| 11 |
| 12 set testdir [file dirname $argv0] |
| 13 source $testdir/tester.tcl |
| 14 |
| 15 # If SQLITE_ENABLE_FTS2 is defined, omit this file. |
| 16 ifcapable !fts2 { |
| 17 finish_test |
| 18 return |
| 19 } |
| 20 |
| 21 # A large string to prime the pump with. |
| 22 set text { |
| 23 Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas |
| 24 iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam |
| 25 sed turpis posuere placerat. Curabitur et lorem in lorem porttitor |
| 26 aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit |
| 27 ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra |
| 28 at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus, |
| 29 ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at |
| 30 luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu |
| 31 lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse |
| 32 potenti. Cum sociis natoque penatibus et magnis dis parturient |
| 33 montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu, |
| 34 suscipit nec, consequat quis, risus. |
| 35 } |
| 36 |
| 37 db eval { |
| 38 CREATE VIRTUAL TABLE t1 USING fts2(c); |
| 39 |
| 40 INSERT INTO t1(rowid, c) VALUES(1, $text); |
| 41 INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row'); |
| 42 } |
| 43 |
| 44 # Exact match |
| 45 do_test fts2n-1.1 { |
| 46 execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'" |
| 47 } {1} |
| 48 |
| 49 # And a prefix |
| 50 do_test fts2n-1.2 { |
| 51 execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'" |
| 52 } {1} |
| 53 |
| 54 # Prefix includes exact match |
| 55 do_test fts2n-1.3 { |
| 56 execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'" |
| 57 } {1} |
| 58 |
| 59 # Make certain everything isn't considered a prefix! |
| 60 do_test fts2n-1.4 { |
| 61 execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'" |
| 62 } {} |
| 63 |
| 64 # Prefix across multiple rows. |
| 65 do_test fts2n-1.5 { |
| 66 execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'" |
| 67 } {1 2} |
| 68 |
| 69 # Likewise, with multiple hits in one document. |
| 70 do_test fts2n-1.6 { |
| 71 execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'" |
| 72 } {1 2} |
| 73 |
| 74 # Prefix which should only hit one document. |
| 75 do_test fts2n-1.7 { |
| 76 execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'" |
| 77 } {2} |
| 78 |
| 79 # * not at end is dropped. |
| 80 do_test fts2n-1.8 { |
| 81 execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'" |
| 82 } {} |
| 83 |
| 84 # Stand-alone * is dropped. |
| 85 do_test fts2n-1.9 { |
| 86 execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'" |
| 87 } {} |
| 88 |
| 89 # Phrase-query prefix. |
| 90 do_test fts2n-1.10 { |
| 91 execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'" |
| 92 } {2} |
| 93 do_test fts2n-1.11 { |
| 94 execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'" |
| 95 } {} |
| 96 |
| 97 # Phrase query with multiple prefix matches. |
| 98 do_test fts2n-1.12 { |
| 99 execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'" |
| 100 } {1 2} |
| 101 |
| 102 # Phrase query with multiple prefix matches. |
| 103 do_test fts2n-1.13 { |
| 104 execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'" |
| 105 } {2} |
| 106 |
| 107 |
| 108 |
| 109 |
| 110 # Test across updates (and, by implication, deletes). |
| 111 |
| 112 # Version of text without "lorem". |
| 113 regsub -all {[Ll]orem} $text '' ntext |
| 114 |
| 115 db eval { |
| 116 CREATE VIRTUAL TABLE t2 USING fts2(c); |
| 117 |
| 118 INSERT INTO t2(rowid, c) VALUES(1, $text); |
| 119 INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row'); |
| 120 UPDATE t2 SET c = $ntext WHERE rowid = 1; |
| 121 } |
| 122 |
| 123 # Can't see lorem as an exact match. |
| 124 do_test fts2n-2.1 { |
| 125 execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'" |
| 126 } {} |
| 127 |
| 128 # Can't see a prefix of lorem, either. |
| 129 do_test fts2n-2.2 { |
| 130 execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'" |
| 131 } {} |
| 132 |
| 133 # Can see lovely in the other document. |
| 134 do_test fts2n-2.3 { |
| 135 execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'" |
| 136 } {2} |
| 137 |
| 138 # Can still see other hits. |
| 139 do_test fts2n-2.4 { |
| 140 execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'" |
| 141 } {1 2} |
| 142 |
| 143 # Prefix which should only hit one document. |
| 144 do_test fts2n-2.5 { |
| 145 execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'" |
| 146 } {2} |
| 147 |
| 148 |
| 149 |
| 150 # Test with a segment which will have multiple levels in the tree. |
| 151 |
| 152 # Build a big document with lots of unique terms. |
| 153 set bigtext $text |
| 154 foreach c {a b c d e} { |
| 155 regsub -all {[A-Za-z]+} $bigtext "&$c" t |
| 156 append bigtext $t |
| 157 } |
| 158 |
| 159 # Populate a table with many copies of the big document, so that we |
| 160 # can test the number of hits found. Populate $ret with the expected |
| 161 # hit counts for each row. offsets() returns 4 elements for every |
| 162 # hit. We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for |
| 163 # $bigtext. |
| 164 set ret {6 1} |
| 165 db eval { |
| 166 BEGIN; |
| 167 CREATE VIRTUAL TABLE t3 USING fts2(c); |
| 168 |
| 169 INSERT INTO t3(rowid, c) VALUES(1, $text); |
| 170 INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row'); |
| 171 } |
| 172 for {set i 0} {$i<100} {incr i} { |
| 173 db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)} |
| 174 lappend ret 192 |
| 175 } |
| 176 db eval {COMMIT;} |
| 177 |
| 178 # Test that we get the expected number of hits. |
| 179 do_test fts2n-3.1 { |
| 180 set t {} |
| 181 db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} { |
| 182 set l [llength $o] |
| 183 lappend t [expr {$l/4}] |
| 184 } |
| 185 set t |
| 186 } $ret |
| 187 |
| 188 # TODO(shess) It would be useful to test a couple edge cases, but I |
| 189 # don't know if we have the precision to manage it from here at this |
| 190 # time. Prefix hits can cross leaves, which the code above _should_ |
| 191 # hit by virtue of size. There are two variations on this. If the |
| 192 # tree is 2 levels high, the code will find the leaf-node extent |
| 193 # directly, but if its higher, the code will have to follow two |
| 194 # separate interior branches down the tree. Both should be tested. |
| 195 |
| 196 finish_test |
OLD | NEW |