Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(153)

Side by Side Diff: third_party/sqlite/sqlite-src-3080704/test/fts4unicode.test

Issue 883353008: [sql] Import reference version of SQLite 3.8.7.4. (Closed) Base URL: http://chromium.googlesource.com/chromium/src.git@master
Patch Set: Hold back encoding change which is messing up patch. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # 2012 May 25
2 #
3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
5 #
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
9 #
10 #*************************************************************************
11 #
12 # The tests in this file focus on testing the "unicode" FTS tokenizer.
13 #
14
# Locate and source the shared test harness (provides do_test,
# do_execsql_test, ifcapable, reset_db, finish_test, ...).
15 set testdir [file dirname $argv0]
16 source $testdir/tester.tcl
# Skip the whole file when the build lacks FTS3/4 unicode support.
17 ifcapable !fts3_unicode { finish_test ; return }
18 set ::testprefix fts4unicode
19
# Tokenize $input with the unicode61 tokenizer, diacritics preserved
# (remove_diacritics=0), and compare the flattened
# <position, folded-token, original-token> list against $res.
# The comparison runs via do_execsql_test in the caller's scope so the
# test name $tn resolves correctly.
proc do_unicode_token_test {tn input res} {
  # Double any single quotes so $input embeds safely in an SQL literal.
  set escaped [string map {' ''} $input]
  set sql "
    SELECT fts3_tokenizer_test('unicode61', 'remove_diacritics=0', '$escaped');
  "
  set expected [list [list {*}$res]]
  uplevel [list do_execsql_test $tn $sql $expected]
}
26
# Like do_unicode_token_test, but runs the tokenizer with its default
# options (i.e. remove_diacritics enabled).
proc do_unicode_token_test2 {tn input res} {
  # Double any single quotes so $input embeds safely in an SQL literal.
  set escaped [string map {' ''} $input]
  set sql "
    SELECT fts3_tokenizer_test('unicode61', '$input');
  "
  set expected [list [list {*}$res]]
  uplevel [list do_execsql_test $tn $sql $expected]
}
33
# Variadic variant: all arguments except the last are passed through to
# the unicode61 tokenizer as extra option strings (e.g. "tokenchars=_");
# the final argument is the expected token list.
proc do_unicode_token_test3 {tn args} {
  set expected [lindex $args end]
  set sql "SELECT fts3_tokenizer_test('unicode61'"
  foreach extra [lrange $args 0 end-1] {
    # Quote-escape each extra argument and append it as an SQL literal.
    append sql ", '[string map {' ''} $extra]'"
  }
  append sql ")"
  uplevel [list do_execsql_test $tn $sql [list [list {*}$expected]]]
}
45
# Tests 1.*: basic tokenization, case folding and diacritic handling.
46 do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
47
48 do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
49 "0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC"
50
51 do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
52 "0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx"
53
54 # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
55 do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
56 do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E"
57
58 do_unicode_token_test 1.5 "The quick brown fox" {
59 0 the The 1 quick quick 2 brown brown 3 fox fox
60 }
61 do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
62 0 the The 1 quick quick 2 brown brown 3 fox fox
63 }
64
65 do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D}
66 do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC"
67
68 do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \
69 "0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx"
70
71 # Check that diacritics are removed under the tokenizer's default options
72 # (remove_diacritics=1). And that they do not break tokens.
73 do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx"
74
75 # Title-case mappings work
76 do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5"
77
78 #-------------------------------------------------------------------------
79 #
# Release-note paragraphs used as the source documents for the section 2
# tests below (each is rewritten by mapdoc before being inserted).
80 set docs [list {
81 Enhance the INSERT syntax to allow multiple rows to be inserted via the
82 VALUES clause.
83 } {
84 Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
85 } {
86 Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
87 } {
88 Added the sqlite3_db_readonly() interface.
89 } {
90 Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
91 ability to add new PRAGMA statements or to override built-in PRAGMAs.
92 } {
93 Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
94 the same row that contains the maximum x value.
95 } {
96 Added support for the FTS4 languageid option.
97 } {
98 Documented support for the FTS4 content option. This feature has actually
99 been in the code since version 3.7.9 but is only now considered to be
100 officially supported.
101 } {
102 Pending statements no longer block ROLLBACK. Instead, the pending statement
103 will return SQLITE_ABORT upon next access after the ROLLBACK.
104 } {
105 Improvements to the handling of CSV inputs in the command-line shell
106 } {
107 Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
108 incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
109 connected by OR.
110 }]
111
# Each map(<ascii letter>) entry pairs the upper- and lower-case
# diaeresis forms of that letter.
112 set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS
113 set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS
114 set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS
115 set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS
116 set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS
117 set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS
118 set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS
119 set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS
120 set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS
# Flatten the array into a [string map] pair-list used by mapdoc:
# upper-case ASCII -> upper diacritic form, lower-case -> lower form.
121 foreach k [array names map] {
122 lappend mappings [string toupper $k] [lindex $map($k) 0]
123 lappend mappings $k [lindex $map($k) 1]
124 }
# Collapse every whitespace run in $doc to a single space, trim leading
# and trailing whitespace, then rewrite characters according to the
# global $::mappings pair-list (built above from the map() array).
proc mapdoc {doc} {
  set collapsed [string trim [regsub -all {[[:space:]]+} $doc " "]]
  return [string map $::mappings $collapsed]
}
129
# Tests 2.*: insert the mapped (diacritic-laden) documents, then verify
# that plain-ASCII MATCH queries still hit them via the tokenizer's
# case and diacritic folding.
130 do_test 2.0 {
131 execsql { CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61, x); }
132 foreach doc $docs {
133 set d [mapdoc $doc]
134 execsql { INSERT INTO t2 VALUES($d) }
135 }
136 } {}
137
138 do_test 2.1 {
139 set q [mapdoc "row"]
140 execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
141 } [list [mapdoc {
142 Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
143 the same row that contains the maximum x value.
144 }]]
145
# Each {tn query snippet} triple checks snippet() output for a mapped
# query; case 6 additionally exercises a prefix ("*") query.
146 foreach {tn query snippet} {
147 2 "row" {
148 ...returns the value of y on the same [row] that contains
149 the maximum x value.
150 }
151 3 "ROW" {
152 ...returns the value of y on the same [row] that contains
153 the maximum x value.
154 }
155 4 "rollback" {
156 ...[ROLLBACK]. Instead, the pending statement
157 will return SQLITE_ABORT upon next access after the [ROLLBACK].
158 }
159 5 "rOllback" {
160 ...[ROLLBACK]. Instead, the pending statement
161 will return SQLITE_ABORT upon next access after the [ROLLBACK].
162 }
163 6 "lang*" {
164 Added support for the FTS4 [languageid] option.
165 }
166 } {
167 do_test 2.$tn {
168 set q [mapdoc $query]
169 execsql { SELECT snippet(t2, '[', ']', '...') FROM t2 WHERE t2 MATCH $q }
170 } [list [mapdoc $snippet]]
171 }
172
173 #-------------------------------------------------------------------------
174 # Make sure the unicode61 tokenizer does not crash if it is passed a
175 # NULL pointer.
176 reset_db
177 do_execsql_test 3.1 {
178 CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x, y);
179 INSERT INTO t1 VALUES(NULL, 'a b c');
180 }
181
182 do_execsql_test 3.2 {
183 SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'b'
184 } {{a [b] c}}
185
# Repeated self-INSERTs double the row count 16 times, growing the table
# large enough to exercise deeper index structures with NULL columns.
186 do_execsql_test 3.3 {
187 BEGIN;
188 DELETE FROM t1;
189 INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
190 INSERT INTO t1 SELECT * FROM t1;
191 INSERT INTO t1 SELECT * FROM t1;
192 INSERT INTO t1 SELECT * FROM t1;
193 INSERT INTO t1 SELECT * FROM t1;
194 INSERT INTO t1 SELECT * FROM t1;
195 INSERT INTO t1 SELECT * FROM t1;
196 INSERT INTO t1 SELECT * FROM t1;
197 INSERT INTO t1 SELECT * FROM t1;
198 INSERT INTO t1 SELECT * FROM t1;
199 INSERT INTO t1 SELECT * FROM t1;
200 INSERT INTO t1 SELECT * FROM t1;
201 INSERT INTO t1 SELECT * FROM t1;
202 INSERT INTO t1 SELECT * FROM t1;
203 INSERT INTO t1 SELECT * FROM t1;
204 INSERT INTO t1 SELECT * FROM t1;
205 INSERT INTO t1 SELECT * FROM t1;
206 INSERT INTO t1 VALUES('a b c', NULL);
207 INSERT INTO t1 VALUES('a x c', NULL);
208 COMMIT;
209 }
210
211 do_execsql_test 3.4 {
212 SELECT * FROM t1 WHERE t1 MATCH 'a b';
213 } {{a b c} {}}
214
215 #-------------------------------------------------------------------------
216 #
217 reset_db
218
# Tests 4.*: feed the tokenizer invalid Unicode input — U+FFFE (a
# noncharacter), an unpaired surrogate U+D800, and malformed UTF-8 byte
# sequences built with [binary format]. The INSERTs must complete
# without error or crash; no query results are checked.
219 do_test 4.1 {
220 set a "abc\uFFFEdef"
221 set b "abc\uD800def"
222 set c "\uFFFEdef"
223 set d "\uD800def"
224 execsql {
225 CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x);
226 INSERT INTO t1 VALUES($a);
227 INSERT INTO t1 VALUES($b);
228 INSERT INTO t1 VALUES($c);
229 INSERT INTO t1 VALUES($d);
230 }
231 } {}
232
233 do_test 4.2 {
234 set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
235 set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
236 set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
237 set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
238 execsql {
239 INSERT INTO t1 VALUES($a);
240 INSERT INTO t1 VALUES($b);
241 INSERT INTO t1 VALUES($c);
242 INSERT INTO t1 VALUES($d);
243 }
244 } {}
245
246 do_test 4.3 {
247 set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
248 set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
249 set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
250 set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
251 execsql {
252 INSERT INTO t1 VALUES($a);
253 INSERT INTO t1 VALUES($b);
254 INSERT INTO t1 VALUES($c);
255 INSERT INTO t1 VALUES($d);
256 }
257 } {}
258
259 #-------------------------------------------------------------------------
# Tests 5.*: the "tokenchars=" and "separators=" tokenizer options.
260
261 do_unicode_token_test3 5.1 {tokenchars=} {
262 sqlite3_reset sqlite3_column_int
263 } {
264 0 sqlite3 sqlite3
265 1 reset reset
266 2 sqlite3 sqlite3
267 3 column column
268 4 int int
269 }
270
271 do_unicode_token_test3 5.2 {tokenchars=_} {
272 sqlite3_reset sqlite3_column_int
273 } {
274 0 sqlite3_reset sqlite3_reset
275 1 sqlite3_column_int sqlite3_column_int
276 }
277
278 do_unicode_token_test3 5.3 {separators=xyz} {
279 Laotianxhorseyrunszfast
280 } {
281 0 laotian Laotian
282 1 horse horse
283 2 runs runs
284 3 fast fast
285 }
286
287 do_unicode_token_test3 5.4 {tokenchars=xyz} {
288 Laotianxhorseyrunszfast
289 } {
290 0 laotianxhorseyrunszfast Laotianxhorseyrunszfast
291 }
292
293 do_unicode_token_test3 5.5 {tokenchars=_} {separators=zyx} {
294 sqlite3_resetxsqlite3_column_intyhonda_phantom
295 } {
296 0 sqlite3_reset sqlite3_reset
297 1 sqlite3_column_int sqlite3_column_int
298 2 honda_phantom honda_phantom
299 }
300
301 do_unicode_token_test3 5.6 "separators=\u05D1" "abc\u05D1def" {
302 0 abc abc 1 def def
303 }
304
305 do_unicode_token_test3 5.7 \
306 "tokenchars=\u2444\u2445" \
307 "separators=\u05D0\u05D1\u05D2" \
308 "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
309 [list \
310 0 \u2444fre\u2445sh \u2444fre\u2445sh \
311 1 water water \
312 2 fish fish \
313 3 \u2445timer \u2445timer \
314 ]
315
316 # Check that it is not possible to add a standalone diacritic codepoint
317 # to either separators or tokenchars.
318 do_unicode_token_test3 5.8 "separators=\u0301" \
319 "hello\u0301world \u0301helloworld" \
320 "0 helloworld hello\u0301world 1 helloworld helloworld"
321
322 do_unicode_token_test3 5.9 "tokenchars=\u0301" \
323 "hello\u0301world \u0301helloworld" \
324 "0 helloworld hello\u0301world 1 helloworld helloworld"
325
326 do_unicode_token_test3 5.10 "separators=\u0301" \
327 "remove_diacritics=0" \
328 "hello\u0301world \u0301helloworld" \
329 "0 hello\u0301world hello\u0301world 1 helloworld helloworld"
330
331 do_unicode_token_test3 5.11 "tokenchars=\u0301" \
332 "remove_diacritics=0" \
333 "hello\u0301world \u0301helloworld" \
334 "0 hello\u0301world hello\u0301world 1 helloworld helloworld"
335
336
337 #-------------------------------------------------------------------------
338
# Run $txt through $tokenizer (via the fts3_tokenizer_test() SQL
# function on connection "db") and return only the folded token text:
# the second element of each <position, token, original> triple.
proc do_tokenize {tokenizer txt} {
  set triples [db one {SELECT fts3_tokenizer_test($tokenizer, $txt)}]
  set tokens {}
  foreach {pos token original} $triples {
    lappend tokens $token
  }
  return $tokens
}
346
347 # Argument $lCp must be a list of codepoints (integers) that
348 # correspond to whitespace characters. This command creates a string
349 # $W from the codepoints, then tokenizes "${W}hello${W}world${W}"
350 # using tokenizer $tokenizer. The test passes if the tokenizer successfully
351 # extracts the two 5 character tokens.
352 #
# Build a separator string from the codepoint list $lCp, then check that
# "${ws}hello${ws}world${ws}" tokenizes to exactly {hello world} — i.e.
# every codepoint in $lCp is treated as whitespace by $tokenizer.
proc do_isspace_test {tn tokenizer lCp} {
  # Assemble the whitespace string one codepoint at a time.
  set ws ""
  foreach cp $lCp {
    append ws [format %c $cp]
  }
  set txt "${ws}hello${ws}world${ws}"
  uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
}
358
359 set tokenizers [list unicode61]
360 ifcapable icu { lappend tokenizers icu }
361
362 # Some tests to check that each of the tokenizers can identify white-space
363 # codepoints. All codepoints tested below are of type "Zs" in the
364 # UnicodeData.txt file.
365 foreach T $tokenizers {
366 do_isspace_test 6.$T.1 $T 32
367 do_isspace_test 6.$T.2 $T 160
368 do_isspace_test 6.$T.3 $T 5760
369 do_isspace_test 6.$T.4 $T 6158
370 do_isspace_test 6.$T.5 $T 8192
371 do_isspace_test 6.$T.6 $T 8193
372 do_isspace_test 6.$T.7 $T 8194
373 do_isspace_test 6.$T.8 $T 8195
374 do_isspace_test 6.$T.9 $T 8196
375 do_isspace_test 6.$T.10 $T 8197
376 do_isspace_test 6.$T.11 $T 8198
377 do_isspace_test 6.$T.12 $T 8199
378 do_isspace_test 6.$T.13 $T 8200
379 do_isspace_test 6.$T.14 $T 8201
380 do_isspace_test 6.$T.15 $T 8202
381 do_isspace_test 6.$T.16 $T 8239
382 do_isspace_test 6.$T.17 $T 8287
383 do_isspace_test 6.$T.18 $T 12288
384
# Same codepoints again, several at a time.
385 do_isspace_test 6.$T.19 $T {32 160 5760 6158}
386 do_isspace_test 6.$T.20 $T {8192 8193 8194 8195}
387 do_isspace_test 6.$T.21 $T {8196 8197 8198 8199}
388 do_isspace_test 6.$T.22 $T {8200 8201 8202 8239}
389 do_isspace_test 6.$T.23 $T {8287 12288}
390 }
391
392 #-------------------------------------------------------------------------
393 # Test that the private use ranges are treated as alphanumeric.
394 #
395 foreach {tn1 c} {
396 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
397 } {
398 foreach {tn2 config res} {
399 1 "" "0 hello*world hello*world"
400 2 "separators=*" "0 hello hello 1 world world"
401 } {
# Substitute the private-use character $c for every '*' in the tokenizer
# config, the input document and the expected token list.
402 set config [string map [list * $c] $config]
403 set input [string map [list * $c] "hello*world"]
404 set output [string map [list * $c] $res]
405 do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
406 }
407 }
408
409 #-------------------------------------------------------------------------
410 # Cursory test of remove_diacritics=0.
411 #
412 # 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
413 # 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
414 # 00E4;LATIN SMALL LETTER A WITH DIAERESIS
415 # 00F6;LATIN SMALL LETTER O WITH DIAERESIS
416 #
# With remove_diacritics=1 the diacritic forms fold onto plain 'o'/'a',
# so MATCH 'o' finds rowids 1,3,5,7 and MATCH 'a' finds 2,4,6,8.
417 do_execsql_test 8.1.1 "
418 CREATE VIRTUAL TABLE t3 USING fts4(tokenize=unicode61 'remove_diacritics=1');
419 INSERT INTO t3 VALUES('o');
420 INSERT INTO t3 VALUES('a');
421 INSERT INTO t3 VALUES('O');
422 INSERT INTO t3 VALUES('A');
423 INSERT INTO t3 VALUES('\xD6');
424 INSERT INTO t3 VALUES('\xC4');
425 INSERT INTO t3 VALUES('\xF6');
426 INSERT INTO t3 VALUES('\xE4');
427 "
428 do_execsql_test 8.1.2 {
429 SELECT rowid FROM t3 WHERE t3 MATCH 'o';
430 } {1 3 5 7}
431 do_execsql_test 8.1.3 {
432 SELECT rowid FROM t3 WHERE t3 MATCH 'a';
433 } {2 4 6 8}
# With remove_diacritics=0 only the plain-ASCII rows match.
434 do_execsql_test 8.2.1 {
435 CREATE VIRTUAL TABLE t4 USING fts4(tokenize=unicode61 "remove_diacritics=0");
436 INSERT INTO t4 SELECT * FROM t3;
437 }
438 do_execsql_test 8.2.2 {
439 SELECT rowid FROM t4 WHERE t4 MATCH 'o';
440 } {1 3}
441 do_execsql_test 8.2.3 {
442 SELECT rowid FROM t4 WHERE t4 MATCH 'a';
443 } {2 4}
444
445 #-------------------------------------------------------------------------
446 #
# Tests 9.*: the same tokenizer arguments spelled with four different SQL
# quoting styles ([], "", '', ``) must all produce identical tokenizers.
447 foreach {tn sql} {
448 1 {
449 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
450 CREATE VIRTUAL TABLE t6 USING fts4(
451 tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
452 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
453 }
454 2 {
455 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
456 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
457 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
458 }
459 3 {
460 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
461 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
462 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
463 }
464 4 {
465 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
466 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
467 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
468 }
469 } {
470 do_execsql_test 9.$tn.0 {
471 DROP TABLE IF EXISTS t5;
472 DROP TABLE IF EXISTS t5aux;
473 DROP TABLE IF EXISTS t6;
474 DROP TABLE IF EXISTS t6aux;
475 DROP TABLE IF EXISTS t7;
476 DROP TABLE IF EXISTS t7aux;
477 }
478 do_execsql_test 9.$tn.1 $sql
479
480 do_execsql_test 9.$tn.2 {
481 CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
482 INSERT INTO t5 VALUES('one two three/four.five.six');
483 SELECT * FROM t5aux;
484 } {
485 four.five.six * 1 1 four.five.six 0 1 1
486 {one two three} * 1 1 {one two three} 0 1 1
487 }
488
489 do_execsql_test 9.$tn.3 {
490 CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
491 INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
492 SELECT * FROM t6aux;
493 } {
494 {alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1
495 {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
496 }
497
498 do_execsql_test 9.$tn.4 {
499 CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
500 INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
501 SELECT * FROM t7aux;
502 } {
503 aleph * 1 1 aleph 0 1 1
504 beth * 1 1 beth 0 1 1
505 gimel * 1 1 gimel 0 1 1
506 }
507 }
508
509 # Check that multiple options are handled correctly.
510 #
# Later tokenchars=/separators= options override earlier ones, so only
# the final "separators=a"/"tokenchars=a" pair plus the last distinct
# settings take effect.
511 do_execsql_test 10.1 {
512 DROP TABLE IF EXISTS t1;
513 CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
514 "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
515 "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
516 );
517
518 INSERT INTO t1 VALUES('oneatwoxthreeyfour');
519 INSERT INTO t1 VALUES('a.single=word');
520 CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
521 SELECT * FROM t1aux;
522 } {
523 .single=word * 1 1 .single=word 0 1 1
524 four * 1 1 four 0 1 1
525 one * 1 1 one 0 1 1
526 three * 1 1 three 0 1 1
527 two * 1 1 two 0 1 1
528 }
529
530 # Test that case folding happens after tokenization, not before.
531 #
532 do_execsql_test 10.2 {
533 DROP TABLE IF EXISTS t2;
534 CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
535 INSERT INTO t2 VALUES('oneatwoBthree');
536 INSERT INTO t2 VALUES('onebtwoAthree');
537 CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
538 SELECT * FROM t2aux;
539 } {
540 one * 1 1 one 0 1 1
541 onebtwoathree * 1 1 onebtwoathree 0 1 1
542 three * 1 1 three 0 1 1
543 two * 1 1 two 0 1 1
544 }
545
546 # Test that the tokenchars and separators options work with the
547 # fts3tokenize table.
548 #
549 do_execsql_test 11.1 {
550 CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
551 "unicode61", "tokenchars=@.", "separators=1234567890"
552 );
553 SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
554 } {
555 berlin@street sydney.road
556 }
557
558 finish_test
OLDNEW
« no previous file with comments | « third_party/sqlite/sqlite-src-3080704/test/fts4noti.test ('k') | third_party/sqlite/sqlite-src-3080704/test/full.test » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698