third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5unicode2.test - Issue 2747283002: [sql] Import reference version of SQLite 3.17..

Side by Side Diff: third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5unicode2.test

Issue 2747283002: [sql] Import reference version of SQLite 3.17.. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5unicode.test ('k') | third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5unicode3.test » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # 2012 May 25

	2 #

	3 # The author disclaims copyright to this source code. In place of

	4 # a legal notice, here is a blessing:

	5 #

	6 # May you do good and not evil.

	7 # May you find forgiveness for yourself and forgive others.

	8 # May you share freely, never taking more than you give.

	9 #

	10 #*************************************************************************

	11 #

	12 # The tests in this file focus on testing the "unicode" FTS tokenizer.

	13 #

	14 # This is a modified copy of FTS4 test file "fts4_unicode.test".

	15 #

	16

	17 source [file join [file dirname [info script]] fts5_common.tcl]

	18 set testprefix fts5unicode2

	19

	20 # If SQLITE_ENABLE_FTS5 is not defined, omit this file.

	21 ifcapable !fts5 {

	22 finish_test

	23 return

	24 }

	25

	26 proc do_unicode_token_test {tn input res} {

	27 uplevel [list do_test $tn [list \

	28 sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input

	29 ] [list {*}$res]]

	30 }

	31

	32 proc do_unicode_token_test2 {tn input res} {

	33 uplevel [list do_test $tn [list \

	34 sqlite3_fts5_tokenize -subst db "unicode61" $input

	35 ] [list {*}$res]]

	36 }

	37

	38 proc do_unicode_token_test3 {tn args} {

	39 set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]

	40 set input [lindex $args end-1]

	41 set res [lindex $args end]

	42 uplevel [list do_test $tn [list \

	43 sqlite3_fts5_tokenize -subst db $tokenizer $input

	44 ] [list {*}$res]]

	45 }

	46

	47 do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}

	48

	49 do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \

	50 "\uE4 \uC4 \uF6 \uD6 \uFC \uDC"

	51

	52 do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \

	53 "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"

	54

	55 # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.

	56 do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"

	57 do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"

	58

	59 do_unicode_token_test 1.5 "The quick brown fox" {

	60 the The quick quick brown brown fox fox

	61 }

	62 do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {

	63 the The quick quick brown brown fox fox

	64 }

	65

	66 do_unicode_token_test2 1.7 {a B c D} {a a b B c c d D}

	67 do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"

	68

	69 do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \

	70 "xax x\uC4x xox x\uD6x xux x\uDCx"

	71

	72 # Check that diacritics are removed if remove_diacritics=1 is specified.

	73 # And that they do not break tokens.

	74 do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"

	75

	76 # Title-case mappings work

	77 do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"

	78

	79 do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \

	80 "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"

	81

	82 do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \

	83 "abc abc def def"

	84

	85 #-------------------------------------------------------------------------

	86 #

	87 set docs [list {

	88 Enhance the INSERT syntax to allow multiple rows to be inserted via the

	89 VALUES clause.

	90 } {

	91 Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.

	92 } {

	93 Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().

	94 } {

	95 Added the sqlite3_db_readonly() interface.

	96 } {

	97 Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the

	98 ability to add new PRAGMA statements or to override built-in PRAGMAs.

	99 } {

	100 Queries of the form: "SELECT max(x), y FROM table" returns the value of y on

	101 the same row that contains the maximum x value.

	102 } {

	103 Added support for the FTS4 languageid option.

	104 } {

	105 Documented support for the FTS4 content option. This feature has actually

	106 been in the code since version 3.7.9 but is only now considered to be

	107 officially supported.

	108 } {

	109 Pending statements no longer block ROLLBACK. Instead, the pending statement

	110 will return SQLITE_ABORT upon next access after the ROLLBACK.

	111 } {

	112 Improvements to the handling of CSV inputs in the command-line shell

	113 } {

	114 Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be

	115 incorrectly converted into an INNER JOIN if the WHERE clause indexable terms

	116 connected by OR.

	117 }]

	118

	119 set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS

	120 set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS

	121 set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS

	122 set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS

	123 set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS

	124 set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS

	125 set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS

	126 set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS

	127 set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS

	128 foreach k [array names map] {

	129 lappend mappings [string toupper $k] [lindex $map($k) 0]

	130 lappend mappings $k [lindex $map($k) 1]

	131 }

	132 proc mapdoc {doc} {

	133 set doc [regsub -all {[[:space:]]+} $doc " "]

	134 string map $::mappings [string trim $doc]

	135 }

	136

	137 do_test 2.0 {

	138 execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }

	139 foreach doc $docs {

	140 set d [mapdoc $doc]

	141 execsql { INSERT INTO t2 VALUES($d) }

	142 }

	143 } {}

	144

	145 do_test 2.1 {

	146 set q [mapdoc "row"]

	147 execsql { SELECT * FROM t2 WHERE t2 MATCH $q }

	148 } [list [mapdoc {

	149 Queries of the form: "SELECT max(x), y FROM table" returns the value of y on

	150 the same row that contains the maximum x value.

	151 }]]

	152

	153 foreach {tn query snippet} {

	154 2 "row" {

	155 ...returns the value of y on the same [row] that contains

	156 the maximum x value.

	157 }

	158 3 "ROW" {

	159 ...returns the value of y on the same [row] that contains

	160 the maximum x value.

	161 }

	162 4 "rollback" {

	163 Pending statements no longer block [ROLLBACK]. Instead, the pending

	164 statement will return SQLITE_ABORT upon...

	165 }

	166 5 "rOllback" {

	167 Pending statements no longer block [ROLLBACK]. Instead, the pending

	168 statement will return SQLITE_ABORT upon...

	169 }

	170 6 "lang*" {

	171 Added support for the FTS4 [languageid] option.

	172 }

	173 } {

	174 do_test 2.$tn {

	175 set q [mapdoc $query]

	176 execsql {

	177 SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q

	178 }

	179 } [list [mapdoc $snippet]]

	180 }

	181

	182 #-------------------------------------------------------------------------

	183 # Make sure the unicode61 tokenizer does not crash if it is passed a

	184 # NULL pointer.

	185 reset_db

	186 do_execsql_test 3.1 {

	187 CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);

	188 INSERT INTO t1 VALUES(NULL, 'a b c');

	189 }

	190

	191 do_execsql_test 3.2 {

	192 SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'

	193 } {{a [b] c}}

	194

	195 do_execsql_test 3.3 {

	196 BEGIN;

	197 DELETE FROM t1;

	198 INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');

	199 INSERT INTO t1 SELECT * FROM t1;

	200 INSERT INTO t1 SELECT * FROM t1;

	201 INSERT INTO t1 SELECT * FROM t1;

	202 INSERT INTO t1 SELECT * FROM t1;

	203 INSERT INTO t1 SELECT * FROM t1;

	204 INSERT INTO t1 SELECT * FROM t1;

	205 INSERT INTO t1 SELECT * FROM t1;

	206 INSERT INTO t1 SELECT * FROM t1;

	207 INSERT INTO t1 SELECT * FROM t1;

	208 INSERT INTO t1 SELECT * FROM t1;

	209 INSERT INTO t1 SELECT * FROM t1;

	210 INSERT INTO t1 SELECT * FROM t1;

	211 INSERT INTO t1 SELECT * FROM t1;

	212 INSERT INTO t1 SELECT * FROM t1;

	213 INSERT INTO t1 SELECT * FROM t1;

	214 INSERT INTO t1 SELECT * FROM t1;

	215 INSERT INTO t1 VALUES('a b c', NULL);

	216 INSERT INTO t1 VALUES('a x c', NULL);

	217 COMMIT;

	218 }

	219

	220 do_execsql_test 3.4 {

	221 SELECT * FROM t1 WHERE t1 MATCH 'a b';

	222 } {{a b c} {}}

	223

	224 #-------------------------------------------------------------------------

	225 #

	226 reset_db

	227

	228 do_test 4.1 {

	229 set a "abc\uFFFEdef"

	230 set b "abc\uD800def"

	231 set c "\uFFFEdef"

	232 set d "\uD800def"

	233 execsql {

	234 CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);

	235 INSERT INTO t1 VALUES($a);

	236 INSERT INTO t1 VALUES($b);

	237 INSERT INTO t1 VALUES($c);

	238 INSERT INTO t1 VALUES($d);

	239 }

	240

	241 execsql "CREATE VIRTUAL TABLE t8 USING fts5(

	242 a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"

	243 )"

	244 } {}

	245

	246 do_test 4.2 {

	247 set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]

	248 set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]

	249 set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]

	250 set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]

	251 execsql {

	252 INSERT INTO t1 VALUES($a);

	253 INSERT INTO t1 VALUES($b);

	254 INSERT INTO t1 VALUES($c);

	255 INSERT INTO t1 VALUES($d);

	256 }

	257 } {}

	258

	259 do_test 4.3 {

	260 set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]

	261 set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]

	262 set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]

	263 set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]

	264 execsql {

	265 INSERT INTO t1 VALUES($a);

	266 INSERT INTO t1 VALUES($b);

	267 INSERT INTO t1 VALUES($c);

	268 INSERT INTO t1 VALUES($d);

	269 }

	270 } {}

	271

	272 do_test 4.4 {

	273 sqlite3_exec_hex db {

	274 CREATE VIRTUAL TABLE t9 USING fts5(a, b,

	275 tokenize="unicode61 separators '%C09004'"

	276 );

	277 INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');

	278 }

	279 } {0 {}}

	280

	281

	282 #-------------------------------------------------------------------------

	283

	284 breakpoint

	285 do_unicode_token_test3 5.1 {tokenchars {}} {

	286 sqlite3_reset sqlite3_column_int

	287 } {

	288 sqlite3 sqlite3

	289 reset reset

	290 sqlite3 sqlite3

	291 column column

	292 int int

	293 }

	294

	295 do_unicode_token_test3 5.2 {tokenchars _} {

	296 sqlite3_reset sqlite3_column_int

	297 } {

	298 sqlite3_reset sqlite3_reset

	299 sqlite3_column_int sqlite3_column_int

	300 }

	301

	302 do_unicode_token_test3 5.3 {separators xyz} {

	303 Laotianxhorseyrunszfast

	304 } {

	305 laotian Laotian

	306 horse horse

	307 runs runs

	308 fast fast

	309 }

	310

	311 do_unicode_token_test3 5.4 {tokenchars xyz} {

	312 Laotianxhorseyrunszfast

	313 } {

	314 laotianxhorseyrunszfast Laotianxhorseyrunszfast

	315 }

	316

	317 do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {

	318 sqlite3_resetxsqlite3_column_intyhonda_phantom

	319 } {

	320 sqlite3_reset sqlite3_reset

	321 sqlite3_column_int sqlite3_column_int

	322 honda_phantom honda_phantom

	323 }

	324

	325 do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {

	326 abc abc def def

	327 }

	328

	329 do_unicode_token_test3 5.7 \

	330 "tokenchars \u2444\u2445" \

	331 "separators \u05D0\u05D1\u05D2" \

	332 "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \

	333 [list \

	334 \u2444fre\u2445sh \u2444fre\u2445sh \

	335 water water \

	336 fish fish \

	337 \u2445timer \u2445timer \

	338 ]

	339

	340 # Check that it is not possible to add a standalone diacritic codepoint

	341 # to either separators or tokenchars.

	342 do_unicode_token_test3 5.8 "separators \u0301" \

	343 "hello\u0301world \u0301helloworld" \

	344 "helloworld hello\u0301world helloworld helloworld"

	345

	346 do_unicode_token_test3 5.9 "tokenchars \u0301" \

	347 "hello\u0301world \u0301helloworld" \

	348 "helloworld hello\u0301world helloworld helloworld"

	349

	350 do_unicode_token_test3 5.10 "separators \u0301" \

	351 "remove_diacritics 0" \

	352 "hello\u0301world \u0301helloworld" \

	353 "hello\u0301world hello\u0301world helloworld helloworld"

	354

	355 do_unicode_token_test3 5.11 "tokenchars \u0301" \

	356 "remove_diacritics 0" \

	357 "hello\u0301world \u0301helloworld" \

	358 "hello\u0301world hello\u0301world helloworld helloworld"

	359

	360 #-------------------------------------------------------------------------

	361

	362 proc do_tokenize {tokenizer txt} {

	363 set res [list]

	364 foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {

	365 lappend res $b

	366 }

	367 set res

	368 }

	369

	370 # Argument $lCp must be a list of codepoints (integers) that

	371 # correspond to whitespace characters. This command creates a string

	372 # $W from the codepoints, then tokenizes "${W}hello${W}world${W}"

	373 # using tokenizer $tokenizer. The test passes if the tokenizer successfully

	374 # extracts the two 5 character tokens.

	375 #

	376 proc do_isspace_test {tn tokenizer lCp} {

	377 set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp]

	378 set txt "${whitespace}hello${whitespace}world${whitespace}"

	379 uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]

	380 }

	381

	382 set tokenizers [list unicode61]

	383 #ifcapable icu { lappend tokenizers icu }

	384

	385 # Some tests to check that each tokenizer under test can identify white-space

	386 # codepoints. All codepoints tested below are of type "Zs" in the

	387 # UnicodeData.txt file.

	388 foreach T $tokenizers {

	389 do_isspace_test 6.$T.1 $T 32

	390 do_isspace_test 6.$T.2 $T 160

	391 do_isspace_test 6.$T.3 $T 5760

	392 do_isspace_test 6.$T.4 $T 6158

	393 do_isspace_test 6.$T.5 $T 8192

	394 do_isspace_test 6.$T.6 $T 8193

	395 do_isspace_test 6.$T.7 $T 8194

	396 do_isspace_test 6.$T.8 $T 8195

	397 do_isspace_test 6.$T.9 $T 8196

	398 do_isspace_test 6.$T.10 $T 8197

	399 do_isspace_test 6.$T.11 $T 8198

	400 do_isspace_test 6.$T.12 $T 8199

	401 do_isspace_test 6.$T.13 $T 8200

	402 do_isspace_test 6.$T.14 $T 8201

	403 do_isspace_test 6.$T.15 $T 8202

	404 do_isspace_test 6.$T.16 $T 8239

	405 do_isspace_test 6.$T.17 $T 8287

	406 do_isspace_test 6.$T.18 $T 12288

	407

	408 do_isspace_test 6.$T.19 $T {32 160 5760 6158}

	409 do_isspace_test 6.$T.20 $T {8192 8193 8194 8195}

	410 do_isspace_test 6.$T.21 $T {8196 8197 8198 8199}

	411 do_isspace_test 6.$T.22 $T {8200 8201 8202 8239}

	412 do_isspace_test 6.$T.23 $T {8287 12288}

	413 }

	414

	415

	416 #-------------------------------------------------------------------------

	417 # Test that the private use ranges are treated as alphanumeric.

	418 #

	419 foreach {tn1 c} {

	420 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff

	421 } {

	422 foreach {tn2 config res} {

	423 1 "" "helloworld helloworld"

	424 2 "separators *" "hello hello world world"

	425 } {

	426 set config [string map [list * $c] $config]

	427 set input [string map [list * $c] "hello*world"]

	428 set output [string map [list * $c] $res]

	429 do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output

	430 }

	431 }

	432

	433 #-------------------------------------------------------------------------

	434 # Cursory test of remove_diacritics=1 and remove_diacritics=0.

	435 #

	436 # 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS

	437 # 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS

	438 # 00E4;LATIN SMALL LETTER A WITH DIAERESIS

	439 # 00F6;LATIN SMALL LETTER O WITH DIAERESIS

	440 #

	441 do_execsql_test 8.1.1 "

	442 CREATE VIRTUAL TABLE t3 USING fts5(

	443 content, tokenize='unicode61 remove_diacritics 1'

	444 );

	445 INSERT INTO t3 VALUES('o');

	446 INSERT INTO t3 VALUES('a');

	447 INSERT INTO t3 VALUES('O');

	448 INSERT INTO t3 VALUES('A');

	449 INSERT INTO t3 VALUES('\xD6');

	450 INSERT INTO t3 VALUES('\xC4');

	451 INSERT INTO t3 VALUES('\xF6');

	452 INSERT INTO t3 VALUES('\xE4');

	453 "

	454 do_execsql_test 8.1.2 {

	455 SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;

	456 } {1 3 5 7}

	457 do_execsql_test 8.1.3 {

	458 SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;

	459 } {2 4 6 8}

	460 do_execsql_test 8.2.1 {

	461 CREATE VIRTUAL TABLE t4 USING fts5(

	462 content, tokenize='unicode61 remove_diacritics 0'

	463 );

	464 INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;

	465 }

	466 do_execsql_test 8.2.2 {

	467 SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;

	468 } {1 3}

	469 do_execsql_test 8.2.3 {

	470 SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;

	471 } {2 4}

	472

	473 #-------------------------------------------------------------------------

	474 #

	475 if 0 {

	476 foreach {tn sql} {

	477 1 {

	478 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);

	479 CREATE VIRTUAL TABLE t6 USING fts4(

	480 tokenize=unicode61 [tokenchars=="] "tokenchars=[]");

	481 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);

	482 }

	483 2 {

	484 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");

	485 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");

	486 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");

	487 }

	488 3 {

	489 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');

	490 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');

	491 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');

	492 }

	493 4 {

	494 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);

	495 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);

	496 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);

	497 }

	498 } {

	499 do_execsql_test 9.$tn.0 {

	500 DROP TABLE IF EXISTS t5;

	501 DROP TABLE IF EXISTS t5aux;

	502 DROP TABLE IF EXISTS t6;

	503 DROP TABLE IF EXISTS t6aux;

	504 DROP TABLE IF EXISTS t7;

	505 DROP TABLE IF EXISTS t7aux;

	506 }

	507 do_execsql_test 9.$tn.1 $sql

	508

	509 do_execsql_test 9.$tn.2 {

	510 CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);

	511 INSERT INTO t5 VALUES('one two three/four.five.six');

	512 SELECT * FROM t5aux;

	513 } {

	514 four.five.six * 1 1 four.five.six 0 1 1

	515 {one two three} * 1 1 {one two three} 0 1 1

	516 }

	517

	518 do_execsql_test 9.$tn.3 {

	519 CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);

	520 INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');

	521 SELECT * FROM t6aux;

	522 } {

	523 {alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1

	524 {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1

	525 }

	526

	527 do_execsql_test 9.$tn.4 {

	528 CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);

	529 INSERT INTO t7 VALUES('alephxbeth\xC4gimel');

	530 SELECT * FROM t7aux;

	531 } {

	532 aleph * 1 1 aleph 0 1 1

	533 beth * 1 1 beth 0 1 1

	534 gimel * 1 1 gimel 0 1 1

	535 }

	536 }

	537

	538 # Check that multiple options are handled correctly.

	539 #

	540 do_execsql_test 10.1 {

	541 DROP TABLE IF EXISTS t1;

	542 CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61

	543 "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"

	544 "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"

	545 );

	546

	547 INSERT INTO t1 VALUES('oneatwoxthreeyfour');

	548 INSERT INTO t1 VALUES('a.single=word');

	549 CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);

	550 SELECT * FROM t1aux;

	551 } {

	552 .single=word * 1 1 .single=word 0 1 1

	553 four * 1 1 four 0 1 1

	554 one * 1 1 one 0 1 1

	555 three * 1 1 three 0 1 1

	556 two * 1 1 two 0 1 1

	557 }

	558

	559 # Test that case folding happens after tokenization, not before.

	560 #

	561 do_execsql_test 10.2 {

	562 DROP TABLE IF EXISTS t2;

	563 CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");

	564 INSERT INTO t2 VALUES('oneatwoBthree');

	565 INSERT INTO t2 VALUES('onebtwoAthree');

	566 CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);

	567 SELECT * FROM t2aux;

	568 } {

	569 one * 1 1 one 0 1 1

	570 onebtwoathree * 1 1 onebtwoathree 0 1 1

	571 three * 1 1 three 0 1 1

	572 two * 1 1 two 0 1 1

	573 }

	574

	575 # Test that the tokenchars and separators options work with the

	576 # fts3tokenize table.

	577 #

	578 do_execsql_test 11.1 {

	579 CREATE VIRTUAL TABLE ft1 USING fts3tokenize(

	580 "unicode61", "tokenchars=@.", "separators=1234567890"

	581 );

	582 SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';

	583 } {

	584 berlin@street sydney.road

	585 }

	586

	587 }

	588

	589 finish_test

OLD	NEW