third_party/sqlite/sqlite-src-3100200/test/fts4unicode.test - Issue 2846743003: [sql] Remove SQLite 3.10.2 reference directory.

Side by Side Diff: third_party/sqlite/sqlite-src-3100200/test/fts4unicode.test

Issue 2846743003: [sql] Remove SQLite 3.10.2 reference directory. (Closed)

Patch Set: Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 # 2012 May 25

2 #

3 # The author disclaims copyright to this source code. In place of

4 # a legal notice, here is a blessing:

5 #

6 # May you do good and not evil.

7 # May you find forgiveness for yourself and forgive others.

8 # May you share freely, never taking more than you give.

9 #

10 #*************************************************************************

11 #

12 # The tests in this file focus on testing the "unicode" FTS tokenizer.

13 #

14

# Locate the directory holding this script and load the shared test
# harness (tester.tcl) from it.
15 set testdir [file dirname $argv0]

16 source $testdir/tester.tcl

# Finish the test run and return immediately when the fts3_unicode
# capability is not reported by ifcapable.
17 ifcapable !fts3_unicode { finish_test ; return }

# Global test-name prefix used for this file.
18 set ::testprefix fts4unicode

19

# do_unicode_token_test TN INPUT RES
#
# Run test TN via do_execsql_test, evaluated in the caller's scope with
# uplevel. The SQL calls fts3_tokenizer_test() with the 'unicode61'
# tokenizer and the single option 'remove_diacritics=0' on INPUT.
# RES is the expected list; it is re-listed with {*} and wrapped in an
# outer list before being handed to do_execsql_test (presumably because
# the query yields a single value holding the whole token list).
20 proc do_unicode_token_test {tn input res} {

# Double any single quotes so INPUT can be embedded in an SQL string literal.
21 set input [string map {' ''} $input]

22 uplevel [list do_execsql_test $tn "

23 SELECT fts3_tokenizer_test('unicode61', 'remove_diacritics=0', '$input');

24 " [list [list {*}$res]]]

25 }

26

# do_unicode_token_test2 TN INPUT RES
#
# Same as do_unicode_token_test, except that no tokenizer option is
# passed: fts3_tokenizer_test() is called with only 'unicode61' and INPUT.
# RES is the expected list, re-listed and wrapped in an outer list.
27 proc do_unicode_token_test2 {tn input res} {

# Double any single quotes so INPUT can be embedded in an SQL string literal.
28 set input [string map {' ''} $input]

29 uplevel [list do_execsql_test $tn "

30 SELECT fts3_tokenizer_test('unicode61', '$input');

31 " [list [list {*}$res]]]

32 }

33

# do_unicode_token_test3 TN ARG ?ARG ...? RES
#
# Variadic form of the token tests. The last element of ARGS is the
# expected result RES; every preceding element is appended, in order, as
# a quoted SQL string argument to fts3_tokenizer_test('unicode61', ...).
# The resulting statement is run through do_execsql_test in the caller's
# scope.
34 proc do_unicode_token_test3 {tn args} {

35 set res [lindex $args end]

36 set sql "SELECT fts3_tokenizer_test('unicode61'"

# Append each argument except the last as ", '<arg>'", doubling single
# quotes inside the argument.
37 foreach a [lrange $args 0 end-1] {

38 append sql ", '"

39 append sql [string map {' ''} $a]

40 append sql "'"

41 }

42 append sql ")"

43 uplevel [list do_execsql_test $tn $sql [list [list {*}$res]]]

44 }

45

46 do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}

47

48 do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \

49 "0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC"

50

51 do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \

52 "0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx"

53

54 # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.

55 do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"

56 do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E"

57

58 do_unicode_token_test 1.5 "The quick brown fox" {

59 0 the The 1 quick quick 2 brown brown 3 fox fox

60 }

61 do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {

62 0 the The 1 quick quick 2 brown brown 3 fox fox

63 }

64

65 do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D}

66 do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC"

67

68 do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \

69 "0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx"

70

71 # Check that diacritics are removed when remove_diacritics=0 is not specified.

72 # And that they do not break tokens.

73 do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx"

74

75 # Title-case mappings work

76 do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5"

77

78 #-------------------------------------------------------------------------

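79 # Index documents whose letters a/e/i/o/u/y/h/w/x are replaced by their diaeresis forms, then check MATCH and snippet() results.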

80 set docs [list {

81 Enhance the INSERT syntax to allow multiple rows to be inserted via the

82 VALUES clause.

83 } {

84 Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.

85 } {

86 Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().

87 } {

88 Added the sqlite3_db_readonly() interface.

89 } {

90 Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the

91 ability to add new PRAGMA statements or to override built-in PRAGMAs.

92 } {

93 Queries of the form: "SELECT max(x), y FROM table" returns the value of y on

94 the same row that contains the maximum x value.

95 } {

96 Added support for the FTS4 languageid option.

97 } {

98 Documented support for the FTS4 content option. This feature has actually

99 been in the code since version 3.7.9 but is only now considered to be

100 officially supported.

101 } {

102 Pending statements no longer block ROLLBACK. Instead, the pending statement

103 will return SQLITE_ABORT upon next access after the ROLLBACK.

104 } {

105 Improvements to the handling of CSV inputs in the command-line shell

106 } {

107 Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be

108 incorrectly converted into an INNER JOIN if the WHERE clause indexable terms

109 connected by OR.

110 }]

111

# map(<letter>) holds a two-element list: the upper-case and lower-case
# "with diaeresis" form of that ASCII letter, in that order.
112 set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS

113 set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS

114 set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS

115 set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS

116 set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS

117 set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS

118 set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS

119 set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS

120 set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS

# Flatten the array into the global list "mappings" in [string map]
# format: upper-case letter -> element 0, lower-case letter -> element 1.
121 foreach k [array names map] {

122 lappend mappings [string toupper $k] [lindex $map($k) 0]

123 lappend mappings $k [lindex $map($k) 1]

124 }

# mapdoc DOC
#
# Return DOC with every run of whitespace collapsed to a single space,
# leading and trailing whitespace trimmed, and the substitutions in the
# global list ::mappings applied via [string map].
125 proc mapdoc {doc} {

126 set doc [regsub -all {[[:space:]]+} $doc " "]

127 string map $::mappings [string trim $doc]

128 }

129

130 do_test 2.0 {

131 execsql { CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61, x); }

132 foreach doc $docs {

133 set d [mapdoc $doc]

134 execsql { INSERT INTO t2 VALUES($d) }

135 }

136 } {}

137

138 do_test 2.1 {

139 set q [mapdoc "row"]

140 execsql { SELECT * FROM t2 WHERE t2 MATCH $q }

141 } [list [mapdoc {

142 Queries of the form: "SELECT max(x), y FROM table" returns the value of y on

143 the same row that contains the maximum x value.

144 }]]

145

146 foreach {tn query snippet} {

147 2 "row" {

148 ...returns the value of y on the same [row] that contains

149 the maximum x value.

150 }

151 3 "ROW" {

152 ...returns the value of y on the same [row] that contains

153 the maximum x value.

154 }

155 4 "rollback" {

156 ...[ROLLBACK]. Instead, the pending statement

157 will return SQLITE_ABORT upon next access after the [ROLLBACK].

158 }

159 5 "rOllback" {

160 ...[ROLLBACK]. Instead, the pending statement

161 will return SQLITE_ABORT upon next access after the [ROLLBACK].

162 }

163 6 "lang*" {

164 Added support for the FTS4 [languageid] option.

165 }

166 } {

167 do_test 2.$tn {

168 set q [mapdoc $query]

169 execsql { SELECT snippet(t2, '[', ']', '...') FROM t2 WHERE t2 MATCH $q }

170 } [list [mapdoc $snippet]]

171 }

172

173 #-------------------------------------------------------------------------

174 # Make sure the unicode61 tokenizer does not crash if it is passed a

175 # NULL pointer.

176 reset_db

177 do_execsql_test 3.1 {

178 CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x, y);

179 INSERT INTO t1 VALUES(NULL, 'a b c');

180 }

181

182 do_execsql_test 3.2 {

183 SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'b'

184 } {{a [b] c}}

185

186 do_execsql_test 3.3 {

187 BEGIN;

188 DELETE FROM t1;

189 INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');

190 INSERT INTO t1 SELECT * FROM t1;

191 INSERT INTO t1 SELECT * FROM t1;

192 INSERT INTO t1 SELECT * FROM t1;

193 INSERT INTO t1 SELECT * FROM t1;

194 INSERT INTO t1 SELECT * FROM t1;

195 INSERT INTO t1 SELECT * FROM t1;

196 INSERT INTO t1 SELECT * FROM t1;

197 INSERT INTO t1 SELECT * FROM t1;

198 INSERT INTO t1 SELECT * FROM t1;

199 INSERT INTO t1 SELECT * FROM t1;

200 INSERT INTO t1 SELECT * FROM t1;

201 INSERT INTO t1 SELECT * FROM t1;

202 INSERT INTO t1 SELECT * FROM t1;

203 INSERT INTO t1 SELECT * FROM t1;

204 INSERT INTO t1 SELECT * FROM t1;

205 INSERT INTO t1 SELECT * FROM t1;

206 INSERT INTO t1 VALUES('a b c', NULL);

207 INSERT INTO t1 VALUES('a x c', NULL);

208 COMMIT;

209 }

210

211 do_execsql_test 3.4 {

212 SELECT * FROM t1 WHERE t1 MATCH 'a b';

213 } {{a b c} {}}

214

215 #-------------------------------------------------------------------------

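216 # Insert values containing U+FFFE, the unpaired surrogate U+D800, and raw byte sequences of 0xF7 followed by three to six 0xBF bytes; each do_test expects an empty result.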

217 reset_db

218

219 do_test 4.1 {

220 set a "abc\uFFFEdef"

221 set b "abc\uD800def"

222 set c "\uFFFEdef"

223 set d "\uD800def"

224 execsql {

225 CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x);

226 INSERT INTO t1 VALUES($a);

227 INSERT INTO t1 VALUES($b);

228 INSERT INTO t1 VALUES($c);

229 INSERT INTO t1 VALUES($d);

230 }

231 } {}

232

233 do_test 4.2 {

234 set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]

235 set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]

236 set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]

237 set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]

238 execsql {

239 INSERT INTO t1 VALUES($a);

240 INSERT INTO t1 VALUES($b);

241 INSERT INTO t1 VALUES($c);

242 INSERT INTO t1 VALUES($d);

243 }

244 } {}

245

246 do_test 4.3 {

247 set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]

248 set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]

249 set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]

250 set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]

251 execsql {

252 INSERT INTO t1 VALUES($a);

253 INSERT INTO t1 VALUES($b);

254 INSERT INTO t1 VALUES($c);

255 INSERT INTO t1 VALUES($d);

256 }

257 } {}

258

259 #-------------------------------------------------------------------------

260

261 do_unicode_token_test3 5.1 {tokenchars=} {

262 sqlite3_reset sqlite3_column_int

263 } {

264 0 sqlite3 sqlite3

265 1 reset reset

266 2 sqlite3 sqlite3

267 3 column column

268 4 int int

269 }

270

271 do_unicode_token_test3 5.2 {tokenchars=_} {

272 sqlite3_reset sqlite3_column_int

273 } {

274 0 sqlite3_reset sqlite3_reset

275 1 sqlite3_column_int sqlite3_column_int

276 }

277

278 do_unicode_token_test3 5.3 {separators=xyz} {

279 Laotianxhorseyrunszfast

280 } {

281 0 laotian Laotian

282 1 horse horse

283 2 runs runs

284 3 fast fast

285 }

286

287 do_unicode_token_test3 5.4 {tokenchars=xyz} {

288 Laotianxhorseyrunszfast

289 } {

290 0 laotianxhorseyrunszfast Laotianxhorseyrunszfast

291 }

292

293 do_unicode_token_test3 5.5 {tokenchars=_} {separators=zyx} {

294 sqlite3_resetxsqlite3_column_intyhonda_phantom

295 } {

296 0 sqlite3_reset sqlite3_reset

297 1 sqlite3_column_int sqlite3_column_int

298 2 honda_phantom honda_phantom

299 }

300

301 do_unicode_token_test3 5.6 "separators=\u05D1" "abc\u05D1def" {

302 0 abc abc 1 def def

303 }

304

305 do_unicode_token_test3 5.7 \

306 "tokenchars=\u2444\u2445" \

307 "separators=\u05D0\u05D1\u05D2" \

308 "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \

309 [list \

310 0 \u2444fre\u2445sh \u2444fre\u2445sh \

311 1 water water \

312 2 fish fish \

313 3 \u2445timer \u2445timer \

314 ]

315

316 # Check that it is not possible to add a standalone diacritic codepoint

317 # to either separators or tokenchars.

318 do_unicode_token_test3 5.8 "separators=\u0301" \

319 "hello\u0301world \u0301helloworld" \

320 "0 helloworld hello\u0301world 1 helloworld helloworld"

321

322 do_unicode_token_test3 5.9 "tokenchars=\u0301" \

323 "hello\u0301world \u0301helloworld" \

324 "0 helloworld hello\u0301world 1 helloworld helloworld"

325

326 do_unicode_token_test3 5.10 "separators=\u0301" \

327 "remove_diacritics=0" \

328 "hello\u0301world \u0301helloworld" \

329 "0 hello\u0301world hello\u0301world 1 helloworld helloworld"

330

331 do_unicode_token_test3 5.11 "tokenchars=\u0301" \

332 "remove_diacritics=0" \

333 "hello\u0301world \u0301helloworld" \

334 "0 hello\u0301world hello\u0301world 1 helloworld helloworld"

335

336

337 #-------------------------------------------------------------------------

338

# do_tokenize TOKENIZER TXT
#
# Run fts3_tokenizer_test(TOKENIZER, TXT) on the "db" handle, walk the
# returned list three elements at a time and return a list made of the
# second element of each triple. (Judging by the expected values used
# elsewhere in this file, a triple looks like: position, token, original
# text -- confirm against the fts3_tokenizer_test implementation.)
339 proc do_tokenize {tokenizer txt} {

340 set res [list]

341 foreach {a b c} [db one {SELECT fts3_tokenizer_test($tokenizer, $txt)}] {

342 lappend res $b

343 }

# [set res] with one argument yields the value of res, which becomes the
# proc's return value.
344 set res

345 }

346

347 # Argument $lCp must be a list of codepoints (integers) that

348 # correspond to whitespace characters. This command creates a string

349 # $W from the codepoints, then tokenizes "${W}hello${W}world${W}"

350 # using tokenizer $tokenizer. The test passes if the tokenizer successfully

351 # extracts the two 5 character tokens.

352 #

353 proc do_isspace_test {tn tokenizer lCp} {

# Build the whitespace string: one %c conversion per codepoint in lCp.
354 set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp]

# Surround and separate the two words with that whitespace string.
355 set txt "${whitespace}hello${whitespace}world${whitespace}"

# Run in the caller's scope; the test passes when do_tokenize returns
# exactly {hello world}.
356 uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]

357 }

358

# Tokenizers exercised by the whitespace tests below: always unicode61,
# plus icu when ifcapable reports the icu capability.
359 set tokenizers [list unicode61]

360 ifcapable icu { lappend tokenizers icu }

361

362 # Some tests to check that the tokenizers can both identify white-space

363 # codepoints. All codepoints tested below are of type "Zs" in the

364 # UnicodeData.txt file.

365 #

366 # Note that codepoint 6158 has changed from Zs to Cf in recent versions

367 # of UnicodeData.txt. So take that into account for the "icu" tests.

368 #

369 foreach T $tokenizers {

370 do_isspace_test 6.$T.1 $T 32

371 do_isspace_test 6.$T.2 $T 160

372 do_isspace_test 6.$T.3 $T 5760

373 if {$T!="icu"} {

374 do_isspace_test 6.$T.4 $T 6158

375 }

376 do_isspace_test 6.$T.5 $T 8192

377 do_isspace_test 6.$T.6 $T 8193

378 do_isspace_test 6.$T.7 $T 8194

379 do_isspace_test 6.$T.8 $T 8195

380 do_isspace_test 6.$T.9 $T 8196

381 do_isspace_test 6.$T.10 $T 8197

382 do_isspace_test 6.$T.11 $T 8198

383 do_isspace_test 6.$T.12 $T 8199

384 do_isspace_test 6.$T.13 $T 8200

385 do_isspace_test 6.$T.14 $T 8201

386 do_isspace_test 6.$T.15 $T 8202

387 do_isspace_test 6.$T.16 $T 8239

388 do_isspace_test 6.$T.17 $T 8287

389 do_isspace_test 6.$T.18 $T 12288

390

391 if {$T!="icu"} {

392 do_isspace_test 6.$T.19 $T {32 160 5760 6158}

393 } else {

394 do_isspace_test 6.$T.19 $T {32 160 5760 8192}

395 }

396 do_isspace_test 6.$T.20 $T {8192 8193 8194 8195}

397 do_isspace_test 6.$T.21 $T {8196 8197 8198 8199}

398 do_isspace_test 6.$T.22 $T {8200 8201 8202 8239}

399 do_isspace_test 6.$T.23 $T {8287 12288}

400 }

401

402 #-------------------------------------------------------------------------

403 # Test that the private use ranges are treated as alphanumeric.

404 #

405 foreach {tn1 c} {

406 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff

407 } {

408 foreach {tn2 config res} {

409 1 "" "0 helloworld helloworld"

410 2 "separators=*" "0 hello hello 1 world world"

411 } {

412 set config [string map [list * $c] $config]

413 set input [string map [list * $c] "hello*world"]

414 set output [string map [list * $c] $res]

415 do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output

416 }

417 }

418

419 #-------------------------------------------------------------------------

420 # Cursory test of remove_diacritics=0.

421 #

422 # 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS

423 # 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS

424 # 00E4;LATIN SMALL LETTER A WITH DIAERESIS

425 # 00F6;LATIN SMALL LETTER O WITH DIAERESIS

426 #

427 do_execsql_test 8.1.1 "

428 CREATE VIRTUAL TABLE t3 USING fts4(tokenize=unicode61 'remove_diacritics=1');

429 INSERT INTO t3 VALUES('o');

430 INSERT INTO t3 VALUES('a');

431 INSERT INTO t3 VALUES('O');

432 INSERT INTO t3 VALUES('A');

433 INSERT INTO t3 VALUES('\xD6');

434 INSERT INTO t3 VALUES('\xC4');

435 INSERT INTO t3 VALUES('\xF6');

436 INSERT INTO t3 VALUES('\xE4');

437 "

438 do_execsql_test 8.1.2 {

439 SELECT rowid FROM t3 WHERE t3 MATCH 'o';

440 } {1 3 5 7}

441 do_execsql_test 8.1.3 {

442 SELECT rowid FROM t3 WHERE t3 MATCH 'a';

443 } {2 4 6 8}

444 do_execsql_test 8.2.1 {

445 CREATE VIRTUAL TABLE t4 USING fts4(tokenize=unicode61 "remove_diacritics=0");

446 INSERT INTO t4 SELECT * FROM t3;

447 }

448 do_execsql_test 8.2.2 {

449 SELECT rowid FROM t4 WHERE t4 MATCH 'o';

450 } {1 3}

451 do_execsql_test 8.2.3 {

452 SELECT rowid FROM t4 WHERE t4 MATCH 'a';

453 } {2 4}

454

455 #-------------------------------------------------------------------------

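456 # Check that tokenizer options may be quoted with [], "", '' or `` in the CREATE VIRTUAL TABLE statement.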

457 foreach {tn sql} {

458 1 {

459 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);

460 CREATE VIRTUAL TABLE t6 USING fts4(

461 tokenize=unicode61 [tokenchars=="] "tokenchars=[]");

462 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);

463 }

464 2 {

465 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");

466 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");

467 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");

468 }

469 3 {

470 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');

471 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');

472 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');

473 }

474 4 {

475 CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);

476 CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);

477 CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);

478 }

479 } {

480 do_execsql_test 9.$tn.0 {

481 DROP TABLE IF EXISTS t5;

482 DROP TABLE IF EXISTS t5aux;

483 DROP TABLE IF EXISTS t6;

484 DROP TABLE IF EXISTS t6aux;

485 DROP TABLE IF EXISTS t7;

486 DROP TABLE IF EXISTS t7aux;

487 }

488 do_execsql_test 9.$tn.1 $sql

489

490 do_execsql_test 9.$tn.2 {

491 CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);

492 INSERT INTO t5 VALUES('one two three/four.five.six');

493 SELECT * FROM t5aux;

494 } {

495 four.five.six * 1 1 four.five.six 0 1 1

496 {one two three} * 1 1 {one two three} 0 1 1

497 }

498

499 do_execsql_test 9.$tn.3 {

500 CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);

501 INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');

502 SELECT * FROM t6aux;

503 } {

504 {alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1

505 {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1

506 }

507

508 do_execsql_test 9.$tn.4 {

509 CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);

510 INSERT INTO t7 VALUES('alephxbeth\xC4gimel');

511 SELECT * FROM t7aux;

512 } {

513 aleph * 1 1 aleph 0 1 1

514 beth * 1 1 beth 0 1 1

515 gimel * 1 1 gimel 0 1 1

516 }

517 }

518

519 # Check that multiple options are handled correctly.

520 #

521 do_execsql_test 10.1 {

522 DROP TABLE IF EXISTS t1;

523 CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61

524 "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"

525 "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"

526 );

527

528 INSERT INTO t1 VALUES('oneatwoxthreeyfour');

529 INSERT INTO t1 VALUES('a.single=word');

530 CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);

531 SELECT * FROM t1aux;

532 } {

533 .single=word * 1 1 .single=word 0 1 1

534 four * 1 1 four 0 1 1

535 one * 1 1 one 0 1 1

536 three * 1 1 three 0 1 1

537 two * 1 1 two 0 1 1

538 }

539

540 # Test that case folding happens after tokenization, not before.

541 #

542 do_execsql_test 10.2 {

543 DROP TABLE IF EXISTS t2;

544 CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");

545 INSERT INTO t2 VALUES('oneatwoBthree');

546 INSERT INTO t2 VALUES('onebtwoAthree');

547 CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);

548 SELECT * FROM t2aux;

549 } {

550 one * 1 1 one 0 1 1

551 onebtwoathree * 1 1 onebtwoathree 0 1 1

552 three * 1 1 three 0 1 1

553 two * 1 1 two 0 1 1

554 }

555

556 # Test that the tokenchars and separators options work with the

557 # fts3tokenize table.

558 #

559 do_execsql_test 11.1 {

560 CREATE VIRTUAL TABLE ft1 USING fts3tokenize(

561 "unicode61", "tokenchars=@.", "separators=1234567890"

562 );

563 SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';

564 } {

565 berlin@street sydney.road

566 }

567

568 finish_test

OLD	NEW