OLD | NEW |
1 # 2012 May 25 | 1 # 2012 May 25 |
2 # | 2 # |
3 # The author disclaims copyright to this source code. In place of | 3 # The author disclaims copyright to this source code. In place of |
4 # a legal notice, here is a blessing: | 4 # a legal notice, here is a blessing: |
5 # | 5 # |
6 # May you do good and not evil. | 6 # May you do good and not evil. |
7 # May you find forgiveness for yourself and forgive others. | 7 # May you find forgiveness for yourself and forgive others. |
8 # May you share freely, never taking more than you give. | 8 # May you share freely, never taking more than you give. |
9 # | 9 # |
10 #************************************************************************* | 10 #************************************************************************* |
(...skipping 344 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
355 set txt "${whitespace}hello${whitespace}world${whitespace}" | 355 set txt "${whitespace}hello${whitespace}world${whitespace}" |
356 uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}] | 356 uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}] |
357 } | 357 } |
358 | 358 |
359 set tokenizers [list unicode61] | 359 set tokenizers [list unicode61] |
360 ifcapable icu { lappend tokenizers icu } | 360 ifcapable icu { lappend tokenizers icu } |
361 | 361 |
362 # Some tests to check that both tokenizers can identify white-space | 362 # Some tests to check that both tokenizers can identify white-space |
363 # codepoints. All codepoints tested below are of type "Zs" in the | 363 # codepoints. All codepoints tested below are of type "Zs" in the |
364 # UnicodeData.txt file. | 364 # UnicodeData.txt file. |
| 365 # |
| 366 # Note that codepoint 6158 has changed from Zs to Cf in recent versions |
| 367 # of UnicodeData.txt, so the "icu" tests skip it or use 8192 instead. |
| 368 # |
365 foreach T $tokenizers { | 369 foreach T $tokenizers { |
366 do_isspace_test 6.$T.1 $T 32 | 370 do_isspace_test 6.$T.1 $T 32 |
367 do_isspace_test 6.$T.2 $T 160 | 371 do_isspace_test 6.$T.2 $T 160 |
368 do_isspace_test 6.$T.3 $T 5760 | 372 do_isspace_test 6.$T.3 $T 5760 |
369 do_isspace_test 6.$T.4 $T 6158 | 373 if {$T!="icu"} { |
| 374 do_isspace_test 6.$T.4 $T 6158 |
| 375 } |
370 do_isspace_test 6.$T.5 $T 8192 | 376 do_isspace_test 6.$T.5 $T 8192 |
371 do_isspace_test 6.$T.6 $T 8193 | 377 do_isspace_test 6.$T.6 $T 8193 |
372 do_isspace_test 6.$T.7 $T 8194 | 378 do_isspace_test 6.$T.7 $T 8194 |
373 do_isspace_test 6.$T.8 $T 8195 | 379 do_isspace_test 6.$T.8 $T 8195 |
374 do_isspace_test 6.$T.9 $T 8196 | 380 do_isspace_test 6.$T.9 $T 8196 |
375 do_isspace_test 6.$T.10 $T 8197 | 381 do_isspace_test 6.$T.10 $T 8197 |
376 do_isspace_test 6.$T.11 $T 8198 | 382 do_isspace_test 6.$T.11 $T 8198 |
377 do_isspace_test 6.$T.12 $T 8199 | 383 do_isspace_test 6.$T.12 $T 8199 |
378 do_isspace_test 6.$T.13 $T 8200 | 384 do_isspace_test 6.$T.13 $T 8200 |
379 do_isspace_test 6.$T.14 $T 8201 | 385 do_isspace_test 6.$T.14 $T 8201 |
380 do_isspace_test 6.$T.15 $T 8202 | 386 do_isspace_test 6.$T.15 $T 8202 |
381 do_isspace_test 6.$T.16 $T 8239 | 387 do_isspace_test 6.$T.16 $T 8239 |
382 do_isspace_test 6.$T.17 $T 8287 | 388 do_isspace_test 6.$T.17 $T 8287 |
383 do_isspace_test 6.$T.18 $T 12288 | 389 do_isspace_test 6.$T.18 $T 12288 |
384 | 390 |
385 do_isspace_test 6.$T.19 $T {32 160 5760 6158} | 391 if {$T!="icu"} { |
| 392 do_isspace_test 6.$T.19 $T {32 160 5760 6158} |
| 393 } else { |
| 394 do_isspace_test 6.$T.19 $T {32 160 5760 8192} |
| 395 } |
386 do_isspace_test 6.$T.20 $T {8192 8193 8194 8195} | 396 do_isspace_test 6.$T.20 $T {8192 8193 8194 8195} |
387 do_isspace_test 6.$T.21 $T {8196 8197 8198 8199} | 397 do_isspace_test 6.$T.21 $T {8196 8197 8198 8199} |
388 do_isspace_test 6.$T.22 $T {8200 8201 8202 8239} | 398 do_isspace_test 6.$T.22 $T {8200 8201 8202 8239} |
389 do_isspace_test 6.$T.23 $T {8287 12288} | 399 do_isspace_test 6.$T.23 $T {8287 12288} |
390 } | 400 } |
391 | 401 |
392 #------------------------------------------------------------------------- | 402 #------------------------------------------------------------------------- |
393 # Test that the private use ranges are treated as alphanumeric. | 403 # Test that the private use ranges are treated as alphanumeric. |
394 # | 404 # |
395 foreach {tn1 c} { | 405 foreach {tn1 c} { |
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
549 do_execsql_test 11.1 { | 559 do_execsql_test 11.1 { |
550 CREATE VIRTUAL TABLE ft1 USING fts3tokenize( | 560 CREATE VIRTUAL TABLE ft1 USING fts3tokenize( |
551 "unicode61", "tokenchars=@.", "separators=1234567890" | 561 "unicode61", "tokenchars=@.", "separators=1234567890" |
552 ); | 562 ); |
553 SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road'; | 563 SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road'; |
554 } { | 564 } { |
555 berlin@street sydney.road | 565 berlin@street sydney.road |
556 } | 566 } |
557 | 567 |
558 finish_test | 568 finish_test |
OLD | NEW |