| OLD | NEW |
| 1 # 2012 May 25 | 1 # 2012 May 25 |
| 2 # | 2 # |
| 3 # The author disclaims copyright to this source code. In place of | 3 # The author disclaims copyright to this source code. In place of |
| 4 # a legal notice, here is a blessing: | 4 # a legal notice, here is a blessing: |
| 5 # | 5 # |
| 6 # May you do good and not evil. | 6 # May you do good and not evil. |
| 7 # May you find forgiveness for yourself and forgive others. | 7 # May you find forgiveness for yourself and forgive others. |
| 8 # May you share freely, never taking more than you give. | 8 # May you share freely, never taking more than you give. |
| 9 # | 9 # |
| 10 #************************************************************************* | 10 #************************************************************************* |
| (...skipping 344 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 355 set txt "${whitespace}hello${whitespace}world${whitespace}" | 355 set txt "${whitespace}hello${whitespace}world${whitespace}" |
| 356 uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}] | 356 uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}] |
| 357 } | 357 } |
| 358 | 358 |
| 359 set tokenizers [list unicode61] | 359 set tokenizers [list unicode61] |
| 360 ifcapable icu { lappend tokenizers icu } | 360 ifcapable icu { lappend tokenizers icu } |
| 361 | 361 |
| 362 # Some tests to check that the tokenizers can both identify white-space | 362 # Some tests to check that the tokenizers can both identify white-space |
| 363 # codepoints. All codepoints tested below are of type "Zs" in the | 363 # codepoints. All codepoints tested below are of type "Zs" in the |
| 364 # UnicodeData.txt file. | 364 # UnicodeData.txt file. |
| 365 # |
| 366 # Note that codepoint 6158 has changed from Zs to Cf in recent versions |
| 367 # of UnicodeData.txt, so the "icu" tests omit it (test .19 uses 8192 instead). |
| 368 # |
| 365 foreach T $tokenizers { | 369 foreach T $tokenizers { |
| 366 do_isspace_test 6.$T.1 $T 32 | 370 do_isspace_test 6.$T.1 $T 32 |
| 367 do_isspace_test 6.$T.2 $T 160 | 371 do_isspace_test 6.$T.2 $T 160 |
| 368 do_isspace_test 6.$T.3 $T 5760 | 372 do_isspace_test 6.$T.3 $T 5760 |
| 369 do_isspace_test 6.$T.4 $T 6158 | 373 if {$T!="icu"} { |
| 374 do_isspace_test 6.$T.4 $T 6158 |
| 375 } |
| 370 do_isspace_test 6.$T.5 $T 8192 | 376 do_isspace_test 6.$T.5 $T 8192 |
| 371 do_isspace_test 6.$T.6 $T 8193 | 377 do_isspace_test 6.$T.6 $T 8193 |
| 372 do_isspace_test 6.$T.7 $T 8194 | 378 do_isspace_test 6.$T.7 $T 8194 |
| 373 do_isspace_test 6.$T.8 $T 8195 | 379 do_isspace_test 6.$T.8 $T 8195 |
| 374 do_isspace_test 6.$T.9 $T 8196 | 380 do_isspace_test 6.$T.9 $T 8196 |
| 375 do_isspace_test 6.$T.10 $T 8197 | 381 do_isspace_test 6.$T.10 $T 8197 |
| 376 do_isspace_test 6.$T.11 $T 8198 | 382 do_isspace_test 6.$T.11 $T 8198 |
| 377 do_isspace_test 6.$T.12 $T 8199 | 383 do_isspace_test 6.$T.12 $T 8199 |
| 378 do_isspace_test 6.$T.13 $T 8200 | 384 do_isspace_test 6.$T.13 $T 8200 |
| 379 do_isspace_test 6.$T.14 $T 8201 | 385 do_isspace_test 6.$T.14 $T 8201 |
| 380 do_isspace_test 6.$T.15 $T 8202 | 386 do_isspace_test 6.$T.15 $T 8202 |
| 381 do_isspace_test 6.$T.16 $T 8239 | 387 do_isspace_test 6.$T.16 $T 8239 |
| 382 do_isspace_test 6.$T.17 $T 8287 | 388 do_isspace_test 6.$T.17 $T 8287 |
| 383 do_isspace_test 6.$T.18 $T 12288 | 389 do_isspace_test 6.$T.18 $T 12288 |
| 384 | 390 |
| 385 do_isspace_test 6.$T.19 $T {32 160 5760 6158} | 391 if {$T!="icu"} { |
| 392 do_isspace_test 6.$T.19 $T {32 160 5760 6158} |
| 393 } else { |
| 394 do_isspace_test 6.$T.19 $T {32 160 5760 8192} |
| 395 } |
| 386 do_isspace_test 6.$T.20 $T {8192 8193 8194 8195} | 396 do_isspace_test 6.$T.20 $T {8192 8193 8194 8195} |
| 387 do_isspace_test 6.$T.21 $T {8196 8197 8198 8199} | 397 do_isspace_test 6.$T.21 $T {8196 8197 8198 8199} |
| 388 do_isspace_test 6.$T.22 $T {8200 8201 8202 8239} | 398 do_isspace_test 6.$T.22 $T {8200 8201 8202 8239} |
| 389 do_isspace_test 6.$T.23 $T {8287 12288} | 399 do_isspace_test 6.$T.23 $T {8287 12288} |
| 390 } | 400 } |
| 391 | 401 |
| 392 #------------------------------------------------------------------------- | 402 #------------------------------------------------------------------------- |
| 393 # Test that the private use ranges are treated as alphanumeric. | 403 # Test that the private use ranges are treated as alphanumeric. |
| 394 # | 404 # |
| 395 foreach {tn1 c} { | 405 foreach {tn1 c} { |
| (...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 549 do_execsql_test 11.1 { | 559 do_execsql_test 11.1 { |
| 550 CREATE VIRTUAL TABLE ft1 USING fts3tokenize( | 560 CREATE VIRTUAL TABLE ft1 USING fts3tokenize( |
| 551 "unicode61", "tokenchars=@.", "separators=1234567890" | 561 "unicode61", "tokenchars=@.", "separators=1234567890" |
| 552 ); | 562 ); |
| 553 SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road'; | 563 SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road'; |
| 554 } { | 564 } { |
| 555 berlin@street sydney.road | 565 berlin@street sydney.road |
| 556 } | 566 } |
| 557 | 567 |
| 558 finish_test | 568 finish_test |
| OLD | NEW |