OLD | NEW |
(Empty) | |
| 1 # 2014 Dec 20 |
| 2 # |
| 3 # The author disclaims copyright to this source code. In place of |
| 4 # a legal notice, here is a blessing: |
| 5 # |
| 6 # May you do good and not evil. |
| 7 # May you find forgiveness for yourself and forgive others. |
| 8 # May you share freely, never taking more than you give. |
| 9 # |
| 10 #*********************************************************************** |
| 11 # |
| 12 # Tests focusing on the fts5 tokenizers |
| 13 # |
| 14 |
| 15 source [file join [file dirname [info script]] fts5_common.tcl] |
| 16 set testprefix fts5unicode |
| 17 |
| 18 # If SQLITE_ENABLE_FTS5 is not defined, omit this file. |
| 19 ifcapable !fts5 { |
| 20 finish_test |
| 21 return |
| 22 } |
| 23 # Run do_test $tn: tokenize $input with $tokenizer, expect token list $output. [set z] is used since subst -nocommands would expand a bare $z early. |
| 24 proc tokenize_test {tn tokenizer input output} { |
| 25 uplevel [list do_test $tn [subst -nocommands { |
| 26 set ret {} |
| 27 foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] { |
| 28 lappend ret [set z] |
| 29 } |
| 30 set ret |
| 31 }] [list {*}$output]] |
| 32 } |
| 33 # Tests 1.*: expect ascii and unicode61 alike to lower-case ASCII, drop '.' and ',' separators, and return no tokens for empty input. |
| 34 foreach {tn t} {1 ascii 2 unicode61} { |
| 35 tokenize_test 1.$tn.0 $t {A B C D} {a b c d} |
| 36 tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely} |
| 37 tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely} |
| 38 tokenize_test 1.$tn.3 $t {} {} |
| 39 } |
| 40 |
| 41 #------------------------------------------------------------------------- |
| 42 # Check that "unicode61" really is the default tokenizer. |
| 43 # |
| 44 |
| 45 do_execsql_test 2.0 " |
| 46 CREATE VIRTUAL TABLE t1 USING fts5(x); |
| 47 CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = unicode61); |
| 48 CREATE VIRTUAL TABLE t3 USING fts5(x, tokenize = ascii); |
| 49 INSERT INTO t1 VALUES('\xC0\xC8\xCC'); |
| 50 INSERT INTO t2 VALUES('\xC0\xC8\xCC'); |
| 51 INSERT INTO t3 VALUES('\xC0\xC8\xCC'); |
| 52 " |
| 53 # The query uses U+00E0/E8/EC, the lower-case forms of the inserted U+00C0/C8/CC; only the ascii table (t3) must fail to match. |
| 54 do_execsql_test 2.1 " |
| 55 SELECT 't1' FROM t1 WHERE t1 MATCH '\xE0\xE8\xEC'; |
| 56 SELECT 't2' FROM t2 WHERE t2 MATCH '\xE0\xE8\xEC'; |
| 57 SELECT 't3' FROM t3 WHERE t3 MATCH '\xE0\xE8\xEC'; |
| 58 " {t1 t2} |
| 59 |
| 60 |
| 61 finish_test |
| 62 |
OLD | NEW |