Index: third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5unicode.test |
diff --git a/third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5unicode.test b/third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5unicode.test |
new file mode 100644 |
index 0000000000000000000000000000000000000000..46f4c4f1aaaaa429f37d9da4e1eee8b8f4ecb420 |
--- /dev/null |
+++ b/third_party/sqlite/sqlite-src-3170000/ext/fts5/test/fts5unicode.test |
@@ -0,0 +1,62 @@ |
+# 2014 Dec 20 |
+# |
+# The author disclaims copyright to this source code. In place of |
+# a legal notice, here is a blessing: |
+# |
+# May you do good and not evil. |
+# May you find forgiveness for yourself and forgive others. |
+# May you share freely, never taking more than you give. |
+# |
+#*********************************************************************** |
+# |
+# Tests focusing on the fts5 tokenizers |
+# |
+ |
+source [file join [file dirname [info script]] fts5_common.tcl] |
+set testprefix fts5unicode |
+ |
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file. |
+ifcapable !fts5 { |
+ finish_test |
+ return |
+} |
+# do_test $tn: the first field of each {z s e} triple from sqlite3_fts5_tokenize on $input must equal $output. |
+proc tokenize_test {tn tokenizer input output} { |
+ uplevel [list do_test $tn [subst -nocommands { |
+ set ret {} |
+ foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] { |
+ lappend ret [set z] |
+ } |
+ set ret |
+ }] [list {*}$output]] |
+} |
+# Cases 1.N.*: the same four inputs are run through each listed tokenizer. |
+foreach {idx tokname} [list 1 ascii 2 unicode61] { |
+ tokenize_test 1.$idx.0 $tokname "A B C D" "a b c d" |
+ tokenize_test 1.$idx.1 $tokname "May you share freely," "may you share freely" |
+ tokenize_test 1.$idx.2 $tokname "..May...you.shAre.freely" "may you share freely" |
+ tokenize_test 1.$idx.3 $tokname "" "" |
+} |
+ |
+#------------------------------------------------------------------------- |
+# Check that "unicode61" really is the default tokenizer. |
+# |
+ |
+do_execsql_test 2.0 " |
+ CREATE VIRTUAL TABLE t1 USING fts5(x); |
+ CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = unicode61); |
+ CREATE VIRTUAL TABLE t3 USING fts5(x, tokenize = ascii); |
+ INSERT INTO t1 VALUES('\xC0\xC8\xCC'); |
+ INSERT INTO t2 VALUES('\xC0\xC8\xCC'); |
+ INSERT INTO t3 VALUES('\xC0\xC8\xCC'); |
+" |
+# 2.1 queries the lowercase forms (U+00E0..); only t1 (default) and t2 (unicode61) may match, not ascii t3. |
+do_execsql_test 2.1 " |
+ SELECT 't1' FROM t1 WHERE t1 MATCH '\xE0\xE8\xEC'; |
+ SELECT 't2' FROM t2 WHERE t2 MATCH '\xE0\xE8\xEC'; |
+ SELECT 't3' FROM t3 WHERE t3 MATCH '\xE0\xE8\xEC'; |
+" {t1 t2} |
+ |
+ |
+finish_test |
+ |