Index: third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5tokenizer.test |
diff --git a/third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5tokenizer.test b/third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5tokenizer.test |
deleted file mode 100644 |
index 9316d3c23452c3e1f8ef3d2b3dfc805518bd8ee2..0000000000000000000000000000000000000000 |
--- a/third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5tokenizer.test |
+++ /dev/null |
@@ -1,266 +0,0 @@ |
-# 2014 Dec 20 |
-# |
-# The author disclaims copyright to this source code. In place of |
-# a legal notice, here is a blessing: |
-# |
-# May you do good and not evil. |
-# May you find forgiveness for yourself and forgive others. |
-# May you share freely, never taking more than you give. |
-# |
-#*********************************************************************** |
-# |
-# Tests focusing on the built-in fts5 tokenizers. |
-# |
- |
-source [file join [file dirname [info script]] fts5_common.tcl] |
-set testprefix fts5tokenizer |
- |
-# If SQLITE_ENABLE_FTS5 is defined, omit this file. |
-ifcapable !fts5 { |
- finish_test |
- return |
-} |
- |
- |
-do_execsql_test 1.0 { |
- CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter); |
- DROP TABLE ft1; |
-} |
-do_execsql_test 1.1 { |
- CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize='porter'); |
- DROP TABLE ft1; |
-} |
-do_execsql_test 1.2 { |
- CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = porter); |
- DROP TABLE ft1; |
-} |
-do_execsql_test 1.3 { |
- CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter'); |
- DROP TABLE ft1; |
-} |
-do_execsql_test 1.4 { |
- CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter ascii'); |
- DROP TABLE ft1; |
-} |
- |
-do_catchsql_test 1.5 { |
- CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'nosuch'); |
-} {1 {no such tokenizer: nosuch}} |
- |
-do_catchsql_test 1.6 { |
- CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter nosuch'); |
-} {1 {error in tokenizer constructor}} |
- |
-do_execsql_test 2.0 { |
- CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter); |
- INSERT INTO ft1 VALUES('embedded databases'); |
-} |
-do_execsql_test 2.1 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'embedding' } 1 |
-do_execsql_test 2.2 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database' } 1 |
-do_execsql_test 2.3 { |
- SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding' |
-} 1 |
- |
-proc tcl_create {args} { |
- set ::targs $args |
- error "failed" |
-} |
-sqlite3_fts5_create_tokenizer db tcl tcl_create |
- |
-foreach {tn directive expected} { |
- 1 {tokenize='tcl a b c'} {a b c} |
- 2 {tokenize='tcl ''d'' ''e'' ''f'''} {d e f} |
- 3 {tokenize="tcl 'g' 'h' 'i'"} {g h i} |
- 4 {tokenize = tcl} {} |
-} { |
- do_catchsql_test 3.$tn.1 " |
- CREATE VIRTUAL TABLE ft2 USING fts5(x, $directive) |
- " {1 {error in tokenizer constructor}} |
- do_test 3.$tn.2 { set ::targs } $expected |
-} |
- |
-do_catchsql_test 4.1 { |
- CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc); |
-} {1 {parse error in "tokenize = tcl abc"}} |
-do_catchsql_test 4.2 { |
- CREATE VIRTUAL TABLE ft2 USING fts5(x y) |
-} {1 {unrecognized column option: y}} |
- |
-#------------------------------------------------------------------------- |
-# Test the "separators" and "tokenchars" options a bit. |
-# |
-foreach {tn tokenizer} {1 ascii 2 unicode61} { |
- reset_db |
- set T "$tokenizer tokenchars ',.:' separators 'xyz'" |
- execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")" |
- do_execsql_test 5.$tn.1 { |
- INSERT INTO t1 VALUES('abcxdefyghizjkl.mno,pqr:stu/vwx+yz'); |
- } |
- foreach {tn2 token res} { |
- 1 abc 1 2 def 1 3 ghi 1 4 jkl {} |
- 5 mno {} 6 pqr {} 7 stu {} 8 jkl.mno,pqr:stu 1 |
- 9 vw 1 |
- } { |
- do_execsql_test 5.$tn.2.$tn2 " |
- SELECT rowid FROM t1 WHERE t1 MATCH '\"$token\"' |
- " $res |
- } |
-} |
- |
-#------------------------------------------------------------------------- |
-# Miscellaneous tests for the ascii tokenizer. |
-# |
-# 5.1.*: Test that the ascii tokenizer ignores non-ASCII characters in the |
-# 'separators' option. But unicode61 does not. |
-# |
-# 5.2.*: An option without an argument is an error. |
-# |
- |
-do_test 5.1.1 { |
- execsql " |
- CREATE VIRTUAL TABLE a1 USING fts5(x, tokenize=`ascii separators '\u1234'`); |
- INSERT INTO a1 VALUES('abc\u1234def'); |
- " |
- execsql { SELECT rowid FROM a1 WHERE a1 MATCH 'def' } |
-} {} |
- |
-do_test 5.1.2 { |
- execsql " |
- CREATE VIRTUAL TABLE a2 USING fts5( |
- x, tokenize=`unicode61 separators '\u1234'`); |
- INSERT INTO a2 VALUES('abc\u1234def'); |
- " |
- execsql { SELECT rowid FROM a2 WHERE a2 MATCH 'def' } |
-} {1} |
- |
-do_catchsql_test 5.2 { |
- CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii tokenchars'); |
-} {1 {error in tokenizer constructor}} |
-do_catchsql_test 5.3 { |
- CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg'); |
-} {1 {error in tokenizer constructor}} |
- |
-#------------------------------------------------------------------------- |
-# Test that the ASCII and unicode61 tokenizers both handle SQLITE_DONE |
-# correctly. |
-# |
- |
-proc test_token_cb {varname token iStart iEnd} { |
- upvar $varname var |
- lappend var $token |
- if {[llength $var]==3} { return "SQLITE_DONE" } |
- return "SQLITE_OK" |
-} |
- |
-proc tokenize {cmd} { |
- set res [list] |
- $cmd xTokenize [$cmd xColumnText 0] [list test_token_cb res] |
- set res |
-} |
-sqlite3_fts5_create_function db tokenize tokenize |
- |
-do_execsql_test 6.0 { |
- CREATE VIRTUAL TABLE x1 USING fts5(a, tokenize=ascii); |
- INSERT INTO x1 VALUES('q w e r t y'); |
- INSERT INTO x1 VALUES('y t r e w q'); |
- SELECT tokenize(x1) FROM x1 WHERE x1 MATCH 'e AND r'; |
-} { |
- {q w e} {y t r} |
-} |
- |
-do_execsql_test 6.1 { |
- CREATE VIRTUAL TABLE x2 USING fts5(a, tokenize=unicode61); |
- INSERT INTO x2 VALUES('q w e r t y'); |
- INSERT INTO x2 VALUES('y t r e w q'); |
- SELECT tokenize(x2) FROM x2 WHERE x2 MATCH 'e AND r'; |
-} { |
- {q w e} {y t r} |
-} |
- |
- |
-#------------------------------------------------------------------------- |
-# Miscellaneous tests for the unicode tokenizer. |
-# |
-do_catchsql_test 6.1 { |
- CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars'); |
-} {1 {error in tokenizer constructor}} |
-do_catchsql_test 6.2 { |
- CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b'); |
-} {1 {error in tokenizer constructor}} |
-do_catchsql_test 6.3 { |
- CREATE VIRTUAL TABLE a3 USING fts5( |
- x, y, tokenize = 'unicode61 remove_diacritics 2' |
- ); |
-} {1 {error in tokenizer constructor}} |
-do_catchsql_test 6.4 { |
- CREATE VIRTUAL TABLE a3 USING fts5( |
- x, y, tokenize = 'unicode61 remove_diacritics 10' |
- ); |
-} {1 {error in tokenizer constructor}} |
- |
-#------------------------------------------------------------------------- |
-# Porter tokenizer with very large tokens. |
-# |
-set a [string repeat a 100] |
-set b [string repeat b 500] |
-set c [string repeat c 1000] |
-do_execsql_test 7.0 { |
- CREATE VIRTUAL TABLE e5 USING fts5(x, tokenize=porter); |
- INSERT INTO e5 VALUES($a || ' ' || $b); |
- INSERT INTO e5 VALUES($b || ' ' || $c); |
- INSERT INTO e5 VALUES($c || ' ' || $a); |
-} |
- |
-do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 } |
-do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 } |
-do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 } |
- |
-#------------------------------------------------------------------------- |
-# Test the 'separators' option with the unicode61 tokenizer. |
-# |
-do_execsql_test 8.1 { |
- BEGIN; |
- CREATE VIRTUAL TABLE e6 USING fts5(x, |
- tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
- ); |
- INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog'); |
- CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); |
- SELECT term FROM e7; |
- ROLLBACK; |
-} { |
- brown dog fox jumped lazy over quick the |
-} |
- |
-do_execsql_test 8.2 [subst { |
- BEGIN; |
- CREATE VIRTUAL TABLE e6 USING fts5(x, |
- tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'" |
- ); |
- INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01' |
- || 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog' |
- ); |
- INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09'); |
- CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); |
- SELECT term FROM e7; |
- ROLLBACK; |
-}] [subst { |
- brown dog fox jumped lazy over quick the \u0E08 \u0E09 |
-}] |
- |
-# Test that the porter tokenizer correctly passes arguments through to |
-# its parent tokenizer. |
-do_execsql_test 8.3 { |
- BEGIN; |
- CREATE VIRTUAL TABLE e6 USING fts5(x, |
- tokenize="porter unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
- ); |
- INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog'); |
- CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row'); |
- SELECT term FROM e7; |
- ROLLBACK; |
-} { |
- brown dog fox jump lazi over quick the |
-} |
- |
-finish_test |
- |