Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1360)

Unified Diff: third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5unicode2.test

Issue 1610543003: [sql] Import reference version of SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5unicode2.test
diff --git a/third_party/sqlite/src/test/fts4unicode.test b/third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5unicode2.test
similarity index 75%
copy from third_party/sqlite/src/test/fts4unicode.test
copy to third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5unicode2.test
index f237119a1871b250da71515ad124aa2de706f19d..d3ff5128dafc0def5089a0dac74aa7c8c50a73d9 100644
--- a/third_party/sqlite/src/test/fts4unicode.test
+++ b/third_party/sqlite/sqlite-src-3100200/ext/fts5/test/fts5unicode2.test
@@ -11,69 +11,76 @@
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
+# This is a modified copy of FTS4 test file "fts4unicode.test".
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5unicode2
-set testdir [file dirname $argv0]
-source $testdir/tester.tcl
-ifcapable !fts3_unicode { finish_test ; return }
-set ::testprefix fts4unicode
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
proc do_unicode_token_test {tn input res} {
- set input [string map {' ''} $input]
- uplevel [list do_execsql_test $tn "
- SELECT fts3_tokenizer_test('unicode61', 'remove_diacritics=0', '$input');
- " [list [list {*}$res]]]
+ uplevel [list do_test $tn [list \
+ sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input
+ ] [list {*}$res]]
}
proc do_unicode_token_test2 {tn input res} {
- set input [string map {' ''} $input]
- uplevel [list do_execsql_test $tn "
- SELECT fts3_tokenizer_test('unicode61', '$input');
- " [list [list {*}$res]]]
+ uplevel [list do_test $tn [list \
+ sqlite3_fts5_tokenize -subst db "unicode61" $input
+ ] [list {*}$res]]
}
proc do_unicode_token_test3 {tn args} {
- set res [lindex $args end]
- set sql "SELECT fts3_tokenizer_test('unicode61'"
- foreach a [lrange $args 0 end-1] {
- append sql ", '"
- append sql [string map {' ''} $a]
- append sql "'"
- }
- append sql ")"
- uplevel [list do_execsql_test $tn $sql [list [list {*}$res]]]
+ set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
+ set input [lindex $args end-1]
+ set res [lindex $args end]
+ uplevel [list do_test $tn [list \
+ sqlite3_fts5_tokenize -subst db $tokenizer $input
+ ] [list {*}$res]]
}
-do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
+do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}
do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
- "0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC"
+ "\uE4 \uC4 \uF6 \uD6 \uFC \uDC"
do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
- "0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx"
+ "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"
# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
-do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
-do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E"
+do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
+do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"
do_unicode_token_test 1.5 "The quick brown fox" {
- 0 the The 1 quick quick 2 brown brown 3 fox fox
+ the The quick quick brown brown fox fox
}
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
- 0 the The 1 quick quick 2 brown brown 3 fox fox
+ the The quick quick brown brown fox fox
}
-do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D}
-do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC"
+do_unicode_token_test2 1.7 {a B c D} {a a b B c c d D}
+do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"
do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \
- "0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx"
+ "xax x\uC4x xox x\uD6x xux x\uDCx"
# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
-do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx"
+do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"
# Title-case mappings work
-do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5"
+do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"
+
+do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
+ "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"
+
+do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
+ "abc abc def def"
#-------------------------------------------------------------------------
#
@@ -128,7 +135,7 @@ proc mapdoc {doc} {
}
do_test 2.0 {
- execsql { CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61, x); }
+ execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
foreach doc $docs {
set d [mapdoc $doc]
execsql { INSERT INTO t2 VALUES($d) }
@@ -166,7 +173,9 @@ foreach {tn query snippet} {
} {
do_test 2.$tn {
set q [mapdoc $query]
- execsql { SELECT snippet(t2, '[', ']', '...') FROM t2 WHERE t2 MATCH $q }
+ execsql {
+ SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q
+ }
} [list [mapdoc $snippet]]
}
@@ -175,12 +184,12 @@ foreach {tn query snippet} {
# NULL pointer.
reset_db
do_execsql_test 3.1 {
- CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x, y);
+ CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
INSERT INTO t1 VALUES(NULL, 'a b c');
}
do_execsql_test 3.2 {
- SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'b'
+ SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}
do_execsql_test 3.3 {
@@ -222,12 +231,16 @@ do_test 4.1 {
set c "\uFFFEdef"
set d "\uD800def"
execsql {
- CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x);
+ CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
+
+ execsql "CREATE VIRTUAL TABLE t8 USING fts5(
+ a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
+ )"
} {}
do_test 4.2 {
@@ -256,89 +269,99 @@ do_test 4.3 {
}
} {}
+do_test 4.4 {
+ sqlite3_exec_hex db {
+ CREATE VIRTUAL TABLE t9 USING fts5(a, b,
+ tokenize="unicode61 separators '%C09004'"
+ );
+ INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
+ }
+} {0 {}}
+
+
#-------------------------------------------------------------------------
-do_unicode_token_test3 5.1 {tokenchars=} {
+breakpoint
+do_unicode_token_test3 5.1 {tokenchars {}} {
sqlite3_reset sqlite3_column_int
} {
- 0 sqlite3 sqlite3
- 1 reset reset
- 2 sqlite3 sqlite3
- 3 column column
- 4 int int
+ sqlite3 sqlite3
+ reset reset
+ sqlite3 sqlite3
+ column column
+ int int
}
-do_unicode_token_test3 5.2 {tokenchars=_} {
+do_unicode_token_test3 5.2 {tokenchars _} {
sqlite3_reset sqlite3_column_int
} {
- 0 sqlite3_reset sqlite3_reset
- 1 sqlite3_column_int sqlite3_column_int
+ sqlite3_reset sqlite3_reset
+ sqlite3_column_int sqlite3_column_int
}
-do_unicode_token_test3 5.3 {separators=xyz} {
+do_unicode_token_test3 5.3 {separators xyz} {
Laotianxhorseyrunszfast
} {
- 0 laotian Laotian
- 1 horse horse
- 2 runs runs
- 3 fast fast
+ laotian Laotian
+ horse horse
+ runs runs
+ fast fast
}
-do_unicode_token_test3 5.4 {tokenchars=xyz} {
+do_unicode_token_test3 5.4 {tokenchars xyz} {
Laotianxhorseyrunszfast
} {
- 0 laotianxhorseyrunszfast Laotianxhorseyrunszfast
+ laotianxhorseyrunszfast Laotianxhorseyrunszfast
}
-do_unicode_token_test3 5.5 {tokenchars=_} {separators=zyx} {
+do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
- 0 sqlite3_reset sqlite3_reset
- 1 sqlite3_column_int sqlite3_column_int
- 2 honda_phantom honda_phantom
+ sqlite3_reset sqlite3_reset
+ sqlite3_column_int sqlite3_column_int
+ honda_phantom honda_phantom
}
-do_unicode_token_test3 5.6 "separators=\u05D1" "abc\u05D1def" {
- 0 abc abc 1 def def
+do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
+ abc abc def def
}
do_unicode_token_test3 5.7 \
- "tokenchars=\u2444\u2445" \
- "separators=\u05D0\u05D1\u05D2" \
+ "tokenchars \u2444\u2445" \
+ "separators \u05D0\u05D1\u05D2" \
"\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
[list \
- 0 \u2444fre\u2445sh \u2444fre\u2445sh \
- 1 water water \
- 2 fish fish \
- 3 \u2445timer \u2445timer \
+ \u2444fre\u2445sh \u2444fre\u2445sh \
+ water water \
+ fish fish \
+ \u2445timer \u2445timer \
]
# Check that it is not possible to add a standalone diacritic codepoint
# to either separators or tokenchars.
-do_unicode_token_test3 5.8 "separators=\u0301" \
+do_unicode_token_test3 5.8 "separators \u0301" \
"hello\u0301world \u0301helloworld" \
- "0 helloworld hello\u0301world 1 helloworld helloworld"
+ "helloworld hello\u0301world helloworld helloworld"
-do_unicode_token_test3 5.9 "tokenchars=\u0301" \
+do_unicode_token_test3 5.9 "tokenchars \u0301" \
"hello\u0301world \u0301helloworld" \
- "0 helloworld hello\u0301world 1 helloworld helloworld"
+ "helloworld hello\u0301world helloworld helloworld"
-do_unicode_token_test3 5.10 "separators=\u0301" \
- "remove_diacritics=0" \
+do_unicode_token_test3 5.10 "separators \u0301" \
+ "remove_diacritics 0" \
"hello\u0301world \u0301helloworld" \
- "0 hello\u0301world hello\u0301world 1 helloworld helloworld"
+ "hello\u0301world hello\u0301world helloworld helloworld"
-do_unicode_token_test3 5.11 "tokenchars=\u0301" \
- "remove_diacritics=0" \
+do_unicode_token_test3 5.11 "tokenchars \u0301" \
+ "remove_diacritics 0" \
"hello\u0301world \u0301helloworld" \
- "0 hello\u0301world hello\u0301world 1 helloworld helloworld"
-
+ "hello\u0301world hello\u0301world helloworld helloworld"
#-------------------------------------------------------------------------
proc do_tokenize {tokenizer txt} {
set res [list]
- foreach {a b c} [db one {SELECT fts3_tokenizer_test($tokenizer, $txt)}] {
+ foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
lappend res $b
}
set res
@@ -357,7 +380,7 @@ proc do_isspace_test {tn tokenizer lCp} {
}
set tokenizers [list unicode61]
-ifcapable icu { lappend tokenizers icu }
+#ifcapable icu { lappend tokenizers icu }
# Some tests to check that the tokenizers can both identify white-space
# codepoints. All codepoints tested below are of type "Zs" in the
@@ -389,6 +412,7 @@ foreach T $tokenizers {
do_isspace_test 6.$T.23 $T {8287 12288}
}
+
#-------------------------------------------------------------------------
# Test that the private use ranges are treated as alphanumeric.
#
@@ -396,8 +420,8 @@ foreach {tn1 c} {
1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
foreach {tn2 config res} {
- 1 "" "0 hello*world hello*world"
- 2 "separators=*" "0 hello hello 1 world world"
+ 1 "" "hello*world hello*world"
+ 2 "separators *" "hello hello world world"
} {
set config [string map [list * $c] $config]
set input [string map [list * $c] "hello*world"]
@@ -415,7 +439,9 @@ foreach {tn1 c} {
# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
#
do_execsql_test 8.1.1 "
- CREATE VIRTUAL TABLE t3 USING fts4(tokenize=unicode61 'remove_diacritics=1');
+ CREATE VIRTUAL TABLE t3 USING fts5(
+ content, tokenize='unicode61 remove_diacritics 1'
+ );
INSERT INTO t3 VALUES('o');
INSERT INTO t3 VALUES('a');
INSERT INTO t3 VALUES('O');
@@ -426,24 +452,27 @@ do_execsql_test 8.1.1 "
INSERT INTO t3 VALUES('\xE4');
"
do_execsql_test 8.1.2 {
- SELECT rowid FROM t3 WHERE t3 MATCH 'o';
+ SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
} {1 3 5 7}
do_execsql_test 8.1.3 {
- SELECT rowid FROM t3 WHERE t3 MATCH 'a';
+ SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
} {2 4 6 8}
do_execsql_test 8.2.1 {
- CREATE VIRTUAL TABLE t4 USING fts4(tokenize=unicode61 "remove_diacritics=0");
- INSERT INTO t4 SELECT * FROM t3;
+ CREATE VIRTUAL TABLE t4 USING fts5(
+ content, tokenize='unicode61 remove_diacritics 0'
+ );
+ INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
}
do_execsql_test 8.2.2 {
- SELECT rowid FROM t4 WHERE t4 MATCH 'o';
+ SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
} {1 3}
do_execsql_test 8.2.3 {
- SELECT rowid FROM t4 WHERE t4 MATCH 'a';
+ SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}
#-------------------------------------------------------------------------
#
+if 0 {
foreach {tn sql} {
1 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
@@ -555,4 +584,6 @@ do_execsql_test 11.1 {
berlin@street sydney.road
}
+}
+
finish_test

Powered by Google App Engine
This is Rietveld 408576698