OLD | NEW |
| (Empty) |
1 # 2014 Dec 20 | |
2 # | |
3 # The author disclaims copyright to this source code. In place of | |
4 # a legal notice, here is a blessing: | |
5 # | |
6 # May you do good and not evil. | |
7 # May you find forgiveness for yourself and forgive others. | |
8 # May you share freely, never taking more than you give. | |
9 # | |
10 #*********************************************************************** | |
11 # | |
12 # Tests focusing on the fts5 tokenizers | |
13 # | |
14 | |
15 source [file join [file dirname [info script]] fts5_common.tcl] | |
16 | |
17 # If SQLITE_ENABLE_FTS5 is not defined, omit this file. | |
18 ifcapable !fts5 { | |
19 finish_test | |
20 return | |
21 } | |
22 | |
# Build the path of a file inside the fts3 "unicode" directory, resolved
# relative to the running script as <script dir>/../../fts3/unicode/<file>.
#
#   file - name of the file inside the unicode directory
#
# Returns the joined path.
proc fts3_unicode_path {file} {
  set scriptDir [file dirname [info script]]
  return [file join $scriptDir .. .. fts3 unicode $file]
}
26 | |
27 source [fts3_unicode_path parseunicode.tcl] | |
28 set testprefix fts5unicode3 | |
29 | |
# Locations of the two Unicode data files used to build the reference
# tables below.
30 set CF [fts3_unicode_path CaseFolding.txt] | |
31 set UD [fts3_unicode_path UnicodeData.txt] | |
32 | |
# Load the case-folding data. NOTE(review): presumably this populates the
# global tl_lookup_table array that tcl_fold reads - confirm against
# parseunicode.tcl.
33 tl_load_casefolding_txt $CF | |
# Every value returned by an_load_unicodedata_text becomes a key of
# aNotAlnum; tcl_isalnum treats any key present in that array as
# "not alphanumeric".
34 foreach x [an_load_unicodedata_text $UD] { | |
35 set aNotAlnum($x) 1 | |
36 } | |
37 | |
# Each element returned by rd_load_unicodedata_text is unpacked into a
# {code ascii} pair. aDiacritic(code) is set to 0 when ascii is empty,
# otherwise to the first byte of ascii read as a signed 8-bit integer.
38 foreach {y} [rd_load_unicodedata_text $UD] { | |
39 foreach {code ascii} $y {} | |
40 if {$ascii==""} { | |
41 set int 0 | |
42 } else { | |
43 binary scan $ascii c int | |
44 } | |
45 set aDiacritic($code) $int | |
46 } | |
47 | |
# Tcl reference implementation that the tests compare against the SQL
# function fts5_fold().
#
#   i                - value to fold (the tests pass integer code points)
#   bRemoveDiacritic - when true, additionally map the folded value through
#                      the aDiacritic table (default 0)
#
# Returns the mapped value: first through tl_lookup_table if it has an
# entry for i, then (optionally) through aDiacritic if it has an entry for
# the result. Values with no table entry are returned unchanged.
proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic

  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i)]} {
    set i $aDiacritic($i)
  }

  # The expression is braced so that the value of i is used purely as data.
  # An unbraced "expr $i" substitutes the value into the expression text and
  # parses it again, which would execute any bracketed command it contains.
  expr {$i}
}
60 db func tcl_fold tcl_fold | |
61 | |
# Tcl reference implementation that the tests compare against the SQL
# function fts5_isalnum().
#
#   i - value to classify (the tests pass integer code points)
#
# Returns 0 when i is a key of the global aNotAlnum array, 1 otherwise.
proc tcl_isalnum {i} {
  if {[info exists ::aNotAlnum($i)]} {
    return 0
  }
  return 1
}
66 db func tcl_isalnum tcl_isalnum | |
67 | |
68 | |
# Tests 1.0.*: calling the SQL functions with an unsupported number of
# arguments must fail with a "wrong number of arguments" error.
#
# 1.0.1: fts5_isalnum() called with three arguments.
69 do_catchsql_test 1.0.1 { | |
70 SELECT fts5_isalnum(1, 2, 3); | |
71 } {1 {wrong number of arguments to function fts5_isalnum}} | |
# 1.0.2: fts5_fold() called with no arguments.
72 do_catchsql_test 1.0.2 { | |
73 SELECT fts5_fold(); | |
74 } {1 {wrong number of arguments to function fts5_fold}} | |
# 1.0.3: fts5_fold() called with three arguments.
75 do_catchsql_test 1.0.3 { | |
76 SELECT fts5_fold(1,2,3); | |
77 } {1 {wrong number of arguments to function fts5_fold}} | |
78 | |
# Tests 1.1-1.3 each use a recursive CTE to generate every integer i from
# -1 to 100000 inclusive, then count the rows where the built-in SQL
# function disagrees with the Tcl reference function registered above.
# The expected result {0 {}} means no mismatches: count(*) is 0 and min(i)
# is NULL.
#
# 1.1: fts5_fold(i) against tcl_fold(i).
79 do_execsql_test 1.1 { | |
80 WITH ii(i) AS ( | |
81 SELECT -1 | |
82 UNION ALL | |
83 SELECT i+1 FROM ii WHERE i<100000 | |
84 ) | |
85 SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int); | |
86 } {0 {}} | |
87 | |
# 1.2: same comparison with the second argument set to 1, which makes
# tcl_fold also apply its aDiacritic mapping.
88 do_execsql_test 1.2 { | |
89 WITH ii(i) AS ( | |
90 SELECT -1 | |
91 UNION ALL | |
92 SELECT i+1 FROM ii WHERE i<100000 | |
93 ) | |
94 SELECT count(*), min(i) FROM ii | |
95 WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int); | |
96 } {0 {}} | |
97 | |
# 1.3: fts5_isalnum(i) against tcl_isalnum(i).
98 do_execsql_test 1.3 { | |
99 WITH ii(i) AS ( | |
100 SELECT -1 | |
101 UNION ALL | |
102 SELECT i+1 FROM ii WHERE i<100000 | |
103 ) | |
104 SELECT count(*), min(i) FROM ii | |
105 WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int); | |
106 } {0 {}} | |
107 | |
# Test 1.4: build a CREATE VIRTUAL TABLE statement for table f3 whose
# unicode61 tokenizer is given a "separators" option made of the 200
# characters with code points 700 through 899, and check that executing it
# returns an empty result.
108 do_test 1.4 { | |
109 set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=} | |
110 append str {"unicode61 separators '} | |
111 for {set i 700} {$i<900} {incr i} { | |
112 append str [format %c $i] | |
113 } | |
114 append str {'");} | |
115 execsql $str | |
116 } {} | |
# Test 1.5: same as 1.4, but for table f5 and with the characters passed as
# the "tokenchars" option instead of "separators".
117 do_test 1.5 { | |
118 set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=} | |
119 append str {"unicode61 tokenchars '} | |
120 for {set i 700} {$i<900} {incr i} { | |
121 append str [format %c $i] | |
122 } | |
123 append str {'");} | |
124 execsql $str | |
125 } {} | |
126 | |
127 | |
128 finish_test | |
129 | |
OLD | NEW |