OLD | NEW |
(Empty) | |
| 1 # 2014 Dec 20 |
| 2 # |
| 3 # The author disclaims copyright to this source code. In place of |
| 4 # a legal notice, here is a blessing: |
| 5 # |
| 6 # May you do good and not evil. |
| 7 # May you find forgiveness for yourself and forgive others. |
| 8 # May you share freely, never taking more than you give. |
| 9 # |
| 10 #*********************************************************************** |
| 11 # |
| 12 # Tests focusing on the fts5 tokenizers |
| 13 # |
| 14 |
| 15 source [file join [file dirname [info script]] fts5_common.tcl] |
| 16 |
# This file only makes sense in builds that include FTS5. If the fts5
# capability is NOT available, finish immediately and skip every test below.
ifcapable {!fts5} {
  finish_test
  return
}
| 22 |
# fts3_unicode_path FILE
#
# Return the path "<dir>/../../fts3/unicode/FILE", where <dir> is the
# directory containing the script currently being evaluated.
proc fts3_unicode_path {file} {
  set here [file dirname [info script]]
  return [file join $here .. .. fts3 unicode $file]
}
| 26 |
# Load the Unicode data-file helpers used below (the tl_*, an_* and rd_*
# commands are presumably defined by parseunicode.tcl - they are not defined
# in this file) and name this group of tests.
source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

# Input data files, located next to parseunicode.tcl.
set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]

# Parse the case-folding data. tcl_fold later reads the global array
# tl_lookup_table, which is presumably populated by this call.
tl_load_casefolding_txt $CF

# aNotAlnum(X) exists for every value X returned by an_load_unicodedata_text.
foreach x [an_load_unicodedata_text $UD] { set aNotAlnum($x) 1 }

# Each element returned by rd_load_unicodedata_text is read as a
# {code ascii} pair. aDiacritic(code) is set to the 8-bit integer value of
# the first byte of ascii, or to 0 when ascii is the empty string.
foreach y [rd_load_unicodedata_text $UD] {
  foreach {code ascii} $y {}
  set int 0
  if {$ascii ne ""} { binary scan $ascii c int }
  set aDiacritic($code) $int
}
| 47 |
# tcl_fold I ?BREMOVEDIACRITIC?
#
# Tcl-side reference used to cross-check the SQL function fts5_fold().
#
#   I                 Value to fold; the tests pass integer code points.
#   BREMOVEDIACRITIC  Optional, default 0. When true, the result of the
#                     first lookup is additionally mapped through the
#                     global array aDiacritic, if it has an entry there.
#
# Returns I mapped through the global array tl_lookup_table (when an entry
# exists), then optionally through aDiacritic. Values without an entry are
# returned unchanged.
proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic

  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i)]} {
    set i $aDiacritic($i)
  }

  # The expression is braced so that $i is substituted exactly once. An
  # unbraced "expr $i" would re-parse the value as an expression, which
  # evaluates any operators or command substitutions it contains. For
  # integer input the result is still returned as a numeric value.
  expr {$i}
}
# Expose the Tcl proc tcl_fold to SQL as the function tcl_fold().
db function tcl_fold tcl_fold
| 61 |
# tcl_isalnum I
#
# Tcl-side reference used to cross-check the SQL function fts5_isalnum().
# Returns 0 if the global array aNotAlnum has an entry for I, otherwise 1.
proc tcl_isalnum {i} {
  expr {[info exists ::aNotAlnum($i)] == 0}
}
# Expose the Tcl proc tcl_isalnum to SQL as the function tcl_isalnum().
db function tcl_isalnum tcl_isalnum
| 67 |
| 68 |
# 1.0.*: fts5_isalnum() and fts5_fold() (SQL functions not defined in this
# file) must raise a "wrong number of arguments" error when called with
# three arguments, and fts5_fold() also when called with none.
do_catchsql_test 1.0.1 {
  SELECT fts5_isalnum(1, 2, 3);
} {1 {wrong number of arguments to function fts5_isalnum}}

do_catchsql_test 1.0.2 {
  SELECT fts5_fold();
} {1 {wrong number of arguments to function fts5_fold}}

do_catchsql_test 1.0.3 {
  SELECT fts5_fold(1,2,3);
} {1 {wrong number of arguments to function fts5_fold}}
| 78 |
# 1.1 - 1.3: compare the SQL functions against the Tcl reference procs for
# every integer from -1 to 100000 inclusive (generated by the recursive
# common table expression ii). Each query counts the mismatches and reports
# the smallest mismatching value, so the expected result is "0 NULL".

# 1.1: fts5_fold(i) against tcl_fold(i).
do_execsql_test 1.1 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}

# 1.2: the same comparison with the second argument set to 1.
do_execsql_test 1.2 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}

# 1.3: fts5_isalnum(i) against tcl_isalnum(i).
do_execsql_test 1.3 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
} {0 {}}
| 107 |
# 1.4: create an fts5 table whose unicode61 tokenizer is given a
# "separators" option containing every character with code 700..899.
# The CREATE VIRTUAL TABLE statement must succeed and return nothing.
do_test 1.4 {
  set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize="unicode61 separators '}
  for {set i 700} {$i<900} {incr i} { append str [format %c $i] }
  append str {'");}
  execsql $str
} {}

# 1.5: as 1.4, but the same characters are passed as "tokenchars".
do_test 1.5 {
  set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize="unicode61 tokenchars '}
  for {set i 700} {$i<900} {incr i} { append str [format %c $i] }
  append str {'");}
  execsql $str
} {}


finish_test
| 129 |
OLD | NEW |