| OLD | NEW |
| (Empty) |
| 1 # 2014 Dec 20 | |
| 2 # | |
| 3 # The author disclaims copyright to this source code. In place of | |
| 4 # a legal notice, here is a blessing: | |
| 5 # | |
| 6 # May you do good and not evil. | |
| 7 # May you find forgiveness for yourself and forgive others. | |
| 8 # May you share freely, never taking more than you give. | |
| 9 # | |
| 10 #*********************************************************************** | |
| 11 # | |
| 12 # Tests focusing on the fts5 tokenizers | |
| 13 # | |
| 14 | |
| 15 source [file join [file dirname [info script]] fts5_common.tcl] | |
| 16 | |
| 17 # If SQLITE_ENABLE_FTS5 is not defined, omit this file. | |
| 18 ifcapable !fts5 { | |
| 19 finish_test | |
| 20 return | |
| 21 } | |
| 22 | |
| 23 proc fts3_unicode_path {file} { | |
| 24 file join [file dirname [info script]] .. .. fts3 unicode $file | |
| 25 } | |
| 26 | |
| 27 source [fts3_unicode_path parseunicode.tcl] | |
| 28 set testprefix fts5unicode3 | |
| 29 | |
| 30 set CF [fts3_unicode_path CaseFolding.txt] | |
| 31 set UD [fts3_unicode_path UnicodeData.txt] | |
| 32 | |
| 33 tl_load_casefolding_txt $CF | |
| 34 foreach x [an_load_unicodedata_text $UD] { | |
| 35 set aNotAlnum($x) 1 | |
| 36 } | |
| 37 | |
| 38 foreach {y} [rd_load_unicodedata_text $UD] { | |
| 39 foreach {code ascii} $y {} | |
| 40 if {$ascii==""} { | |
| 41 set int 0 | |
| 42 } else { | |
| 43 binary scan $ascii c int | |
| 44 } | |
| 45 set aDiacritic($code) $int | |
| 46 } | |
| 47 | |
| 48 proc tcl_fold {i {bRemoveDiacritic 0}} { | |
| 49 global tl_lookup_table | |
| 50 global aDiacritic | |
| 51 | |
| 52 if {[info exists tl_lookup_table($i)]} { | |
| 53 set i $tl_lookup_table($i) | |
| 54 } | |
| 55 if {$bRemoveDiacritic && [info exists aDiacritic($i)]} { | |
| 56 set i $aDiacritic($i) | |
| 57 } | |
| 58 expr $i | |
| 59 } | |
| 60 db func tcl_fold tcl_fold | |
| 61 | |
| 62 proc tcl_isalnum {i} { | |
| 63 global aNotAlnum | |
| 64 expr {![info exists aNotAlnum($i)]} | |
| 65 } | |
| 66 db func tcl_isalnum tcl_isalnum | |
| 67 | |
| 68 | |
| 69 do_catchsql_test 1.0.1 { | |
| 70 SELECT fts5_isalnum(1, 2, 3); | |
| 71 } {1 {wrong number of arguments to function fts5_isalnum}} | |
| 72 do_catchsql_test 1.0.2 { | |
| 73 SELECT fts5_fold(); | |
| 74 } {1 {wrong number of arguments to function fts5_fold}} | |
| 75 do_catchsql_test 1.0.3 { | |
| 76 SELECT fts5_fold(1,2,3); | |
| 77 } {1 {wrong number of arguments to function fts5_fold}} | |
| 78 | |
| 79 do_execsql_test 1.1 { | |
| 80 WITH ii(i) AS ( | |
| 81 SELECT -1 | |
| 82 UNION ALL | |
| 83 SELECT i+1 FROM ii WHERE i<100000 | |
| 84 ) | |
| 85 SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int); | |
| 86 } {0 {}} | |
| 87 | |
| 88 do_execsql_test 1.2 { | |
| 89 WITH ii(i) AS ( | |
| 90 SELECT -1 | |
| 91 UNION ALL | |
| 92 SELECT i+1 FROM ii WHERE i<100000 | |
| 93 ) | |
| 94 SELECT count(*), min(i) FROM ii | |
| 95 WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int); | |
| 96 } {0 {}} | |
| 97 | |
| 98 do_execsql_test 1.3 { | |
| 99 WITH ii(i) AS ( | |
| 100 SELECT -1 | |
| 101 UNION ALL | |
| 102 SELECT i+1 FROM ii WHERE i<100000 | |
| 103 ) | |
| 104 SELECT count(*), min(i) FROM ii | |
| 105 WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int); | |
| 106 } {0 {}} | |
| 107 | |
| 108 do_test 1.4 { | |
| 109 set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=} | |
| 110 append str {"unicode61 separators '} | |
| 111 for {set i 700} {$i<900} {incr i} { | |
| 112 append str [format %c $i] | |
| 113 } | |
| 114 append str {'");} | |
| 115 execsql $str | |
| 116 } {} | |
| 117 do_test 1.5 { | |
| 118 set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=} | |
| 119 append str {"unicode61 tokenchars '} | |
| 120 for {set i 700} {$i<900} {incr i} { | |
| 121 append str [format %c $i] | |
| 122 } | |
| 123 append str {'");} | |
| 124 execsql $str | |
| 125 } {} | |
| 126 | |
| 127 | |
| 128 finish_test | |
| 129 | |
| OLD | NEW |