OLD | NEW |
1 | 1 ########################################################################## |
2 | 2 # 2016 Jan 27 |
3 proc usage {} { | 3 # |
4 puts stderr "$::argv0 ?OPTIONS? DATABASE FILE1..." | 4 # The author disclaims copyright to this source code. In place of |
5 puts stderr "" | 5 # a legal notice, here is a blessing: |
6 puts stderr "Options are" | 6 # |
7 puts stderr " -fts5" | 7 # May you do good and not evil. |
8 puts stderr " -fts4" | 8 # May you find forgiveness for yourself and forgive others. |
9 puts stderr " -colsize <list of column sizes>" | 9 # May you share freely, never taking more than you give. |
10 puts stderr { | 10 # |
# Parse the script's command line ($::argv) into the global array ::A by
# handing an option specification and a help text to cmdline::process.
#
# In the specification below:
#   * a two-element entry {name help} is a boolean switch: A(name) is 0
#     unless -name is given, in which case it is 1;
#   * a three-element entry {name default help} is a switch that takes a
#     value: A(name) is the default unless "-name VALUE" is given;
#   * a bare word is a mandatory positional argument (A(database));
#   * a word ending in "..." collects all remaining arguments as a list
#     (A(file)).
#
# On a malformed command line cmdline::process prints a usage message,
# followed by the help text, to stderr and exits the process.
#
proc process_cmdline {} {
  cmdline::process ::A $::argv {
    {fts5 "use fts5 (this is the default)"}
    {fts4 "use fts4"}
    {colsize "10 10 10" "list of column sizes"}
    {tblname "t1" "table name to create"}
    {detail "full" "Fts5 detail mode to use"}
    {repeat 1 "Load each file this many times"}
    {prefix "" "Fts prefix= option"}
    {trans 1 "True to use a transaction"}
    database
    file...
  } {
This script is designed to create fts4/5 tables with more than one column.
The -colsize option should be set to a Tcl list of integer values, one for
each column in the table. Each value is the number of tokens that will be
inserted into the column value for each row. For example, setting the -colsize
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
tokens per row in each, respectively.

Each "FILE" argument should be a text file. The contents of these text files
is split on whitespace characters to form a list of tokens. The first N1
tokens are used for the first column of the first row, where N1 is the first
element of the -colsize list. The next N2 are used for the second column of
the first row, and so on. Rows are added to the table until the entire list
of tokens is exhausted.
  }
}
59 | 39 |
60 if {$i > [llength $argv]-2} usage | 40 ########################################################################### |
61 set O(db) [lindex $argv $i] | 41 ########################################################################### |
62 set O(files) [lrange $argv [expr $i+1] end] | 42 # Command line options processor. This is generic code that can be copied |
| 43 # between scripts. |
| 44 # |
namespace eval cmdline {

  # Report a command line error and terminate the process.
  #
  #   O    the command line specification (same format as for [process])
  #   E    free-form help text, printed after the list of switches
  #   msg  optional error message; when non-empty it is printed first
  #
  # Writes a usage line (positional parameter names in upper case), the
  # list of switches with their help strings and defaults, and then $E to
  # stderr. Never returns: the process exits with status -1.
  #
  proc cmdline_error {O E {msg ""}} {
    if {$msg != ""} {
      puts stderr "Error: $msg"
      puts stderr ""
    }

    # One-element spec entries are the positional parameters.
    set L [list]
    foreach o $O {
      if {[llength $o]==1} {
        lappend L [string toupper $o]
      }
    }

    puts stderr "Usage: $::argv0 ?SWITCHES? $L"
    puts stderr ""
    puts stderr "Switches are:"
    foreach o $O {
      if {[llength $o]==3} {
        # Switch that takes a value: {name default help}
        foreach {a b c} $o {}
        puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b]
      } elseif {[llength $o]==2} {
        # Boolean switch: {name help}
        foreach {a b} $o {}
        puts stderr [format " -%-15s %s" $a $b]
      }
    }
    puts stderr ""
    puts stderr $E
    exit -1
  }

  # Parse the argument list $lArgs according to specification $O and store
  # the results in the caller's array named $avar.
  #
  #   avar   name of the array (in the caller's scope) to populate
  #   lArgs  list of command line arguments, e.g. $::argv
  #   O      specification: a list whose entries are one of
  #            {name help}           boolean switch, 0 by default, 1 if given
  #            {name default help}   switch followed by a value
  #            name                  mandatory positional argument
  #            name...               list of all trailing arguments
  #   E      help text, passed to [cmdline_error] on failure
  #
  # Switches may be abbreviated to any unambiguous prefix; a switch typed
  # in full always selects that switch, even if it is also a prefix of a
  # longer one. The argument "--" ends switch processing and is discarded.
  # Any problem with the command line is reported through [cmdline_error],
  # which exits the process. A spec entry with more than three elements
  # raises a Tcl error.
  #
  proc process {avar lArgs O E} {
    upvar $avar A
    set zTrailing ""     ;# Name of the "NAME..." parameter, or "" if none
    set lPosargs [list]

    # Populate A() with default values. Also, for each switch in the command
    # line spec, set an entry in the idx() array as follows:
    #
    #   {tblname t1 "table name to use"}
    #     -> [set idx(-tblname) {tblname t1 "table name to use"}]
    #
    # For each positional parameter, append its name to $lPosargs. If the
    # "..." specifier is present, set $zTrailing to the name that precedes it.
    #
    foreach o $O {
      set nm [lindex $o 0]
      set nArg [llength $o]
      switch -- $nArg {
        1 {
          if {[string range $nm end-2 end]=="..."} {
            set zTrailing [string range $nm 0 end-3]
          } else {
            lappend lPosargs $nm
          }
        }
        2 {
          set A($nm) 0
          set idx(-$nm) $o
        }
        3 {
          set A($nm) [lindex $o 1]
          set idx(-$nm) $o
        }
        default {
          error "Error in command line specification"
        }
      }
    }

    # Set explicitly specified option values
    #
    set nArg [llength $lArgs]
    for {set i 0} {$i < $nArg} {incr i} {
      set opt [lindex $lArgs $i]

      # "--" terminates the switches. Step over it so that it is not
      # mistaken for the first positional argument.
      if {$opt=="--"} {
        incr i
        break
      }
      if {[string range $opt 0 0]!="-"} break

      # Find the switch(es) that $opt names. An exact match wins outright.
      # Otherwise collect every switch that $opt is a literal prefix of.
      # The comparison is literal, so glob characters typed by the user
      # are not interpreted as a pattern.
      if {[info exists idx($opt)]} {
        set c [list $opt]
      } else {
        set c [list]
        set nOpt [string length $opt]
        foreach cand [array names idx] {
          if {[string equal -length $nOpt $opt $cand]} {
            lappend c $cand
          }
        }
      }
      if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
      if {[llength $c]>1}  { cmdline_error $O $E "Ambiguous option: $opt"}
      set c [lindex $c 0]

      if {[llength $idx($c)]==3} {
        # Switch with a value: the value is the next argument.
        if {$i==$nArg-1} {
          cmdline_error $O $E "Option requires argument: $c"
        }
        incr i
        set A([lindex $idx($c) 0]) [lindex $lArgs $i]
      } else {
        set A([lindex $idx($c) 0]) 1
      }
    }

    # Deal with position arguments. $i is the index of the first argument
    # that is not a switch. There must be exactly one argument for each
    # positional parameter, unless a trailing "NAME..." parameter is present
    # to absorb the surplus.
    #
    set nPosarg [llength $lPosargs]
    set nRem [expr {$nArg - $i}]
    if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
      cmdline_error $O $E
    }
    for {set j 0} {$j < $nPosarg} {incr j} {
      set A([lindex $lPosargs $j]) [lindex $lArgs [expr {$j+$i}]]
    }
    if {$zTrailing!=""} {
      set A($zTrailing) [lrange $lArgs [expr {$j+$i}] end]
    }
  }
} ;# namespace eval cmdline
| 151 # End of command line options processor. |
| 152 ########################################################################### |
| 153 ########################################################################### |
63 | 154 |
# Parse the command line into the global array A.
process_cmdline

# Pick the FTS module: fts4 only if -fts4 was given, fts5 in every other case.
set A(fts) [expr {$A(fts4) ? "fts4" : "fts5"}]

# Open the target database; the handle is the command "db".
sqlite3 db $A(database)
65 | 165 |
66 # Create the FTS table in the db. Return a list of the table columns. | 166 # Create the FTS table in the db. Return a list of the table columns. |
67 # | 167 # |
proc create_table {} {
  global A

  # Column names are single letters; one is taken for each entry of the
  # -colsize list, starting from "a".
  set alphabet {a b c d e f g h i j k l m n o p q r s t u v w x y z}
  set iLast [expr {[llength $A(colsize)] - 1}]
  set names [lrange $alphabet 0 $iLast]

  # The detail= option is only passed to fts5. prefix= is always passed.
  set zDetail ""
  if {$A(fts) eq "fts5"} {
    set zDetail ",detail=$A(detail)"
  }

  set stmt [format \
    "CREATE VIRTUAL TABLE IF NOT EXISTS %s USING %s (%s%s, prefix='%s');" \
    $A(tblname) $A(fts) [join $names ,] $zDetail $A(prefix)]

  db eval $stmt
  return $names
}
82 | 183 |
83 # Return a list of tokens from the named file. | 184 # Return a list of tokens from the named file. |
84 # | 185 # |
proc readfile {file} {
  # Slurp the whole file, then break it at every whitespace character.
  # Runs of whitespace therefore produce empty-string tokens.
  set channel [open $file]
  set contents [read $channel]
  close $channel
  return [split $contents]
}
91 | 192 |
# Return a list made of $n back-to-back copies of list $L. The result is
# empty when $n is zero or negative.
#
proc repeat {L n} {
  set out {}
  set k 0
  while {$k < $n} {
    set out [concat $out $L]
    incr k
  }
  return $out
}
| 200 |
92 | 201 |
93 # Load all the data into a big list of tokens. | 202 # Load all the data into a big list of tokens. |
94 # | 203 # |
# Check the -colsize list before doing any work. Each entry is the number
# of tokens consumed per row by one column. If the entries add up to zero
# the insert loop below would never advance through the token list, and a
# negative entry would move it backwards, so both are rejected here.
#
set nTokenPerRow 0
foreach s $A(colsize) {
  if {![string is integer -strict $s] || $s < 0} {
    error "-colsize must be a list of non-negative integers: \"$A(colsize)\""
  }
  incr nTokenPerRow $s
}
if {$nTokenPerRow == 0} {
  error "-colsize must contain at least one positive value: \"$A(colsize)\""
}

# Load all the data into a big list of tokens. The tokens of each file
# are included -repeat times.
#
set tokens [list]
foreach f $A(file) {
  set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]]
}

set N [llength $tokens]
set i 0
set cols [create_table]

# Build "INSERT INTO <tbl> VALUES($R(a), $R(b), ...)". The dollar signs are
# escaped so that the text "$R(x)" reaches [db eval] unsubstituted; the
# values are taken from the R() array, which is refilled for each row below.
#
set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])"
foreach c [lrange $cols 1 end] {
  append sql ", \$R($c)"
}
append sql ")"

# Insert rows until the token list is exhausted. For each row, column $c
# receives the next $s tokens; the final row may be short or partly empty.
# The whole load runs inside one transaction unless -trans is false.
#
if {$A(trans)} { db eval BEGIN }
while {$i < $N} {
  foreach c $cols s $A(colsize) {
    set R($c) [lrange $tokens $i [expr {$i+$s-1}]]
    incr i $s
  }
  db eval $sql
}
if {$A(trans)} { db eval COMMIT }
118 | 227 |
119 | 228 |
120 | 229 |
OLD | NEW |