OLD | NEW |
(Empty) | |
| 1 ########################################################################## |
| 2 # 2016 Jan 27 |
| 3 # |
| 4 # The author disclaims copyright to this source code. In place of |
| 5 # a legal notice, here is a blessing: |
| 6 # |
| 7 # May you do good and not evil. |
| 8 # May you find forgiveness for yourself and forgive others. |
| 9 # May you share freely, never taking more than you give. |
| 10 # |
# Parse the script's command line ($::argv) into the global array ::A.
#
# The switch/positional specification and the help text are handed to
# cmdline::process, which fills ::A with one entry per switch (defaults
# applied) plus A(database) and A(file) for the positional arguments.
#
proc process_cmdline {} {
  # Two-element entries are boolean switches, three-element entries are
  # switches that take a value (name, default, description), and bare
  # words are positional arguments ("file..." collects the remainder).
  set lSpec {
    {fts5 "use fts5 (this is the default)"}
    {fts4 "use fts4"}
    {colsize "10 10 10" "list of column sizes"}
    {tblname "t1" "table name to create"}
    {detail "full" "Fts5 detail mode to use"}
    {repeat 1 "Load each file this many times"}
    {prefix "" "Fts prefix= option"}
    {trans 1 "True to use a transaction"}
    database
    file...
  }

  # Free-form help text printed after the list of switches on error.
  set zHelp {
This script is designed to create fts4/5 tables with more than one column.
The -colsize option should be set to a Tcl list of integer values, one for
each column in the table. Each value is the number of tokens that will be
inserted into the column value for each row. For example, setting the -colsize
option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
tokens per row in each, respectively.

Each "FILE" argument should be a text file. The contents of these text files
is split on whitespace characters to form a list of tokens. The first N1
tokens are used for the first column of the first row, where N1 is the first
element of the -colsize list. The next N2 are used for the second column of
the first row, and so on. Rows are added to the table until the entire list
of tokens is exhausted.
}

  cmdline::process ::A $::argv $lSpec $zHelp
}
| 39 |
| 40 ########################################################################### |
| 41 ########################################################################### |
| 42 # Command line options processor. This is generic code that can be copied |
| 43 # between scripts. |
| 44 # |
namespace eval cmdline {
  # Print an optional error message followed by a usage summary built from
  # the option specification, then terminate the process.
  #
  #   O    Option specification list (same format as for [process]).
  #   E    Free-form help text printed after the list of switches.
  #   msg  Optional message; when non-empty it is printed first as
  #        "Error: <msg>".
  #
  # This command never returns: it calls [exit -1].
  #
  proc cmdline_error {O E {msg ""}} {
    if {$msg != ""} {
      puts stderr "Error: $msg"
      puts stderr ""
    }

    # Single-element spec entries are positional parameters; they are shown
    # upper-cased on the usage line.
    set L [list]
    foreach o $O {
      if {[llength $o]==1} {
        lappend L [string toupper $o]
      }
    }

    puts stderr "Usage: $::argv0 ?SWITCHES? $L"
    puts stderr ""
    puts stderr "Switches are:"
    foreach o $O {
      if {[llength $o]==3} {
        # {name default description}: a switch that takes a value.
        foreach {a b c} $o {}
        puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b]
      } elseif {[llength $o]==2} {
        # {name description}: a boolean switch.
        foreach {a b} $o {}
        puts stderr [format " -%-15s %s" $a $b]
      }
    }
    puts stderr ""
    puts stderr $E
    exit -1
  }

  # Parse the argument list $lArgs according to specification $O and store
  # the results in the caller's array named by $avar.
  #
  #   avar   Name of the array (resolved in the caller's scope) to populate.
  #   lArgs  List of command line arguments, e.g. $::argv.
  #   O      Specification list. Each element is one of:
  #            {name description}          boolean switch, default 0
  #            {name default description}  switch taking a value
  #            name                        positional parameter
  #            name...                     collects all trailing arguments
  #   E      Help text passed through to [cmdline_error].
  #
  # Switches may be abbreviated to any unique prefix. A switch that exactly
  # matches a name is always accepted, even if it is also a prefix of a
  # longer switch name. A literal "--" ends switch processing and is not
  # itself treated as a positional argument.
  #
  # On a usage error [cmdline_error] is invoked, which exits the process.
  # A malformed specification raises a Tcl error.
  #
  proc process {avar lArgs O E} {
    upvar $avar A
    set zTrailing ""     ;# Name of the "name..." parameter, or "" if none
    set lPosargs [list]

    # Populate A() with default values. Also, for each switch in the command
    # line spec, set an entry in the idx() array as follows:
    #
    #     {tblname t1 "table name to use"}
    #       -> [set idx(-tblname) {tblname t1 "table name to use"}]
    #
    # For each positional parameter, append its name to $lPosargs. If the
    # "..." specifier is present, set $zTrailing to the name that precedes it.
    #
    foreach o $O {
      set nm [lindex $o 0]
      switch -- [llength $o] {
        1 {
          if {[string range $nm end-2 end]=="..."} {
            set zTrailing [string range $nm 0 end-3]
          } else {
            lappend lPosargs $nm
          }
        }
        2 {
          set A($nm) 0
          set idx(-$nm) $o
        }
        3 {
          set A($nm) [lindex $o 1]
          set idx(-$nm) $o
        }
        default {
          error "Error in command line specification"
        }
      }
    }

    # Set explicitly specified option values
    #
    set nArg [llength $lArgs]
    for {set i 0} {$i < $nArg} {incr i} {
      set opt [lindex $lArgs $i]
      if {[string range $opt 0 0]!="-"} break
      if {$opt=="--"} {
        # End-of-switches marker: step over it so that it is not mistaken
        # for the first positional argument.
        incr i
        break
      }

      # Find the switch(es) that $opt names. An exact match wins outright.
      # Otherwise collect every switch that $opt is a literal prefix of; a
      # plain string comparison is used so that characters such as "*" or
      # "?" in user input are never treated as glob patterns.
      if {[info exists idx($opt)]} {
        set c [list $opt]
      } else {
        set c [list]
        set nOpt [string length $opt]
        foreach zName [array names idx] {
          if {[string equal -length $nOpt $opt $zName]} {
            lappend c $zName
          }
        }
      }
      if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
      if {[llength $c]>1} { cmdline_error $O $E "Ambiguous option: $opt"}
      set c [lindex $c 0]

      set spec $idx($c)
      if {[llength $spec]==3} {
        # Switch takes a value: consume the following argument.
        if {$i==$nArg-1} {
          cmdline_error $O $E "Option requires argument: $c"
        }
        incr i
        set A([lindex $spec 0]) [lindex $lArgs $i]
      } else {
        set A([lindex $spec 0]) 1
      }
    }

    # Deal with position arguments. $i is now the index of the first
    # argument that was not consumed as a switch.
    #
    set nPosarg [llength $lPosargs]
    set nRem [expr {$nArg - $i}]
    if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
      cmdline_error $O $E
    }
    for {set j 0} {$j < $nPosarg} {incr j} {
      set A([lindex $lPosargs $j]) [lindex $lArgs [expr {$j+$i}]]
    }
    if {$zTrailing!=""} {
      # Everything after the fixed positional parameters.
      set A($zTrailing) [lrange $lArgs [expr {$j+$i}] end]
    }
  }
} ;# namespace eval cmdline
| 151 # End of command line options processor. |
| 152 ########################################################################### |
| 153 ########################################################################### |
| 154 |
# Parse the command line into the global array A.
process_cmdline

# Pick the FTS module: fts4 when the -fts4 switch was given, fts5 otherwise.
set A(fts) [expr {$A(fts4) ? "fts4" : "fts5"}]

# Open the database named on the command line as the handle "db".
sqlite3 db $A(database)
| 165 |
# Create the FTS table in the db. Return a list of the table columns.
#
# Reads the global array A:
#   A(colsize)  one list element per column; only the element count is
#               used here.
#   A(tblname)  name of the table to create.
#   A(fts)      FTS module name ("fts4" or "fts5").
#   A(detail)   value for the detail= option (added for fts5 only).
#   A(prefix)   value for the prefix= option.
#
# The first 26 columns are named a..z. Any further columns are named
# c26, c27, ... so that every -colsize entry gets its own column instead
# of being silently dropped. Raises an error if -colsize is empty.
#
proc create_table {} {
  global A

  set nCol [llength $A(colsize)]
  if {$nCol == 0} {
    error "-colsize must list at least one column size"
  }

  set letters [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
  set cols [lrange $letters 0 [expr {$nCol-1}]]

  # Extend past "z" with generated names when more columns are requested.
  for {set k [llength $cols]} {$k < $nCol} {incr k} {
    lappend cols "c$k"
  }

  set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
  append sql [join $cols ,]
  # The detail= option is only emitted when the fts5 module is selected.
  if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
  append sql ", prefix='$A(prefix)');"

  db eval $sql
  return $cols
}
| 183 |
# Read the whole of the named file and return its contents as a list of
# tokens, produced by [split] with its default (whitespace) separators.
# Runs of adjacent whitespace therefore yield empty-string tokens.
#
proc readfile {file} {
  set channel [open $file]
  set contents [read $channel]
  close $channel
  return [split $contents]
}
| 192 |
# Return a list made of $n back-to-back copies of list $L. The result is
# an empty list when the loop condition {0 < $n} is false.
#
proc repeat {L n} {
  set out [list]
  set nDone 0
  while {$nDone < $n} {
    set out [concat $out $L]
    incr nDone
  }
  return $out
}
| 200 |
| 201 |
# Validate -colsize before doing any work. Every entry must be a
# non-negative integer and together they must consume at least one token
# per row; otherwise the insert loop below would never advance through
# the token list and would add rows forever.
#
set nTokPerRow 0
foreach s $A(colsize) {
  if {![string is integer -strict $s] || $s < 0} {
    error "-colsize values must be non-negative integers: \"$s\""
  }
  incr nTokPerRow $s
}
if {$nTokPerRow <= 0} {
  error "-colsize must request at least one token per row"
}

# Load all the data into a big list of tokens. Each file contributes its
# tokens A(repeat) times in a row.
#
set tokens [list]
foreach f $A(file) {
  set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]]
}

set N [llength $tokens]
set i 0
set cols [create_table]

# Build "INSERT INTO <tbl> VALUES($R(a), $R(b), ...)". The $R(...) text is
# left literal in the SQL so that [db eval] reads each column value from
# the Tcl array R when the statement runs.
set binds [list]
foreach c $cols {
  lappend binds "\$R($c)"
}
set sql "INSERT INTO $A(tblname) VALUES([join $binds {, }])"

if {$A(trans)} { db eval BEGIN }
while {$i < $N} {
  # Carve the next row out of the token list: $s tokens for column $c.
  # The final row may be short if the tokens run out.
  foreach c $cols s $A(colsize) {
    set R($c) [lrange $tokens $i [expr {$i+$s-1}]]
    incr i $s
  }
  db eval $sql
}
if {$A(trans)} { db eval COMMIT }
| 227 |
| 228 |
| 229 |
OLD | NEW |