Index: third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl |
diff --git a/third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl b/third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl |
index c22c5dae7270b7803ac5178cf9448b8030ec396d..4766b00b06b5c56040f5b6ef6b6f800d3064b95f 100644 |
--- a/third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl |
+++ b/third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl |
@@ -1,80 +1,181 @@ |
- |
- |
-proc usage {} { |
- puts stderr "$::argv0 ?OPTIONS? DATABASE FILE1..." |
- puts stderr "" |
- puts stderr "Options are" |
- puts stderr " -fts5" |
- puts stderr " -fts4" |
- puts stderr " -colsize <list of column sizes>" |
- puts stderr { |
-This script is designed to create fts4/5 tables with more than one column. |
-The -colsize option should be set to a Tcl list of integer values, one for |
-each column in the table. Each value is the number of tokens that will be |
-inserted into the column value for each row. For example, setting the -colsize |
-option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10 |
-tokens per row in each, respectively. |
- |
-Each "FILE" argument should be a text file. The contents of these text files is |
-split on whitespace characters to form a list of tokens. The first N1 tokens |
-are used for the first column of the first row, where N1 is the first element |
-of the -colsize list. The next N2 are used for the second column of the first |
-row, and so on. Rows are added to the table until the entire list of tokens |
-is exhausted. |
-} |
- exit -1 |
+########################################################################## |
+# 2016 Jan 27 |
+# |
+# The author disclaims copyright to this source code. In place of |
+# a legal notice, here is a blessing: |
+# |
+# May you do good and not evil. |
+# May you find forgiveness for yourself and forgive others. |
+# May you share freely, never taking more than you give. |
+# |
+proc process_cmdline {} { |
+ # Parse the script's command line ($::argv) into the global array ::A by |
+ # passing cmdline::process an option specification and a help text. |
+ # |
+ # In the specification below: |
+ # * a two-element entry {NAME HELP} is a boolean switch (A(NAME) is 0 |
+ # unless the switch is given, in which case it is 1); |
+ # * a three-element entry {NAME DEFAULT HELP} is a switch that takes a |
+ # value, with A(NAME) preset to DEFAULT; |
+ # * "database" is a required positional argument, stored in A(database); |
+ # * "file..." collects every remaining argument as a list in A(file). |
+ # |
+ # If the command line is malformed, cmdline::process prints a usage |
+ # message (including the help text) to stderr and exits the script. |
+ cmdline::process ::A $::argv { |
+ {fts5 "use fts5 (this is the default)"} |
+ {fts4 "use fts4"} |
+ {colsize "10 10 10" "list of column sizes"} |
+ {tblname "t1" "table name to create"} |
+ {detail "full" "Fts5 detail mode to use"} |
+ {repeat 1 "Load each file this many times"} |
+ {prefix "" "Fts prefix= option"} |
+ {trans 1 "True to use a transaction"} |
+ database |
+ file... |
+ } { |
+ This script is designed to create fts4/5 tables with more than one column. |
+ The -colsize option should be set to a Tcl list of integer values, one for |
+ each column in the table. Each value is the number of tokens that will be |
+ inserted into the column value for each row. For example, setting the -colsize |
+ option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10 |
+ tokens per row in each, respectively. |
+ |
+ Each "FILE" argument should be a text file. The contents of these text files |
+ is split on whitespace characters to form a list of tokens. The first N1 |
+ tokens are used for the first column of the first row, where N1 is the first |
+ element of the -colsize list. The next N2 are used for the second column of |
+ the first row, and so on. Rows are added to the table until the entire list |
+ of tokens is exhausted. |
+ } |
} |
-set O(aColSize) [list 10 10 10] |
-set O(tblname) t1 |
-set O(fts) fts5 |
- |
- |
-set options_with_values {-colsize} |
- |
-for {set i 0} {$i < [llength $argv]} {incr i} { |
- set opt [lindex $argv $i] |
- if {[string range $opt 0 0]!="-"} break |
- |
- if {[lsearch $options_with_values $opt]>=0} { |
- incr i |
- if {$i==[llength $argv]} usage |
- set val [lindex $argv $i] |
+########################################################################### |
+########################################################################### |
+# Command line options processor. This is generic code that can be copied |
+# between scripts. |
+# |
+namespace eval cmdline { |
+ # Print an optional error message followed by a usage summary built from |
+ # the option specification, then terminate the script. |
+ # |
+ # O: option specification list (same format as for [process]). |
+ # E: free-form help text, printed after the list of switches. |
+ # msg: optional error message, printed first as "Error: <msg>". |
+ # |
+ # All output goes to stderr. This proc does not return: it ends with |
+ # [exit -1]. |
+ # |
+ proc cmdline_error {O E {msg ""}} { |
+ if {$msg != ""} { |
+ puts stderr "Error: $msg" |
+ puts stderr "" |
+ } |
+ |
+ # Single-word spec entries are the positional arguments; list them in |
+ # upper case on the usage line. |
+ set L [list] |
+ foreach o $O { |
+ if {[llength $o]==1} { |
+ lappend L [string toupper $o] |
+ } |
+ } |
+ |
+ puts stderr "Usage: $::argv0 ?SWITCHES? $L" |
+ puts stderr "" |
+ puts stderr "Switches are:" |
+ foreach o $O { |
+ if {[llength $o]==3} { |
+ foreach {a b c} $o {} |
+ puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b] |
+ } elseif {[llength $o]==2} { |
+ foreach {a b} $o {} |
+ puts stderr [format " -%-15s %s" $a $b] |
+ } |
+ } |
+ puts stderr "" |
+ puts stderr $E |
+ exit -1 |
} |
- |
- switch -- $opt { |
- -colsize { |
- set O(aColSize) $val |
+ |
+ # Parse the argument list lArgs according to specification O and store |
+ # the results in the caller's array whose name is given by avar. |
+ # |
+ # Each element of O is one of: |
+ # {NAME HELP} boolean switch; A(NAME) is 0 unless given, then 1 |
+ # {NAME DEFAULT HELP} switch taking a value; A(NAME) starts as DEFAULT |
+ # NAME required positional argument |
+ # NAME... all remaining arguments, stored as a list in A(NAME) |
+ # |
+ # A switch may be abbreviated to any unambiguous prefix; a switch typed |
+ # in full always matches itself. Switch parsing stops at the first |
+ # argument that does not start with "-", or immediately after a literal |
+ # "--". E is the help text handed to [cmdline_error], which is called |
+ # (and exits the script) whenever the command line is malformed. |
+ # |
+ proc process {avar lArgs O E} { |
+ upvar $avar A |
+ set zTrailing "" ;# Name for the trailing "..." arguments, or "" if none |
+ set lPosargs [list] |
+ |
+ # Populate A() with default values. Also, for each switch in the command |
+ # line spec, set an entry in the idx() array as follows: |
+ # |
+ # {tblname t1 "table name to use"} |
+ # -> [set idx(-tblname) {tblname t1 "table name to use"}] |
+ # |
+ # For each positional parameter, append its name to $lPosargs. If the |
+ # "..." specifier is present, set $zTrailing to the name preceding it. |
+ # |
+ foreach o $O { |
+ set nm [lindex $o 0] |
+ set nArg [llength $o] |
+ switch -- $nArg { |
+ 1 { |
+ if {[string range $nm end-2 end]=="..."} { |
+ set zTrailing [string range $nm 0 end-3] |
+ } else { |
+ lappend lPosargs $nm |
+ } |
+ } |
+ 2 { |
+ set A($nm) 0 |
+ set idx(-$nm) $o |
+ } |
+ 3 { |
+ set A($nm) [lindex $o 1] |
+ set idx(-$nm) $o |
+ } |
+ default { |
+ error "Error in command line specification" |
+ } |
+ } |
} |
- |
- -fts4 { |
- set O(fts) fts4 |
+ |
+ # Set explicitly specified option values |
+ # |
+ set nArg [llength $lArgs] |
+ for {set i 0} {$i < $nArg} {incr i} { |
+ set opt [lindex $lArgs $i] |
+ if {[string range $opt 0 0]!="-"} break |
+ # "--" ends the switches. Step past it so that it is not mistaken for |
+ # the first positional argument. |
+ if {[string equal $opt "--"]} { incr i ; break } |
+ # A switch typed in full always wins, even when its name is also a |
+ # prefix of a longer switch; otherwise accept any unambiguous prefix. |
+ if {[info exists idx($opt)]} { |
+ set c [list $opt] |
+ } else { |
+ set c [array names idx "${opt}*"] |
+ } |
+ if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"} |
+ if {[llength $c]>1} { cmdline_error $O $E "Ambiguous option: $opt"} |
+ |
+ if {[llength $idx($c)]==3} { |
+ if {$i==[llength $lArgs]-1} { |
+ cmdline_error $O $E "Option requires argument: $c" |
+ } |
+ incr i |
+ set A([lindex $idx($c) 0]) [lindex $lArgs $i] |
+ } else { |
+ set A([lindex $idx($c) 0]) 1 |
+ } |
} |
- |
- -fts5 { |
- set O(fts) fts5 |
+ |
+ # Deal with positional arguments. $i is the index of the first argument |
+ # that was not consumed as a switch. |
+ # |
+ set nPosarg [llength $lPosargs] |
+ set nRem [expr {$nArg - $i}] |
+ if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} { |
+ cmdline_error $O $E |
+ } |
+ for {set j 0} {$j < $nPosarg} {incr j} { |
+ set A([lindex $lPosargs $j]) [lindex $lArgs [expr {$j+$i}]] |
+ } |
+ if {$zTrailing!=""} { |
+ set A($zTrailing) [lrange $lArgs [expr {$j+$i}] end] |
} |
} |
+} ;# namespace eval cmdline |
+# End of command line options processor. |
+########################################################################### |
+########################################################################### |
+ |
+# Parse the command line. On return the global array A() holds one entry |
+# per switch plus A(database) and A(file); a malformed command line makes |
+# the option processor print a usage message and exit instead. |
+process_cmdline |
+ |
+# If -fts4 was specified, use fts4. Otherwise, fts5. Only A(fts4) is |
+# consulted here; the -fts5 switch itself is not read at this point. |
+if {$A(fts4)} { |
+ set A(fts) fts4 |
+} else { |
+ set A(fts) fts5 |
} |
-if {$i > [llength $argv]-2} usage |
-set O(db) [lindex $argv $i] |
-set O(files) [lrange $argv [expr $i+1] end] |
- |
-sqlite3 db $O(db) |
+# Open the database named by the DATABASE argument as the connection |
+# command "db"; the rest of the script talks to it through [db eval]. |
+sqlite3 db $A(database) |
# Create the FTS table in the db. Return a list of the table columns. |
# |
proc create_table {} { |
- global O |
+ global A |
set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z] |
- set nCol [llength $O(aColSize)] |
+ set nCol [llength $A(colsize)] |
set cols [lrange $cols 0 [expr $nCol-1]] |
- set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $O(tblname) USING $O(fts) (" |
+ set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) (" |
append sql [join $cols ,] |
- append sql ");" |
+ if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" } |
+ append sql ", prefix='$A(prefix)');" |
db eval $sql |
return $cols |
@@ -89,32 +190,40 @@ proc readfile {file} { |
split $data |
} |
+# Return a list made of the elements of list L repeated n times, in order. |
+# If n is zero or negative the result is an empty list. |
+# |
+proc repeat {L n} { |
+ set res [list] |
+ for {set i 0} {$i < $n} {incr i} { |
+ # Append element by element: lappend grows $res in place, whereas |
+ # rebuilding it with [concat] copies everything accumulated so far on |
+ # every pass. |
+ foreach tok $L { lappend res $tok } |
+ } |
+ return $res |
+} |
+ |
+# Validate -colsize before doing any work. Each entry must be a |
+# non-negative integer and the entries must sum to at least one: the insert |
+# loop below advances through the token list by these amounts, so a row |
+# width of zero (or less) would never reach the end of the list. |
+# |
+set nRowTokens 0 |
+foreach s $A(colsize) { |
+ if {![string is integer -strict $s] || $s < 0} { |
+ error "-colsize entries must be non-negative integers: \"$A(colsize)\"" |
+ } |
+ incr nRowTokens $s |
+} |
+if {$nRowTokens <= 0} { |
+ error "-colsize must specify at least one token per row" |
+} |
+ |
# Load all the data into a big list of tokens. |
# |
set tokens [list] |
-foreach f $O(files) { |
- set tokens [concat $tokens [readfile $f]] |
+foreach f $A(file) { |
+ set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]] |
} |
set N [llength $tokens] |
set i 0 |
set cols [create_table] |
-set sql "INSERT INTO $O(tblname) VALUES(\$[lindex $cols 0]" |
+# Build "INSERT INTO tbl VALUES($R(a), $R(b), ...)". The \$ keeps the |
+# variable references literal in the SQL text; they are filled in from the |
+# R() array each time the statement is run by [db eval]. |
+set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])" |
foreach c [lrange $cols 1 end] { |
- append sql ", \$A($c)" |
+ append sql ", \$R($c)" |
} |
append sql ")" |
-db eval BEGIN |
+# With -trans 1 (the default) all inserts share a single transaction. |
+if {$A(trans)} { db eval BEGIN } |
while {$i < $N} { |
- foreach c $cols s $O(aColSize) { |
- set A($c) [lrange $tokens $i [expr $i+$s-1]] |
+ # Slice the next $s tokens off the list for each column in turn. |
+ foreach c $cols s $A(colsize) { |
+ set R($c) [lrange $tokens $i [expr {$i+$s-1}]] |
incr i $s |
} |
db eval $sql |
} |
-db eval COMMIT |
+if {$A(trans)} { db eval COMMIT } |