| Index: third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl
|
| diff --git a/third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl b/third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl
|
| index c22c5dae7270b7803ac5178cf9448b8030ec396d..4766b00b06b5c56040f5b6ef6b6f800d3064b95f 100644
|
| --- a/third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl
|
| +++ b/third_party/sqlite/src/ext/fts5/tool/fts5txt2db.tcl
|
| @@ -1,80 +1,181 @@
|
| -
|
| -
|
| -proc usage {} {
|
| - puts stderr "$::argv0 ?OPTIONS? DATABASE FILE1..."
|
| - puts stderr ""
|
| - puts stderr "Options are"
|
| - puts stderr " -fts5"
|
| - puts stderr " -fts4"
|
| - puts stderr " -colsize <list of column sizes>"
|
| - puts stderr {
|
| -This script is designed to create fts4/5 tables with more than one column.
|
| -The -colsize option should be set to a Tcl list of integer values, one for
|
| -each column in the table. Each value is the number of tokens that will be
|
| -inserted into the column value for each row. For example, setting the -colsize
|
| -option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
|
| -tokens per row in each, respectively.
|
| -
|
| -Each "FILE" argument should be a text file. The contents of these text files is
|
| -split on whitespace characters to form a list of tokens. The first N1 tokens
|
| -are used for the first column of the first row, where N1 is the first element
|
| -of the -colsize list. The next N2 are used for the second column of the first
|
| -row, and so on. Rows are added to the table until the entire list of tokens
|
| -is exhausted.
|
| -}
|
| - exit -1
|
| +##########################################################################
|
| +# 2016 Jan 27
|
| +#
|
| +# The author disclaims copyright to this source code. In place of
|
| +# a legal notice, here is a blessing:
|
| +#
|
| +# May you do good and not evil.
|
| +# May you find forgiveness for yourself and forgive others.
|
| +# May you share freely, never taking more than you give.
|
| +# process_cmdline: parse $::argv into global array ::A via cmdline::process.
|
| +proc process_cmdline {} {
|
| + cmdline::process ::A $::argv {
|
| + {fts5 "use fts5 (this is the default)"}
|
| + {fts4 "use fts4"}
|
| + {colsize "10 10 10" "list of column sizes"}
|
| + {tblname "t1" "table name to create"}
|
| + {detail "full" "Fts5 detail mode to use"}
|
| + {repeat 1 "Load each file this many times"}
|
| + {prefix "" "Fts prefix= option"}
|
| + {trans 1 "True to use a transaction"}
|
| + database
|
| + file...
|
| + } {
|
| + This script is designed to create fts4/5 tables with more than one column.
|
| + The -colsize option should be set to a Tcl list of integer values, one for
|
| + each column in the table. Each value is the number of tokens that will be
|
| + inserted into the column value for each row. For example, setting the -colsize
|
| + option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
|
| + tokens per row in each, respectively.
|
| +
|
| + Each "FILE" argument should be a text file. The contents of these text files
|
| + is split on whitespace characters to form a list of tokens. The first N1
|
| + tokens are used for the first column of the first row, where N1 is the first
|
| + element of the -colsize list. The next N2 are used for the second column of
|
| + the first row, and so on. Rows are added to the table until the entire list
|
| + of tokens is exhausted.
|
| + }
|
| }
|
|
|
| -set O(aColSize) [list 10 10 10]
|
| -set O(tblname) t1
|
| -set O(fts) fts5
|
| -
|
| -
|
| -set options_with_values {-colsize}
|
| -
|
| -for {set i 0} {$i < [llength $argv]} {incr i} {
|
| - set opt [lindex $argv $i]
|
| - if {[string range $opt 0 0]!="-"} break
|
| -
|
| - if {[lsearch $options_with_values $opt]>=0} {
|
| - incr i
|
| - if {$i==[llength $argv]} usage
|
| - set val [lindex $argv $i]
|
| +###########################################################################
|
| +###########################################################################
|
| +# Command line options processor. This is generic code that can be copied
|
| +# between scripts.
|
| +#
|
| +namespace eval cmdline {
|
| + # cmdline_error O E ?MSG?
|
| + # Write "Error: MSG" (only when MSG is non-empty), a usage summary derived from
|
| + # the option spec O, and the help text E to stderr, then exit with code -1.
|
| + proc cmdline_error {O E {msg ""}} {
|
| + if {$msg != ""} { puts stderr "Error: $msg" ; puts stderr "" }
|
| + # Positional parameters are the 1-element specs; show them in upper case.
|
| + set L [list]
|
| + foreach o $O {
|
| + if {[llength $o]==1} {
|
| + lappend L [string toupper $o]
|
| + }
|
| + }
|
| +
|
| + puts stderr "Usage: $::argv0 ?SWITCHES? $L"
|
| + puts stderr ""
|
| + puts stderr "Switches are:"
|
| + foreach o $O {
|
| + if {[llength $o]==3} {
|
| + foreach {a b c} $o {}
|
| + puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b]
|
| + } elseif {[llength $o]==2} {
|
| + foreach {a b} $o {}
|
| + puts stderr [format " -%-15s %s" $a $b]
|
| + }
|
| + }
|
| + puts stderr ""
|
| + puts stderr $E
|
| + exit -1
|
| }
|
| -
|
| - switch -- $opt {
|
| - -colsize {
|
| - set O(aColSize) $val
|
| + # process AVAR LARGS O E: parse argument list LARGS against spec O, storing the
|
| + # value of every switch and positional parameter in caller array AVAR. Any usage
|
| + # error is reported through cmdline_error (with help text E), which exits.
|
| + proc process {avar lArgs O E} {
|
| + upvar $avar A
|
| + set zTrailing "" ;# Name of the trailing "NAME..." parameter, or "" if none
|
| + set lPosargs [list]
|
| +
|
| + # Populate A() with default values. Also, for each switch in the command
|
| + # line spec, set an entry in the idx() array as follows:
|
| + #
|
| + # {tblname t1 "table name to use"}
|
| + # -> [set idx(-tblname) {tblname t1 "table name to use"}]
|
| + # For each positional parameter, append its name to $lPosargs. If the ...
|
| + # specifier is present, set $zTrailing to the parameter name minus the "...".
|
| + foreach o $O {
|
| + set nm [lindex $o 0]
|
| + set nArg [llength $o]
|
| + switch -- $nArg {
|
| + 1 {
|
| + if {[string range $nm end-2 end]=="..."} {
|
| + set zTrailing [string range $nm 0 end-3]
|
| + } else {
|
| + lappend lPosargs $nm
|
| + }
|
| + }
|
| + 2 {
|
| + set A($nm) 0
|
| + set idx(-$nm) $o
|
| + }
|
| + 3 {
|
| + set A($nm) [lindex $o 1]
|
| + set idx(-$nm) $o
|
| + }
|
| + default {
|
| + error "Error in command line specification"
|
| + }
|
| + }
|
| }
|
| -
|
| - -fts4 {
|
| - set O(fts) fts4
|
| +
|
| + # Set explicitly specified option values. Switches may be abbreviated to any
|
| + # unambiguous prefix; an exact name always wins. "--" ends the switches.
|
| + set nArg [llength $lArgs]
|
| + for {set i 0} {$i < $nArg} {incr i} {
|
| + set opt [lindex $lArgs $i]
|
| + if {$opt=="--"} { incr i ; break } ;# consume the terminator itself
|
| + if {[string index $opt 0]!="-"} break
|
| + if {[info exists idx($opt)]} {set c [list $opt]} else {set c [array names idx "${opt}*"]}
|
| + if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
|
| + if {[llength $c]>1} { cmdline_error $O $E "Ambiguous option: $opt"}
|
| + if {[llength $idx($c)]==3} {
|
| + if {$i==[llength $lArgs]-1} {
|
| + cmdline_error $O $E "Option requires argument: $c"
|
| + }
|
| + incr i
|
| + set A([lindex $idx($c) 0]) [lindex $lArgs $i]
|
| + } else {
|
| + set A([lindex $idx($c) 0]) 1
|
| + }
|
| }
|
| -
|
| - -fts5 {
|
| - set O(fts) fts5
|
| +
|
| + # Deal with positional arguments.
|
| + #
|
| + set nPosarg [llength $lPosargs]
|
| + set nRem [expr {$nArg - $i}]
|
| + if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
|
| + cmdline_error $O $E
|
| + }
|
| + for {set j 0} {$j < $nPosarg} {incr j} {
|
| + set A([lindex $lPosargs $j]) [lindex $lArgs [expr {$j+$i}]]
|
| + }
|
| + if {$zTrailing!=""} {
|
| + set A($zTrailing) [lrange $lArgs [expr {$j+$i}] end]
|
| }
|
| }
|
| +} ;# namespace eval cmdline
|
| +# End of command line options processor.
|
| +###########################################################################
|
| +###########################################################################
|
| +
|
| +process_cmdline
|
| +
|
| +# Select the FTS module: fts4 if -fts4 was given, else fts5 (A(fts5) is not read here).
|
| +if {$A(fts4)} {
|
| + set A(fts) fts4
|
| +} else {
|
| + set A(fts) fts5
|
| }
|
|
|
| -if {$i > [llength $argv]-2} usage
|
| -set O(db) [lindex $argv $i]
|
| -set O(files) [lrange $argv [expr $i+1] end]
|
| -
|
| -sqlite3 db $O(db)
|
| +sqlite3 db $A(database)
|
|
|
| # Create the FTS table in the db. Return a list of the table columns.
|
| #
|
| proc create_table {} {
|
| - global O
|
| + global A
|
| set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
|
|
|
| - set nCol [llength $O(aColSize)]
|
| + set nCol [llength $A(colsize)]
|
| set cols [lrange $cols 0 [expr $nCol-1]]
|
|
|
| - set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $O(tblname) USING $O(fts) ("
|
| + set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
|
| append sql [join $cols ,]
|
| - append sql ");"
|
| + if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
|
| + append sql ", prefix='$A(prefix)');"
|
|
|
| db eval $sql
|
| return $cols
|
| @@ -89,32 +190,40 @@ proc readfile {file} {
|
| split $data
|
| }
|
|
|
| +proc repeat {L n} {
|
| + # Return a list holding the elements of $L repeated $n times, in order.
|
| + set out {}
|
| + set k 0
|
| + while {$k < $n} { set out [concat $out $L] ; incr k }
|
| + return $out
|
| +}
|
| +
|
|
|
| # Load all the data into a big list of tokens.
|
| #
|
| set tokens [list]
|
| -foreach f $O(files) {
|
| - set tokens [concat $tokens [readfile $f]]
|
| +foreach f $A(file) {
|
| + set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]] ;# -repeat copies per file
|
| }
|
|
|
| set N [llength $tokens]
|
| set i 0
|
| set cols [create_table]
|
| -set sql "INSERT INTO $O(tblname) VALUES(\$[lindex $cols 0]"
|
| +set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])"
|
| foreach c [lrange $cols 1 end] {
|
| - append sql ", \$A($c)"
|
| + append sql ", \$R($c)"
|
| }
|
| append sql ")"
|
|
|
| -db eval BEGIN
|
| +if {$A(trans)} { db eval BEGIN } ;# -trans 0 skips the enclosing transaction
|
| while {$i < $N} {
|
| - foreach c $cols s $O(aColSize) {
|
| - set A($c) [lrange $tokens $i [expr $i+$s-1]]
|
| + foreach c $cols s $A(colsize) {
|
| + set R($c) [lrange $tokens $i [expr {$i+$s-1}]] ;# row value referenced as $R(col) in $sql
|
| incr i $s
|
| }
|
| db eval $sql
|
| }
|
| -db eval COMMIT
|
| +if {$A(trans)} { db eval COMMIT } ;# matches the conditional BEGIN above
|
|
|
|
|
|
|
|
|