Index: third_party/sqlite/src/tool/spaceanal.tcl |
diff --git a/third_party/sqlite/src/tool/spaceanal.tcl b/third_party/sqlite/src/tool/spaceanal.tcl |
index bf6244e8c994c269b3f7d4cfd356bd7da6548574..a227b8524359c00b9e75f052c092d5e65ab0575f 100644 |
--- a/third_party/sqlite/src/tool/spaceanal.tcl |
+++ b/third_party/sqlite/src/tool/spaceanal.tcl |
@@ -4,36 +4,114 @@ |
# |
if {[catch { |
- |
# Get the name of the database to analyze |
# |
-#set argv $argv0 |
-if {[llength $argv]!=1} { |
+proc usage {} { |
+ set argv0 [file rootname [file tail [info nameofexecutable]]] |
puts stderr "Usage: $argv0 database-name" |
exit 1 |
} |
-set file_to_analyze [lindex $argv 0] |
-if {![file exists $file_to_analyze]} { |
- puts stderr "No such file: $file_to_analyze" |
+set file_to_analyze {} |
+set flags(-pageinfo) 0 |
+set flags(-stats) 0 |
+append argv {} |
+foreach arg $argv { |
+ if {[regexp {^-+pageinfo$} $arg]} { |
+ set flags(-pageinfo) 1 |
+ } elseif {[regexp {^-+stats$} $arg]} { |
+ set flags(-stats) 1 |
+ } elseif {[regexp {^-} $arg]} { |
+ puts stderr "Unknown option: $arg" |
+ usage |
+ } elseif {$file_to_analyze!=""} { |
+ usage |
+ } else { |
+ set file_to_analyze $arg |
+ } |
+} |
+if {$file_to_analyze==""} usage |
+set root_filename $file_to_analyze |
+regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename |
+if {![file exists $root_filename]} { |
+ puts stderr "No such file: $root_filename" |
exit 1 |
} |
-if {![file readable $file_to_analyze]} { |
- puts stderr "File is not readable: $file_to_analyze" |
+if {![file readable $root_filename]} { |
+ puts stderr "File is not readable: $root_filename" |
exit 1 |
} |
-if {[file size $file_to_analyze]<512} { |
- puts stderr "Empty or malformed database: $file_to_analyze" |
+set true_file_size [file size $root_filename] |
+if {$true_file_size<512} { |
+ puts stderr "Empty or malformed database: $root_filename" |
exit 1 |
} |
+# Compute the total file size assuming test_multiplexor is being used. |
+# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled |
+# |
+set extension [file extension $root_filename] |
+set pattern $root_filename |
+append pattern {[0-3][0-9][0-9]} |
+foreach f [glob -nocomplain $pattern] { |
+ incr true_file_size [file size $f] |
+ set extension {} |
+} |
+if {[string length $extension]>=2 && [string length $extension]<=4} { |
+ set pattern [file rootname $root_filename] |
+ append pattern {.[0-3][0-9][0-9]} |
+ foreach f [glob -nocomplain $pattern] { |
+ incr true_file_size [file size $f] |
+ } |
+} |
+ |
# Open the database |
# |
-sqlite3 db [lindex $argv 0] |
+if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} { |
+ puts stderr "error trying to open $file_to_analyze: $msg" |
+ exit 1 |
+} |
register_dbstat_vtab db |
-set pageSize [db one {PRAGMA page_size}] |
+db eval {SELECT count(*) FROM sqlite_master} |
+set pageSize [expr {wide([db one {PRAGMA page_size}])}] |
-#set DB [btree_open [lindex $argv 0] 1000 0] |
+if {$flags(-pageinfo)} { |
+ db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
+ db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} { |
+ puts "$pageno $name $path" |
+ } |
+ exit 0 |
+} |
+if {$flags(-stats)} { |
+ db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
+ puts "BEGIN;" |
+ puts "CREATE TABLE stats(" |
+ puts " name STRING, /* Name of table or index */" |
+ puts " path INTEGER, /* Path to page from root */" |
+ puts " pageno INTEGER, /* Page number */" |
+ puts " pagetype STRING, /* 'internal', 'leaf' or 'overflow' */" |
+ puts " ncell INTEGER, /* Cells on page (0 for overflow) */" |
+ puts " payload INTEGER, /* Bytes of payload on this page */" |
+ puts " unused INTEGER, /* Bytes of unused space on this page */" |
+ puts " mx_payload INTEGER, /* Largest payload size of all cells */" |
+ puts " pgoffset INTEGER, /* Offset of page in file */" |
+ puts " pgsize INTEGER /* Size of the page */" |
+ puts ");" |
+ db eval {SELECT quote(name) || ',' || |
+ quote(path) || ',' || |
+ quote(pageno) || ',' || |
+ quote(pagetype) || ',' || |
+ quote(ncell) || ',' || |
+ quote(payload) || ',' || |
+ quote(unused) || ',' || |
+ quote(mx_payload) || ',' || |
+ quote(pgoffset) || ',' || |
+ quote(pgsize) AS x FROM stat} { |
+ puts "INSERT INTO stats VALUES($x);" |
+ } |
+ puts "COMMIT;" |
+ exit 0 |
+} |
# In-memory database for collecting statistics. This script loops through |
# the tables and indices in the database being analyzed, adding a row for each |
@@ -41,8 +119,7 @@ set pageSize [db one {PRAGMA page_size}] |
# queries the in-memory db to produce the space-analysis report. |
# |
sqlite3 mem :memory: |
-set tabledef\ |
-{CREATE TABLE space_used( |
+set tabledef {CREATE TABLE space_used( |
name clob, -- Name of a table or index in the database file |
tblname clob, -- Name of associated table |
is_index boolean, -- TRUE if it is an index, false for a table |
@@ -58,17 +135,17 @@ set tabledef\ |
int_unused int, -- Number of unused bytes on interior pages |
leaf_unused int, -- Number of unused bytes on primary pages |
ovfl_unused int, -- Number of unused bytes on overflow pages |
- gap_cnt int -- Number of gaps in the page layout |
+ gap_cnt int, -- Number of gaps in the page layout |
+ compressed_size int -- Total bytes stored on disk |
);} |
mem eval $tabledef |
# Create a temporary "dbstat" virtual table. |
# |
-db eval { |
- CREATE VIRTUAL TABLE temp.stat USING dbstat; |
- CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat ORDER BY name, path; |
- DROP TABLE temp.stat; |
-} |
+db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
+db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat |
+ ORDER BY name, path} |
+db eval {DROP TABLE temp.stat} |
proc isleaf {pagetype is_index} { |
return [expr {$pagetype == "leaf" || ($pagetype == "internal" && $is_index)}] |
@@ -84,6 +161,8 @@ db func isleaf isleaf |
db func isinternal isinternal |
db func isoverflow isoverflow |
+set isCompressed 0 |
+set compressOverhead 0 |
set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 } |
foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { |
@@ -101,10 +180,18 @@ foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { |
sum(isoverflow(pagetype, $is_index)) AS ovfl_pages, |
sum(isinternal(pagetype, $is_index) * unused) AS int_unused, |
sum(isleaf(pagetype, $is_index) * unused) AS leaf_unused, |
- sum(isoverflow(pagetype, $is_index) * unused) AS ovfl_unused |
+ sum(isoverflow(pagetype, $is_index) * unused) AS ovfl_unused, |
+ sum(pgsize) AS compressed_size |
FROM temp.dbstat WHERE name = $name |
} break |
+ set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] |
+ set storage [expr {$total_pages*$pageSize}] |
+ if {!$isCompressed && $storage>$compressed_size} { |
+ set isCompressed 1 |
+ set compressOverhead 14 |
+ } |
+ |
# Column 'gap_cnt' is set to the number of non-contiguous entries in the |
# list of pages visited if the b-tree structure is traversed in a top-down |
# fashion (each node visited before its child-tree is passed). Any overflow |
@@ -112,15 +199,17 @@ foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { |
# is. |
# |
set gap_cnt 0 |
- set pglist [db eval { |
- SELECT pageno FROM temp.dbstat WHERE name = $name ORDER BY rowid |
- }] |
- set prev [lindex $pglist 0] |
- foreach pgno [lrange $pglist 1 end] { |
- if {$pgno != $prev+1} {incr gap_cnt} |
- set prev $pgno |
+ set prev 0 |
+ db eval { |
+ SELECT pageno, pagetype FROM temp.dbstat |
+ WHERE name=$name |
+ ORDER BY pageno |
+ } { |
+ if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} { |
+ incr gap_cnt |
+ } |
+ set prev $pageno |
} |
- |
mem eval { |
INSERT INTO space_used VALUES( |
$name, |
@@ -138,14 +227,15 @@ foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { |
$int_unused, |
$leaf_unused, |
$ovfl_unused, |
- $gap_cnt |
+ $gap_cnt, |
+ $compressed_size |
); |
} |
} |
proc integerify {real} { |
if {[string is double -strict $real]} { |
- return [expr {int($real)}] |
+ return [expr {wide($real)}] |
} else { |
return 0 |
} |
@@ -158,8 +248,19 @@ mem function int integerify |
# [quote {hello world's}] == {'hello world''s'} |
# |
proc quote {txt} { |
- regsub -all ' $txt '' q |
- return '$q' |
+ return [string map {' ''} $txt] |
+} |
+ |
+# Output a title line |
+# |
+proc titleline {title} { |
+ if {$title==""} { |
+ puts [string repeat * 79] |
+ } else { |
+ set len [string length $title] |
+ set stars [string repeat * [expr 79-$len-5]] |
+ puts "*** $title $stars" |
+ } |
} |
# Generate a single line of output in the statistics section of the |
@@ -167,7 +268,7 @@ proc quote {txt} { |
# |
proc statline {title value {extra {}}} { |
set len [string length $title] |
- set dots [string range {......................................} $len end] |
+ set dots [string repeat . [expr 50-$len]] |
set len [string length $value] |
set sp2 [string range { } $len end] |
if {$extra ne ""} { |
@@ -199,8 +300,8 @@ proc divide {num denom} { |
# Generate a subreport that covers some subset of the database. |
# the $where clause determines which subset to analyze. |
# |
-proc subreport {title where} { |
- global pageSize file_pgcnt |
+proc subreport {title where showFrag} { |
+ global pageSize file_pgcnt compressOverhead |
# Query the in-memory database for the sum of various statistics |
# for the subset of tables/indices identified by the WHERE clause in |
@@ -224,15 +325,14 @@ proc subreport {title where} { |
int(sum(leaf_unused)) AS leaf_unused, |
int(sum(int_unused)) AS int_unused, |
int(sum(ovfl_unused)) AS ovfl_unused, |
- int(sum(gap_cnt)) AS gap_cnt |
+ int(sum(gap_cnt)) AS gap_cnt, |
+ int(sum(compressed_size)) AS compressed_size |
FROM space_used WHERE $where" {} {} |
# Output the sub-report title, nicely decorated with * characters. |
# |
puts "" |
- set len [string length $title] |
- set stars [string repeat * [expr 65-$len]] |
- puts "*** $title $stars" |
+ titleline $title |
puts "" |
# Calculate statistics and store the results in TCL variables, as follows: |
@@ -274,15 +374,21 @@ proc subreport {title where} { |
statline {Percentage of total database} $total_pages_percent |
statline {Number of entries} $nleaf |
statline {Bytes of storage consumed} $storage |
+ if {$compressed_size!=$storage} { |
+ set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}] |
+ set pct [expr {$compressed_size*100.0/$storage}] |
+ set pct [format {%5.1f%%} $pct] |
+ statline {Bytes used after compression} $compressed_size $pct |
+ } |
statline {Bytes of payload} $payload $payload_percent |
statline {Average payload per entry} $avg_payload |
statline {Average unused bytes per entry} $avg_unused |
if {[info exists avg_fanout]} { |
statline {Average fanout} $avg_fanout |
} |
- if {$total_pages>1} { |
- set fragmentation [percent $gap_cnt [expr {$total_pages-1}] {fragmentation}] |
- statline {Fragmentation} $fragmentation |
+ if {$showFrag && $total_pages>1} { |
+ set fragmentation [percent $gap_cnt [expr {$total_pages-1}]] |
+ statline {Non-sequential pages} $gap_cnt $fragmentation |
} |
statline {Maximum payload per entry} $mx_payload |
statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent |
@@ -293,16 +399,16 @@ proc subreport {title where} { |
statline {Overflow pages used} $ovfl_pages |
statline {Total pages used} $total_pages |
if {$int_unused>0} { |
- set int_unused_percent \ |
- [percent $int_unused [expr {$int_pages*$pageSize}] {of index space}] |
+ set int_unused_percent [ |
+ percent $int_unused [expr {$int_pages*$pageSize}] {of index space}] |
statline "Unused bytes on index pages" $int_unused $int_unused_percent |
} |
- statline "Unused bytes on primary pages" $leaf_unused \ |
- [percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}] |
- statline "Unused bytes on overflow pages" $ovfl_unused \ |
- [percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}] |
- statline "Unused bytes on all pages" $total_unused \ |
- [percent $total_unused $storage {of all space}] |
+ statline "Unused bytes on primary pages" $leaf_unused [ |
+ percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}] |
+ statline "Unused bytes on overflow pages" $ovfl_unused [ |
+ percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}] |
+ statline "Unused bytes on all pages" $total_unused [ |
+ percent $total_unused $storage {of all space}] |
return 1 |
} |
@@ -330,7 +436,7 @@ proc autovacuum_overhead {filePages pageSize} { |
set ptrsPerPage [expr double($pageSize/5)] |
# Return the number of pointer map pages in the database. |
- return [expr int(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))] |
+ return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))] |
} |
@@ -357,17 +463,24 @@ proc autovacuum_overhead {filePages pageSize} { |
# (not including sqlite_master) |
# user_percent: $user_payload as a percentage of total file size. |
-set file_bytes [file size $file_to_analyze] |
-set file_pgcnt [expr {$file_bytes/$pageSize}] |
+### The following, setting $file_bytes based on the actual size of the file |
+### on disk, causes this tool to choke on zipvfs databases. So set it based |
+### on the return of [PRAGMA page_count] instead. |
+if 0 { |
+ set file_bytes [file size $file_to_analyze] |
+ set file_pgcnt [expr {$file_bytes/$pageSize}] |
+} |
+set file_pgcnt [db one {PRAGMA page_count}] |
+set file_bytes [expr {$file_pgcnt * $pageSize}] |
set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize] |
set av_percent [percent $av_pgcnt $file_pgcnt] |
set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used} |
-set inuse_pgcnt [expr int([mem eval $sql])] |
+set inuse_pgcnt [expr wide([mem eval $sql])] |
set inuse_percent [percent $inuse_pgcnt $file_pgcnt] |
-set free_pgcnt [expr $file_pgcnt-$inuse_pgcnt-$av_pgcnt] |
+set free_pgcnt [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}] |
set free_percent [percent $free_pgcnt $file_pgcnt] |
set free_pgcnt2 [db one {PRAGMA freelist_count}] |
set free_percent2 [percent $free_pgcnt2 $file_pgcnt] |
@@ -387,10 +500,7 @@ set user_percent [percent $user_payload $file_bytes] |
# Output the summary statistics calculated above. |
# |
-puts "/** Disk-Space Utilization Report For $file_to_analyze" |
-catch { |
- puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]" |
-} |
+puts "/** Disk-Space Utilization Report For $root_filename" |
puts "" |
statline {Page size in bytes} $pageSize |
statline {Pages in the whole file (measured)} $file_pgcnt |
@@ -401,50 +511,94 @@ statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent |
statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent |
statline {Number of tables in the database} $ntable |
statline {Number of indices} $nindex |
-statline {Number of named indices} $nmanindex |
-statline {Automatically generated indices} $nautoindex |
-statline {Size of the file in bytes} $file_bytes |
+statline {Number of defined indices} $nmanindex |
+statline {Number of implied indices} $nautoindex |
+if {$isCompressed} { |
+ statline {Size of uncompressed content in bytes} $file_bytes |
+ set efficiency [percent $true_file_size $file_bytes] |
+ statline {Size of compressed file on disk} $true_file_size $efficiency |
+} else { |
+ statline {Size of the file in bytes} $file_bytes |
+} |
statline {Bytes of user payload stored} $user_payload $user_percent |
# Output table rankings |
# |
puts "" |
-puts "*** Page counts for all tables with their indices ********************" |
+titleline "Page counts for all tables with their indices" |
puts "" |
mem eval {SELECT tblname, count(*) AS cnt, |
int(sum(int_pages+leaf_pages+ovfl_pages)) AS size |
FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} { |
statline [string toupper $tblname] $size [percent $size $file_pgcnt] |
} |
+puts "" |
+titleline "Page counts for all tables and indices separately" |
+puts "" |
+mem eval { |
+ SELECT |
+ upper(name) AS nm, |
+ int(int_pages+leaf_pages+ovfl_pages) AS size |
+ FROM space_used |
+ ORDER BY size+0 DESC, name} {} { |
+ statline $nm $size [percent $size $file_pgcnt] |
+} |
+if {$isCompressed} { |
+ puts "" |
+ titleline "Bytes of disk space used after compression" |
+ puts "" |
+ set csum 0 |
+ mem eval {SELECT tblname, |
+ int(sum(compressed_size)) + |
+ $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages) |
+ AS csize |
+ FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} { |
+ incr csum $csize |
+ statline [string toupper $tblname] $csize [percent $csize $true_file_size] |
+ } |
+ set overhead [expr {$true_file_size - $csum}] |
+ if {$overhead>0} { |
+ statline {Header and free space} $overhead [percent $overhead $true_file_size] |
+ } |
+} |
# Output subreports |
# |
if {$nindex>0} { |
- subreport {All tables and indices} 1 |
+ subreport {All tables and indices} 1 0 |
} |
-subreport {All tables} {NOT is_index} |
+subreport {All tables} {NOT is_index} 0 |
if {$nindex>0} { |
- subreport {All indices} {is_index} |
+ subreport {All indices} {is_index} 0 |
} |
-foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index |
+foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used |
ORDER BY name}] { |
- regsub ' $tbl '' qn |
+ set qn [quote $tbl] |
set name [string toupper $tbl] |
- set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"] |
+ set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}] |
if {$n>1} { |
- subreport "Table $name and all its indices" "tblname='$qn'" |
- subreport "Table $name w/o any indices" "name='$qn'" |
- subreport "Indices of table $name" "tblname='$qn' AND is_index" |
+ set idxlist [mem eval "SELECT name FROM space_used |
+ WHERE tblname='$qn' AND is_index |
+ ORDER BY 1"] |
+ subreport "Table $name and all its indices" "tblname='$qn'" 0 |
+ subreport "Table $name w/o any indices" "name='$qn'" 1 |
+ if {[llength $idxlist]>1} { |
+ subreport "Indices of table $name" "tblname='$qn' AND is_index" 0 |
+ } |
+ foreach idx $idxlist { |
+ set qidx [quote $idx] |
+ subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1 |
+ } |
} else { |
- subreport "Table $name" "name='$qn'" |
+ subreport "Table $name" "name='$qn'" 1 |
} |
} |
# Output instructions on what the numbers above mean. |
# |
+puts "" |
+titleline Definitions |
puts { |
-*** Definitions ****************************************************** |
- |
Page size in bytes |
The number of bytes in a single page of the database file. |
@@ -452,11 +606,9 @@ Page size in bytes |
Number of pages in the whole file |
} |
-puts \ |
-" The number of $pageSize-byte pages that go into forming the complete |
+puts " The number of $pageSize-byte pages that go into forming the complete |
database" |
-puts \ |
-{ |
+puts { |
Pages that store data |
The number of pages that store data, either as primary B*Tree pages or |
@@ -483,11 +635,11 @@ Number of indices |
The total number of indices in the database. |
-Number of named indices |
+Number of defined indices |
The number of indices created using an explicit CREATE INDEX statement. |
-Automatically generated indices |
+Number of implied indices |
The number of indices used to implement PRIMARY KEY or UNIQUE constraints |
on tables. |
@@ -536,13 +688,16 @@ Average unused bytes per entry |
category on a per-entry basis. This is the number of unused bytes on |
all pages divided by the number of entries. |
-Fragmentation |
+Non-sequential pages |
- The percentage of pages in the table or index that are not |
- consecutive in the disk file. Many filesystems are optimized |
- for sequential file access so smaller fragmentation numbers |
- sometimes result in faster queries, especially for larger |
- database files that do not fit in the disk cache. |
+ The number of pages in the table or index that are out of sequence. |
+ Many filesystems are optimized for sequential file access so a small |
+ number of non-sequential pages might result in faster queries, |
+ especially for larger database files that do not fit in the disk cache. |
+ Note that after running VACUUM, the root page of each table or index is |
+ at the beginning of the database file and all other pages are in a |
+ separate part of the database file, resulting in a single non- |
+ sequential page. |
Maximum payload per entry |
@@ -598,7 +753,7 @@ Unused bytes on all pages |
# Output a dump of the in-memory database. This can be used for more |
# complex offline analysis. |
# |
-puts "**********************************************************************" |
+titleline {} |
puts "The entire text of this report can be sourced into any SQL database" |
puts "engine for further analysis. All of the text above is an SQL comment." |
puts "The data used to generate this report follows:" |
@@ -611,7 +766,7 @@ mem eval {SELECT * FROM space_used} x { |
set sep ( |
foreach col $x(*) { |
set v $x($col) |
- if {$v=="" || ![string is double $v]} {set v [quote $v]} |
+ if {$v=="" || ![string is double $v]} {set v '[quote $v]'} |
puts -nonewline $sep$v |
set sep , |
} |