| OLD | NEW |
| 1 # Run this TCL script using "testfixture" in order get a report that shows | 1 # Run this TCL script using "testfixture" in order get a report that shows |
| 2 # how much disk space is used by a particular data to actually store data | 2 # how much disk space is used by a particular data to actually store data |
| 3 # versus how much space is unused. | 3 # versus how much space is unused. |
| 4 # | 4 # |
| 5 | 5 |
| 6 if {[catch { | 6 if {[catch { |
| 7 |
| 8 # Argument $tname is the name of a table within the database opened by |
| 9 # database handle [db]. Return true if it is a WITHOUT ROWID table, or |
| 10 # false otherwise. |
| 11 # |
| 12 proc is_without_rowid {tname} { |
| 13 set t [string map {' ''} $tname] |
| 14 db eval "PRAGMA index_list = '$t'" o { |
| 15 if {$o(origin) == "pk"} { |
| 16 set n $o(name) |
| 17 if {0==[db one { SELECT count(*) FROM sqlite_master WHERE name=$n }]} { |
| 18 return 1 |
| 19 } |
| 20 } |
| 21 } |
| 22 return 0 |
| 23 } |
| 24 |
| 7 # Get the name of the database to analyze | 25 # Get the name of the database to analyze |
| 8 # | 26 # |
| 9 proc usage {} { | 27 proc usage {} { |
| 10 set argv0 [file rootname [file tail [info nameofexecutable]]] | 28 set argv0 [file rootname [file tail [info nameofexecutable]]] |
| 11 puts stderr "Usage: $argv0 database-name" | 29 puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename" |
| 30 puts stderr { |
| 31 Analyze the SQLite3 database file specified by the "database-filename" |
| 32 argument and output a report detailing size and storage efficiency |
| 33 information for the database and its constituent tables and indexes. |
| 34 |
| 35 Options: |
| 36 |
| 37 --stats Output SQL text that creates a new database containing |
| 38 statistics about the database that was analyzed |
| 39 |
| 40 --pageinfo Show how each page of the database-file is used |
| 41 } |
| 12 exit 1 | 42 exit 1 |
| 13 } | 43 } |
| 14 set file_to_analyze {} | 44 set file_to_analyze {} |
| 15 set flags(-pageinfo) 0 | 45 set flags(-pageinfo) 0 |
| 16 set flags(-stats) 0 | 46 set flags(-stats) 0 |
| 17 append argv {} | 47 append argv {} |
| 18 foreach arg $argv { | 48 foreach arg $argv { |
| 19 if {[regexp {^-+pageinfo$} $arg]} { | 49 if {[regexp {^-+pageinfo$} $arg]} { |
| 20 set flags(-pageinfo) 1 | 50 set flags(-pageinfo) 1 |
| 21 } elseif {[regexp {^-+stats$} $arg]} { | 51 } elseif {[regexp {^-+stats$} $arg]} { |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 63 incr true_file_size [file size $f] | 93 incr true_file_size [file size $f] |
| 64 } | 94 } |
| 65 } | 95 } |
| 66 | 96 |
| 67 # Open the database | 97 # Open the database |
| 68 # | 98 # |
| 69 if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} { | 99 if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} { |
| 70 puts stderr "error trying to open $file_to_analyze: $msg" | 100 puts stderr "error trying to open $file_to_analyze: $msg" |
| 71 exit 1 | 101 exit 1 |
| 72 } | 102 } |
| 73 register_dbstat_vtab db | |
| 74 | 103 |
| 75 db eval {SELECT count(*) FROM sqlite_master} | 104 db eval {SELECT count(*) FROM sqlite_master} |
| 76 set pageSize [expr {wide([db one {PRAGMA page_size}])}] | 105 set pageSize [expr {wide([db one {PRAGMA page_size}])}] |
| 77 | 106 |
| 78 if {$flags(-pageinfo)} { | 107 if {$flags(-pageinfo)} { |
| 79 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} | 108 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
| 80 db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} { | 109 db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} { |
| 81 puts "$pageno $name $path" | 110 puts "$pageno $name $path" |
| 82 } | 111 } |
| 83 exit 0 | 112 exit 0 |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 118 # to an in-memory database (for which the schema is shown below). It then | 147 # to an in-memory database (for which the schema is shown below). It then |
| 119 # queries the in-memory db to produce the space-analysis report. | 148 # queries the in-memory db to produce the space-analysis report. |
| 120 # | 149 # |
| 121 sqlite3 mem :memory: | 150 sqlite3 mem :memory: |
| 122 set tabledef {CREATE TABLE space_used( | 151 set tabledef {CREATE TABLE space_used( |
| 123 name clob, -- Name of a table or index in the database file | 152 name clob, -- Name of a table or index in the database file |
| 124 tblname clob, -- Name of associated table | 153 tblname clob, -- Name of associated table |
| 125 is_index boolean, -- TRUE if it is an index, false for a table | 154 is_index boolean, -- TRUE if it is an index, false for a table |
| 126 nentry int, -- Number of entries in the BTree | 155 nentry int, -- Number of entries in the BTree |
| 127 leaf_entries int, -- Number of leaf entries | 156 leaf_entries int, -- Number of leaf entries |
| 157 depth int, -- Depth of the b-tree |
| 128 payload int, -- Total amount of data stored in this table or index | 158 payload int, -- Total amount of data stored in this table or index |
| 129 ovfl_payload int, -- Total amount of data stored on overflow pages | 159 ovfl_payload int, -- Total amount of data stored on overflow pages |
| 130 ovfl_cnt int, -- Number of entries that use overflow | 160 ovfl_cnt int, -- Number of entries that use overflow |
| 131 mx_payload int, -- Maximum payload size | 161 mx_payload int, -- Maximum payload size |
| 132 int_pages int, -- Number of interior pages used | 162 int_pages int, -- Number of interior pages used |
| 133 leaf_pages int, -- Number of leaf pages used | 163 leaf_pages int, -- Number of leaf pages used |
| 134 ovfl_pages int, -- Number of overflow pages used | 164 ovfl_pages int, -- Number of overflow pages used |
| 135 int_unused int, -- Number of unused bytes on interior pages | 165 int_unused int, -- Number of unused bytes on interior pages |
| 136 leaf_unused int, -- Number of unused bytes on primary pages | 166 leaf_unused int, -- Number of unused bytes on primary pages |
| 137 ovfl_unused int, -- Number of unused bytes on overflow pages | 167 ovfl_unused int, -- Number of unused bytes on overflow pages |
| 138 gap_cnt int, -- Number of gaps in the page layout | 168 gap_cnt int, -- Number of gaps in the page layout |
| 139 compressed_size int -- Total bytes stored on disk | 169 compressed_size int -- Total bytes stored on disk |
| 140 );} | 170 );} |
| 141 mem eval $tabledef | 171 mem eval $tabledef |
| 142 | 172 |
| 143 # Create a temporary "dbstat" virtual table. | 173 # Create a temporary "dbstat" virtual table. |
| 144 # | 174 # |
| 145 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} | 175 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
| 146 db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat | 176 db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat |
| 147 ORDER BY name, path} | 177 ORDER BY name, path} |
| 148 db eval {DROP TABLE temp.stat} | 178 db eval {DROP TABLE temp.stat} |
| 149 | 179 |
| 150 proc isleaf {pagetype is_index} { | |
| 151 return [expr {$pagetype == "leaf" || ($pagetype == "internal" && $is_index)}] | |
| 152 } | |
| 153 proc isoverflow {pagetype is_index} { | |
| 154 return [expr {$pagetype == "overflow"}] | |
| 155 } | |
| 156 proc isinternal {pagetype is_index} { | |
| 157 return [expr {$pagetype == "internal" && $is_index==0}] | |
| 158 } | |
| 159 | |
| 160 db func isleaf isleaf | |
| 161 db func isinternal isinternal | |
| 162 db func isoverflow isoverflow | |
| 163 | |
| 164 set isCompressed 0 | 180 set isCompressed 0 |
| 165 set compressOverhead 0 | 181 set compressOverhead 0 |
| 182 set depth 0 |
| 166 set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 } | 183 set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 } |
| 167 foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { | 184 foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { |
| 168 | 185 |
| 169 set is_index [expr {$name!=$tblname}] | 186 set is_index [expr {$name!=$tblname}] |
| 187 set idx_btree [expr {$is_index || [is_without_rowid $name]}] |
| 170 db eval { | 188 db eval { |
| 171 SELECT | 189 SELECT |
| 172 sum(ncell) AS nentry, | 190 sum(ncell) AS nentry, |
| 173 sum(isleaf(pagetype, $is_index) * ncell) AS leaf_entries, | 191 sum((pagetype=='leaf')*ncell) AS leaf_entries, |
| 174 sum(payload) AS payload, | 192 sum(payload) AS payload, |
| 175 sum(isoverflow(pagetype, $is_index) * payload) AS ovfl_payload, | 193 sum((pagetype=='overflow') * payload) AS ovfl_payload, |
| 176 sum(path LIKE '%+000000') AS ovfl_cnt, | 194 sum(path LIKE '%+000000') AS ovfl_cnt, |
| 177 max(mx_payload) AS mx_payload, | 195 max(mx_payload) AS mx_payload, |
| 178 sum(isinternal(pagetype, $is_index)) AS int_pages, | 196 sum(pagetype=='internal') AS int_pages, |
| 179 sum(isleaf(pagetype, $is_index)) AS leaf_pages, | 197 sum(pagetype=='leaf') AS leaf_pages, |
| 180 sum(isoverflow(pagetype, $is_index)) AS ovfl_pages, | 198 sum(pagetype=='overflow') AS ovfl_pages, |
| 181 sum(isinternal(pagetype, $is_index) * unused) AS int_unused, | 199 sum((pagetype=='internal') * unused) AS int_unused, |
| 182 sum(isleaf(pagetype, $is_index) * unused) AS leaf_unused, | 200 sum((pagetype=='leaf') * unused) AS leaf_unused, |
| 183 sum(isoverflow(pagetype, $is_index) * unused) AS ovfl_unused, | 201 sum((pagetype=='overflow') * unused) AS ovfl_unused, |
| 184 sum(pgsize) AS compressed_size | 202 sum(pgsize) AS compressed_size, |
| 203 max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4) |
| 204 AS depth |
| 185 FROM temp.dbstat WHERE name = $name | 205 FROM temp.dbstat WHERE name = $name |
| 186 } break | 206 } break |
| 187 | 207 |
| 188 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] | 208 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] |
| 189 set storage [expr {$total_pages*$pageSize}] | 209 set storage [expr {$total_pages*$pageSize}] |
| 190 if {!$isCompressed && $storage>$compressed_size} { | 210 if {!$isCompressed && $storage>$compressed_size} { |
| 191 set isCompressed 1 | 211 set isCompressed 1 |
| 192 set compressOverhead 14 | 212 set compressOverhead 14 |
| 193 } | 213 } |
| 194 | 214 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 210 } | 230 } |
| 211 set prev $pageno | 231 set prev $pageno |
| 212 } | 232 } |
| 213 mem eval { | 233 mem eval { |
| 214 INSERT INTO space_used VALUES( | 234 INSERT INTO space_used VALUES( |
| 215 $name, | 235 $name, |
| 216 $tblname, | 236 $tblname, |
| 217 $is_index, | 237 $is_index, |
| 218 $nentry, | 238 $nentry, |
| 219 $leaf_entries, | 239 $leaf_entries, |
| 240 $depth, |
| 220 $payload, | 241 $payload, |
| 221 $ovfl_payload, | 242 $ovfl_payload, |
| 222 $ovfl_cnt, | 243 $ovfl_cnt, |
| 223 $mx_payload, | 244 $mx_payload, |
| 224 $int_pages, | 245 $int_pages, |
| 225 $leaf_pages, | 246 $leaf_pages, |
| 226 $ovfl_pages, | 247 $ovfl_pages, |
| 227 $int_unused, | 248 $int_unused, |
| 228 $leaf_unused, | 249 $leaf_unused, |
| 229 $ovfl_unused, | 250 $ovfl_unused, |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 319 int(sum(ovfl_payload)) AS ovfl_payload, | 340 int(sum(ovfl_payload)) AS ovfl_payload, |
| 320 max(mx_payload) AS mx_payload, | 341 max(mx_payload) AS mx_payload, |
| 321 int(sum(ovfl_cnt)) as ovfl_cnt, | 342 int(sum(ovfl_cnt)) as ovfl_cnt, |
| 322 int(sum(leaf_pages)) AS leaf_pages, | 343 int(sum(leaf_pages)) AS leaf_pages, |
| 323 int(sum(int_pages)) AS int_pages, | 344 int(sum(int_pages)) AS int_pages, |
| 324 int(sum(ovfl_pages)) AS ovfl_pages, | 345 int(sum(ovfl_pages)) AS ovfl_pages, |
| 325 int(sum(leaf_unused)) AS leaf_unused, | 346 int(sum(leaf_unused)) AS leaf_unused, |
| 326 int(sum(int_unused)) AS int_unused, | 347 int(sum(int_unused)) AS int_unused, |
| 327 int(sum(ovfl_unused)) AS ovfl_unused, | 348 int(sum(ovfl_unused)) AS ovfl_unused, |
| 328 int(sum(gap_cnt)) AS gap_cnt, | 349 int(sum(gap_cnt)) AS gap_cnt, |
| 329 int(sum(compressed_size)) AS compressed_size | 350 int(sum(compressed_size)) AS compressed_size, |
| 351 int(max(depth)) AS depth, |
| 352 count(*) AS cnt |
| 330 FROM space_used WHERE $where" {} {} | 353 FROM space_used WHERE $where" {} {} |
| 331 | 354 |
| 332 # Output the sub-report title, nicely decorated with * characters. | 355 # Output the sub-report title, nicely decorated with * characters. |
| 333 # | 356 # |
| 334 puts "" | 357 puts "" |
| 335 titleline $title | 358 titleline $title |
| 336 puts "" | 359 puts "" |
| 337 | 360 |
| 338 # Calculate statistics and store the results in TCL variables, as follows: | 361 # Calculate statistics and store the results in TCL variables, as follows: |
| 339 # | 362 # |
| (...skipping 16 matching lines...) Expand all Loading... |
| 356 set avg_unused [divide $total_unused $nleaf] | 379 set avg_unused [divide $total_unused $nleaf] |
| 357 if {$int_pages>0} { | 380 if {$int_pages>0} { |
| 358 # TODO: Is this formula correct? | 381 # TODO: Is this formula correct? |
| 359 set nTab [mem eval " | 382 set nTab [mem eval " |
| 360 SELECT count(*) FROM ( | 383 SELECT count(*) FROM ( |
| 361 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 | 384 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 |
| 362 ) | 385 ) |
| 363 "] | 386 "] |
| 364 set avg_fanout [mem eval " | 387 set avg_fanout [mem eval " |
| 365 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used | 388 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used |
| 366 WHERE $where AND is_index = 0 | 389 WHERE $where |
| 367 "] | 390 "] |
| 368 set avg_fanout [format %.2f $avg_fanout] | 391 set avg_fanout [format %.2f $avg_fanout] |
| 369 } | 392 } |
| 370 set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}] | 393 set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}] |
| 371 | 394 |
| 372 # Print out the sub-report statistics. | 395 # Print out the sub-report statistics. |
| 373 # | 396 # |
| 374 statline {Percentage of total database} $total_pages_percent | 397 statline {Percentage of total database} $total_pages_percent |
| 375 statline {Number of entries} $nleaf | 398 statline {Number of entries} $nleaf |
| 376 statline {Bytes of storage consumed} $storage | 399 statline {Bytes of storage consumed} $storage |
| 377 if {$compressed_size!=$storage} { | 400 if {$compressed_size!=$storage} { |
| 378 set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}] | 401 set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}] |
| 379 set pct [expr {$compressed_size*100.0/$storage}] | 402 set pct [expr {$compressed_size*100.0/$storage}] |
| 380 set pct [format {%5.1f%%} $pct] | 403 set pct [format {%5.1f%%} $pct] |
| 381 statline {Bytes used after compression} $compressed_size $pct | 404 statline {Bytes used after compression} $compressed_size $pct |
| 382 } | 405 } |
| 383 statline {Bytes of payload} $payload $payload_percent | 406 statline {Bytes of payload} $payload $payload_percent |
| 407 if {$cnt==1} {statline {B-tree depth} $depth} |
| 384 statline {Average payload per entry} $avg_payload | 408 statline {Average payload per entry} $avg_payload |
| 385 statline {Average unused bytes per entry} $avg_unused | 409 statline {Average unused bytes per entry} $avg_unused |
| 386 if {[info exists avg_fanout]} { | 410 if {[info exists avg_fanout]} { |
| 387 statline {Average fanout} $avg_fanout | 411 statline {Average fanout} $avg_fanout |
| 388 } | 412 } |
| 389 if {$showFrag && $total_pages>1} { | 413 if {$showFrag && $total_pages>1} { |
| 390 set fragmentation [percent $gap_cnt [expr {$total_pages-1}]] | 414 set fragmentation [percent $gap_cnt [expr {$total_pages-1}]] |
| 391 statline {Non-sequential pages} $gap_cnt $fragmentation | 415 statline {Non-sequential pages} $gap_cnt $fragmentation |
| 392 } | 416 } |
| 393 statline {Maximum payload per entry} $mx_payload | 417 statline {Maximum payload per entry} $mx_payload |
| (...skipping 378 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 772 } | 796 } |
| 773 puts ");" | 797 puts ");" |
| 774 } | 798 } |
| 775 puts "COMMIT;" | 799 puts "COMMIT;" |
| 776 | 800 |
| 777 } err]} { | 801 } err]} { |
| 778 puts "ERROR: $err" | 802 puts "ERROR: $err" |
| 779 puts $errorInfo | 803 puts $errorInfo |
| 780 exit 1 | 804 exit 1 |
| 781 } | 805 } |
| OLD | NEW |