OLD | NEW |
(Empty) | |
| 1 # Run this TCL script using "testfixture" in order get a report that shows |
| 2 # how much disk space is used by a particular data to actually store data |
| 3 # versus how much space is unused. |
| 4 # |
| 5 |
| 6 if {[catch { |
| 7 |
| 8 # Argument $tname is the name of a table within the database opened by |
| 9 # database handle [db]. Return true if it is a WITHOUT ROWID table, or |
| 10 # false otherwise. |
| 11 # |
| 12 proc is_without_rowid {tname} { |
| 13 set t [string map {' ''} $tname] |
| 14 db eval "PRAGMA index_list = '$t'" o { |
| 15 if {$o(origin) == "pk"} { |
| 16 set n $o(name) |
| 17 if {0==[db one { SELECT count(*) FROM sqlite_master WHERE name=$n }]} { |
| 18 return 1 |
| 19 } |
| 20 } |
| 21 } |
| 22 return 0 |
| 23 } |
| 24 |
| 25 # Read and run TCL commands from standard input. Used to implement |
| 26 # the --tclsh option. |
| 27 # |
| 28 proc tclsh {} { |
| 29 set line {} |
| 30 while {![eof stdin]} { |
| 31 if {$line!=""} { |
| 32 puts -nonewline "> " |
| 33 } else { |
| 34 puts -nonewline "% " |
| 35 } |
| 36 flush stdout |
| 37 append line [gets stdin] |
| 38 if {[info complete $line]} { |
| 39 if {[catch {uplevel #0 $line} result]} { |
| 40 puts stderr "Error: $result" |
| 41 } elseif {$result!=""} { |
| 42 puts $result |
| 43 } |
| 44 set line {} |
| 45 } else { |
| 46 append line \n |
| 47 } |
| 48 } |
| 49 } |
| 50 |
| 51 |
| 52 # Get the name of the database to analyze |
| 53 # |
| 54 proc usage {} { |
| 55 set argv0 [file rootname [file tail [info nameofexecutable]]] |
| 56 puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename" |
| 57 puts stderr { |
| 58 Analyze the SQLite3 database file specified by the "database-filename" |
| 59 argument and output a report detailing size and storage efficiency |
| 60 information for the database and its constituent tables and indexes. |
| 61 |
| 62 Options: |
| 63 |
| 64 --pageinfo Show how each page of the database-file is used |
| 65 |
| 66 --stats Output SQL text that creates a new database containing |
| 67 statistics about the database that was analyzed |
| 68 |
| 69 --tclsh Run the built-in TCL interpreter interactively (for debugging) |
| 70 |
| 71 --version Show the version number of SQLite |
| 72 } |
| 73 exit 1 |
| 74 } |
| 75 set file_to_analyze {} |
| 76 set flags(-pageinfo) 0 |
| 77 set flags(-stats) 0 |
| 78 set flags(-debug) 0 |
| 79 append argv {} |
| 80 foreach arg $argv { |
| 81 if {[regexp {^-+pageinfo$} $arg]} { |
| 82 set flags(-pageinfo) 1 |
| 83 } elseif {[regexp {^-+stats$} $arg]} { |
| 84 set flags(-stats) 1 |
| 85 } elseif {[regexp {^-+debug$} $arg]} { |
| 86 set flags(-debug) 1 |
| 87 } elseif {[regexp {^-+tclsh$} $arg]} { |
| 88 tclsh |
| 89 exit 0 |
| 90 } elseif {[regexp {^-+version$} $arg]} { |
| 91 sqlite3 mem :memory: |
| 92 puts [mem one {SELECT sqlite_version()||' '||sqlite_source_id()}] |
| 93 mem close |
| 94 exit 0 |
| 95 } elseif {[regexp {^-} $arg]} { |
| 96 puts stderr "Unknown option: $arg" |
| 97 usage |
| 98 } elseif {$file_to_analyze!=""} { |
| 99 usage |
| 100 } else { |
| 101 set file_to_analyze $arg |
| 102 } |
| 103 } |
| 104 if {$file_to_analyze==""} usage |
| 105 set root_filename $file_to_analyze |
| 106 regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename |
| 107 if {![file exists $root_filename]} { |
| 108 puts stderr "No such file: $root_filename" |
| 109 exit 1 |
| 110 } |
| 111 if {![file readable $root_filename]} { |
| 112 puts stderr "File is not readable: $root_filename" |
| 113 exit 1 |
| 114 } |
| 115 set true_file_size [file size $root_filename] |
| 116 if {$true_file_size<512} { |
| 117 puts stderr "Empty or malformed database: $root_filename" |
| 118 exit 1 |
| 119 } |
| 120 |
| 121 # Compute the total file size assuming test_multiplexor is being used. |
| 122 # Assume that SQLITE_ENABLE_8_3_NAMES might be enabled |
| 123 # |
| 124 set extension [file extension $root_filename] |
| 125 set pattern $root_filename |
| 126 append pattern {[0-3][0-9][0-9]} |
| 127 foreach f [glob -nocomplain $pattern] { |
| 128 incr true_file_size [file size $f] |
| 129 set extension {} |
| 130 } |
| 131 if {[string length $extension]>=2 && [string length $extension]<=4} { |
| 132 set pattern [file rootname $root_filename] |
| 133 append pattern {.[0-3][0-9][0-9]} |
| 134 foreach f [glob -nocomplain $pattern] { |
| 135 incr true_file_size [file size $f] |
| 136 } |
| 137 } |
| 138 |
| 139 # Open the database |
| 140 # |
| 141 if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} { |
| 142 puts stderr "error trying to open $file_to_analyze: $msg" |
| 143 exit 1 |
| 144 } |
| 145 if {$flags(-debug)} { |
| 146 proc dbtrace {txt} {puts $txt; flush stdout;} |
| 147 db trace ::dbtrace |
| 148 } |
| 149 |
| 150 db eval {SELECT count(*) FROM sqlite_master} |
| 151 set pageSize [expr {wide([db one {PRAGMA page_size}])}] |
| 152 |
| 153 if {$flags(-pageinfo)} { |
| 154 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
| 155 db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} { |
| 156 puts "$pageno $name $path" |
| 157 } |
| 158 exit 0 |
| 159 } |
| 160 if {$flags(-stats)} { |
| 161 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
| 162 puts "BEGIN;" |
| 163 puts "CREATE TABLE stats(" |
| 164 puts " name STRING, /* Name of table or index */" |
| 165 puts " path INTEGER, /* Path to page from root */" |
| 166 puts " pageno INTEGER, /* Page number */" |
| 167 puts " pagetype STRING, /* 'internal', 'leaf' or 'overflow' */" |
| 168 puts " ncell INTEGER, /* Cells on page (0 for overflow) */" |
| 169 puts " payload INTEGER, /* Bytes of payload on this page */" |
| 170 puts " unused INTEGER, /* Bytes of unused space on this page */" |
| 171 puts " mx_payload INTEGER, /* Largest payload size of all cells */" |
| 172 puts " pgoffset INTEGER, /* Offset of page in file */" |
| 173 puts " pgsize INTEGER /* Size of the page */" |
| 174 puts ");" |
| 175 db eval {SELECT quote(name) || ',' || |
| 176 quote(path) || ',' || |
| 177 quote(pageno) || ',' || |
| 178 quote(pagetype) || ',' || |
| 179 quote(ncell) || ',' || |
| 180 quote(payload) || ',' || |
| 181 quote(unused) || ',' || |
| 182 quote(mx_payload) || ',' || |
| 183 quote(pgoffset) || ',' || |
| 184 quote(pgsize) AS x FROM stat} { |
| 185 puts "INSERT INTO stats VALUES($x);" |
| 186 } |
| 187 puts "COMMIT;" |
| 188 exit 0 |
| 189 } |
| 190 |
| 191 |
| 192 # In-memory database for collecting statistics. This script loops through |
| 193 # the tables and indices in the database being analyzed, adding a row for each |
| 194 # to an in-memory database (for which the schema is shown below). It then |
| 195 # queries the in-memory db to produce the space-analysis report. |
| 196 # |
| 197 sqlite3 mem :memory: |
| 198 if {$flags(-debug)} { |
| 199 proc dbtrace {txt} {puts $txt; flush stdout;} |
| 200 mem trace ::dbtrace |
| 201 } |
| 202 set tabledef {CREATE TABLE space_used( |
| 203 name clob, -- Name of a table or index in the database file |
| 204 tblname clob, -- Name of associated table |
| 205 is_index boolean, -- TRUE if it is an index, false for a table |
| 206 is_without_rowid boolean, -- TRUE if WITHOUT ROWID table |
| 207 nentry int, -- Number of entries in the BTree |
| 208 leaf_entries int, -- Number of leaf entries |
| 209 depth int, -- Depth of the b-tree |
| 210 payload int, -- Total amount of data stored in this table or index |
| 211 ovfl_payload int, -- Total amount of data stored on overflow pages |
| 212 ovfl_cnt int, -- Number of entries that use overflow |
| 213 mx_payload int, -- Maximum payload size |
| 214 int_pages int, -- Number of interior pages used |
| 215 leaf_pages int, -- Number of leaf pages used |
| 216 ovfl_pages int, -- Number of overflow pages used |
| 217 int_unused int, -- Number of unused bytes on interior pages |
| 218 leaf_unused int, -- Number of unused bytes on primary pages |
| 219 ovfl_unused int, -- Number of unused bytes on overflow pages |
| 220 gap_cnt int, -- Number of gaps in the page layout |
| 221 compressed_size int -- Total bytes stored on disk |
| 222 );} |
| 223 mem eval $tabledef |
| 224 |
| 225 # Create a temporary "dbstat" virtual table. |
| 226 # |
| 227 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
| 228 db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat |
| 229 ORDER BY name, path} |
| 230 db eval {DROP TABLE temp.stat} |
| 231 |
| 232 set isCompressed 0 |
| 233 set compressOverhead 0 |
| 234 set depth 0 |
| 235 set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 } |
| 236 foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { |
| 237 |
| 238 set is_index [expr {$name!=$tblname}] |
| 239 set is_without_rowid [is_without_rowid $name] |
| 240 db eval { |
| 241 SELECT |
| 242 sum(ncell) AS nentry, |
| 243 sum((pagetype=='leaf')*ncell) AS leaf_entries, |
| 244 sum(payload) AS payload, |
| 245 sum((pagetype=='overflow') * payload) AS ovfl_payload, |
| 246 sum(path LIKE '%+000000') AS ovfl_cnt, |
| 247 max(mx_payload) AS mx_payload, |
| 248 sum(pagetype=='internal') AS int_pages, |
| 249 sum(pagetype=='leaf') AS leaf_pages, |
| 250 sum(pagetype=='overflow') AS ovfl_pages, |
| 251 sum((pagetype=='internal') * unused) AS int_unused, |
| 252 sum((pagetype=='leaf') * unused) AS leaf_unused, |
| 253 sum((pagetype=='overflow') * unused) AS ovfl_unused, |
| 254 sum(pgsize) AS compressed_size, |
| 255 max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4) |
| 256 AS depth |
| 257 FROM temp.dbstat WHERE name = $name |
| 258 } break |
| 259 |
| 260 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] |
| 261 set storage [expr {$total_pages*$pageSize}] |
| 262 if {!$isCompressed && $storage>$compressed_size} { |
| 263 set isCompressed 1 |
| 264 set compressOverhead 14 |
| 265 } |
| 266 |
| 267 # Column 'gap_cnt' is set to the number of non-contiguous entries in the |
| 268 # list of pages visited if the b-tree structure is traversed in a top-down |
| 269 # fashion (each node visited before its child-tree is passed). Any overflow |
| 270 # chains present are traversed from start to finish before any child-tree |
| 271 # is. |
| 272 # |
| 273 set gap_cnt 0 |
| 274 set prev 0 |
| 275 db eval { |
| 276 SELECT pageno, pagetype FROM temp.dbstat |
| 277 WHERE name=$name |
| 278 ORDER BY pageno |
| 279 } { |
| 280 if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} { |
| 281 incr gap_cnt |
| 282 } |
| 283 set prev $pageno |
| 284 } |
| 285 mem eval { |
| 286 INSERT INTO space_used VALUES( |
| 287 $name, |
| 288 $tblname, |
| 289 $is_index, |
| 290 $is_without_rowid, |
| 291 $nentry, |
| 292 $leaf_entries, |
| 293 $depth, |
| 294 $payload, |
| 295 $ovfl_payload, |
| 296 $ovfl_cnt, |
| 297 $mx_payload, |
| 298 $int_pages, |
| 299 $leaf_pages, |
| 300 $ovfl_pages, |
| 301 $int_unused, |
| 302 $leaf_unused, |
| 303 $ovfl_unused, |
| 304 $gap_cnt, |
| 305 $compressed_size |
| 306 ); |
| 307 } |
| 308 } |
| 309 |
| 310 proc integerify {real} { |
| 311 if {[string is double -strict $real]} { |
| 312 return [expr {wide($real)}] |
| 313 } else { |
| 314 return 0 |
| 315 } |
| 316 } |
| 317 mem function int integerify |
| 318 |
| 319 # Quote a string for use in an SQL query. Examples: |
| 320 # |
| 321 # [quote {hello world}] == {'hello world'} |
| 322 # [quote {hello world's}] == {'hello world''s'} |
| 323 # |
| 324 proc quote {txt} { |
| 325 return [string map {' ''} $txt] |
| 326 } |
| 327 |
| 328 # Output a title line |
| 329 # |
| 330 proc titleline {title} { |
| 331 if {$title==""} { |
| 332 puts [string repeat * 79] |
| 333 } else { |
| 334 set len [string length $title] |
| 335 set stars [string repeat * [expr 79-$len-5]] |
| 336 puts "*** $title $stars" |
| 337 } |
| 338 } |
| 339 |
| 340 # Generate a single line of output in the statistics section of the |
| 341 # report. |
| 342 # |
| 343 proc statline {title value {extra {}}} { |
| 344 set len [string length $title] |
| 345 set dots [string repeat . [expr 50-$len]] |
| 346 set len [string length $value] |
| 347 set sp2 [string range { } $len end] |
| 348 if {$extra ne ""} { |
| 349 set extra " $extra" |
| 350 } |
| 351 puts "$title$dots $value$sp2$extra" |
| 352 } |
| 353 |
| 354 # Generate a formatted percentage value for $num/$denom |
| 355 # |
| 356 proc percent {num denom {of {}}} { |
| 357 if {$denom==0.0} {return ""} |
| 358 set v [expr {$num*100.0/$denom}] |
| 359 set of {} |
| 360 if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} { |
| 361 return [format {%5.1f%% %s} $v $of] |
| 362 } elseif {$v<0.1 || $v>99.9} { |
| 363 return [format {%7.3f%% %s} $v $of] |
| 364 } else { |
| 365 return [format {%6.2f%% %s} $v $of] |
| 366 } |
| 367 } |
| 368 |
| 369 proc divide {num denom} { |
| 370 if {$denom==0} {return 0.0} |
| 371 return [format %.2f [expr double($num)/double($denom)]] |
| 372 } |
| 373 |
| 374 # Generate a subreport that covers some subset of the database. |
| 375 # the $where clause determines which subset to analyze. |
| 376 # |
| 377 proc subreport {title where showFrag} { |
| 378 global pageSize file_pgcnt compressOverhead |
| 379 |
| 380 # Query the in-memory database for the sum of various statistics |
| 381 # for the subset of tables/indices identified by the WHERE clause in |
| 382 # $where. Note that even if the WHERE clause matches no rows, the |
| 383 # following query returns exactly one row (because it is an aggregate). |
| 384 # |
| 385 # The results of the query are stored directly by SQLite into local |
| 386 # variables (i.e. $nentry, $payload etc.). |
| 387 # |
| 388 mem eval " |
| 389 SELECT |
| 390 int(sum( |
| 391 CASE WHEN (is_without_rowid OR is_index) THEN nentry |
| 392 ELSE leaf_entries |
| 393 END |
| 394 )) AS nentry, |
| 395 int(sum(payload)) AS payload, |
| 396 int(sum(ovfl_payload)) AS ovfl_payload, |
| 397 max(mx_payload) AS mx_payload, |
| 398 int(sum(ovfl_cnt)) as ovfl_cnt, |
| 399 int(sum(leaf_pages)) AS leaf_pages, |
| 400 int(sum(int_pages)) AS int_pages, |
| 401 int(sum(ovfl_pages)) AS ovfl_pages, |
| 402 int(sum(leaf_unused)) AS leaf_unused, |
| 403 int(sum(int_unused)) AS int_unused, |
| 404 int(sum(ovfl_unused)) AS ovfl_unused, |
| 405 int(sum(gap_cnt)) AS gap_cnt, |
| 406 int(sum(compressed_size)) AS compressed_size, |
| 407 int(max(depth)) AS depth, |
| 408 count(*) AS cnt |
| 409 FROM space_used WHERE $where" {} {} |
| 410 |
| 411 # Output the sub-report title, nicely decorated with * characters. |
| 412 # |
| 413 puts "" |
| 414 titleline $title |
| 415 puts "" |
| 416 |
| 417 # Calculate statistics and store the results in TCL variables, as follows: |
| 418 # |
| 419 # total_pages: Database pages consumed. |
| 420 # total_pages_percent: Pages consumed as a percentage of the file. |
| 421 # storage: Bytes consumed. |
| 422 # payload_percent: Payload bytes used as a percentage of $storage. |
| 423 # total_unused: Unused bytes on pages. |
| 424 # avg_payload: Average payload per btree entry. |
| 425 # avg_fanout: Average fanout for internal pages. |
| 426 # avg_unused: Average unused bytes per btree entry. |
| 427 # ovfl_cnt_percent: Percentage of btree entries that use overflow pages. |
| 428 # |
| 429 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] |
| 430 set total_pages_percent [percent $total_pages $file_pgcnt] |
| 431 set storage [expr {$total_pages*$pageSize}] |
| 432 set payload_percent [percent $payload $storage {of storage consumed}] |
| 433 set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}] |
| 434 set avg_payload [divide $payload $nentry] |
| 435 set avg_unused [divide $total_unused $nentry] |
| 436 if {$int_pages>0} { |
| 437 # TODO: Is this formula correct? |
| 438 set nTab [mem eval " |
| 439 SELECT count(*) FROM ( |
| 440 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 |
| 441 ) |
| 442 "] |
| 443 set avg_fanout [mem eval " |
| 444 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used |
| 445 WHERE $where |
| 446 "] |
| 447 set avg_fanout [format %.2f $avg_fanout] |
| 448 } |
| 449 set ovfl_cnt_percent [percent $ovfl_cnt $nentry {of all entries}] |
| 450 |
| 451 # Print out the sub-report statistics. |
| 452 # |
| 453 statline {Percentage of total database} $total_pages_percent |
| 454 statline {Number of entries} $nentry |
| 455 statline {Bytes of storage consumed} $storage |
| 456 if {$compressed_size!=$storage} { |
| 457 set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}] |
| 458 set pct [expr {$compressed_size*100.0/$storage}] |
| 459 set pct [format {%5.1f%%} $pct] |
| 460 statline {Bytes used after compression} $compressed_size $pct |
| 461 } |
| 462 statline {Bytes of payload} $payload $payload_percent |
| 463 if {$cnt==1} {statline {B-tree depth} $depth} |
| 464 statline {Average payload per entry} $avg_payload |
| 465 statline {Average unused bytes per entry} $avg_unused |
| 466 if {[info exists avg_fanout]} { |
| 467 statline {Average fanout} $avg_fanout |
| 468 } |
| 469 if {$showFrag && $total_pages>1} { |
| 470 set fragmentation [percent $gap_cnt [expr {$total_pages-1}]] |
| 471 statline {Non-sequential pages} $gap_cnt $fragmentation |
| 472 } |
| 473 statline {Maximum payload per entry} $mx_payload |
| 474 statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent |
| 475 if {$int_pages>0} { |
| 476 statline {Index pages used} $int_pages |
| 477 } |
| 478 statline {Primary pages used} $leaf_pages |
| 479 statline {Overflow pages used} $ovfl_pages |
| 480 statline {Total pages used} $total_pages |
| 481 if {$int_unused>0} { |
| 482 set int_unused_percent [ |
| 483 percent $int_unused [expr {$int_pages*$pageSize}] {of index space}] |
| 484 statline "Unused bytes on index pages" $int_unused $int_unused_percent |
| 485 } |
| 486 statline "Unused bytes on primary pages" $leaf_unused [ |
| 487 percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}] |
| 488 statline "Unused bytes on overflow pages" $ovfl_unused [ |
| 489 percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}] |
| 490 statline "Unused bytes on all pages" $total_unused [ |
| 491 percent $total_unused $storage {of all space}] |
| 492 return 1 |
| 493 } |
| 494 |
| 495 # Calculate the overhead in pages caused by auto-vacuum. |
| 496 # |
| 497 # This procedure calculates and returns the number of pages used by the |
| 498 # auto-vacuum 'pointer-map'. If the database does not support auto-vacuum, |
| 499 # then 0 is returned. The two arguments are the size of the database file in |
| 500 # pages and the page size used by the database (in bytes). |
| 501 proc autovacuum_overhead {filePages pageSize} { |
| 502 |
| 503 # Set $autovacuum to non-zero for databases that support auto-vacuum. |
| 504 set autovacuum [db one {PRAGMA auto_vacuum}] |
| 505 |
| 506 # If the database is not an auto-vacuum database or the file consists |
| 507 # of one page only then there is no overhead for auto-vacuum. Return zero. |
| 508 if {0==$autovacuum || $filePages==1} { |
| 509 return 0 |
| 510 } |
| 511 |
| 512 # The number of entries on each pointer map page. The layout of the |
| 513 # database file is one pointer-map page, followed by $ptrsPerPage other |
| 514 # pages, followed by a pointer-map page etc. The first pointer-map page |
| 515 # is the second page of the file overall. |
| 516 set ptrsPerPage [expr double($pageSize/5)] |
| 517 |
| 518 # Return the number of pointer map pages in the database. |
| 519 return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))] |
| 520 } |
| 521 |
| 522 |
| 523 # Calculate the summary statistics for the database and store the results |
| 524 # in TCL variables. They are output below. Variables are as follows: |
| 525 # |
| 526 # pageSize: Size of each page in bytes. |
| 527 # file_bytes: File size in bytes. |
| 528 # file_pgcnt: Number of pages in the file. |
| 529 # file_pgcnt2: Number of pages in the file (calculated). |
| 530 # av_pgcnt: Pages consumed by the auto-vacuum pointer-map. |
| 531 # av_percent: Percentage of the file consumed by auto-vacuum pointer-map. |
| 532 # inuse_pgcnt: Data pages in the file. |
| 533 # inuse_percent: Percentage of pages used to store data. |
| 534 # free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>) |
| 535 # free_pgcnt2: Free pages in the file according to the file header. |
| 536 # free_percent: Percentage of file consumed by free pages (calculated). |
| 537 # free_percent2: Percentage of file consumed by free pages (header). |
| 538 # ntable: Number of tables in the db. |
| 539 # nindex: Number of indices in the db. |
| 540 # nautoindex: Number of indices created automatically. |
| 541 # nmanindex: Number of indices created manually. |
| 542 # user_payload: Number of bytes of payload in table btrees |
| 543 # (not including sqlite_master) |
| 544 # user_percent: $user_payload as a percentage of total file size. |
| 545 |
| 546 ### The following, setting $file_bytes based on the actual size of the file |
| 547 ### on disk, causes this tool to choke on zipvfs databases. So set it based |
| 548 ### on the return of [PRAGMA page_count] instead. |
| 549 if 0 { |
| 550 set file_bytes [file size $file_to_analyze] |
| 551 set file_pgcnt [expr {$file_bytes/$pageSize}] |
| 552 } |
| 553 set file_pgcnt [db one {PRAGMA page_count}] |
| 554 set file_bytes [expr {$file_pgcnt * $pageSize}] |
| 555 |
| 556 set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize] |
| 557 set av_percent [percent $av_pgcnt $file_pgcnt] |
| 558 |
| 559 set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used} |
| 560 set inuse_pgcnt [expr wide([mem eval $sql])] |
| 561 set inuse_percent [percent $inuse_pgcnt $file_pgcnt] |
| 562 |
| 563 set free_pgcnt [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}] |
| 564 set free_percent [percent $free_pgcnt $file_pgcnt] |
| 565 set free_pgcnt2 [db one {PRAGMA freelist_count}] |
| 566 set free_percent2 [percent $free_pgcnt2 $file_pgcnt] |
| 567 |
| 568 set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}] |
| 569 |
| 570 set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}] |
| 571 set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}] |
| 572 set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'} |
| 573 set nautoindex [db eval $sql] |
| 574 set nmanindex [expr {$nindex-$nautoindex}] |
| 575 |
| 576 # set total_payload [mem eval "SELECT sum(payload) FROM space_used"] |
| 577 set user_payload [mem one {SELECT int(sum(payload)) FROM space_used |
| 578 WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}] |
| 579 set user_percent [percent $user_payload $file_bytes] |
| 580 |
| 581 # Output the summary statistics calculated above. |
| 582 # |
| 583 puts "/** Disk-Space Utilization Report For $root_filename" |
| 584 puts "" |
| 585 statline {Page size in bytes} $pageSize |
| 586 statline {Pages in the whole file (measured)} $file_pgcnt |
| 587 statline {Pages in the whole file (calculated)} $file_pgcnt2 |
| 588 statline {Pages that store data} $inuse_pgcnt $inuse_percent |
| 589 statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2 |
| 590 statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent |
| 591 statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent |
| 592 statline {Number of tables in the database} $ntable |
| 593 statline {Number of indices} $nindex |
| 594 statline {Number of defined indices} $nmanindex |
| 595 statline {Number of implied indices} $nautoindex |
| 596 if {$isCompressed} { |
| 597 statline {Size of uncompressed content in bytes} $file_bytes |
| 598 set efficiency [percent $true_file_size $file_bytes] |
| 599 statline {Size of compressed file on disk} $true_file_size $efficiency |
| 600 } else { |
| 601 statline {Size of the file in bytes} $file_bytes |
| 602 } |
| 603 statline {Bytes of user payload stored} $user_payload $user_percent |
| 604 |
| 605 # Output table rankings |
| 606 # |
| 607 puts "" |
| 608 titleline "Page counts for all tables with their indices" |
| 609 puts "" |
| 610 mem eval {SELECT tblname, count(*) AS cnt, |
| 611 int(sum(int_pages+leaf_pages+ovfl_pages)) AS size |
| 612 FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} { |
| 613 statline [string toupper $tblname] $size [percent $size $file_pgcnt] |
| 614 } |
| 615 puts "" |
| 616 titleline "Page counts for all tables and indices separately" |
| 617 puts "" |
| 618 mem eval { |
| 619 SELECT |
| 620 upper(name) AS nm, |
| 621 int(int_pages+leaf_pages+ovfl_pages) AS size |
| 622 FROM space_used |
| 623 ORDER BY size+0 DESC, name} {} { |
| 624 statline $nm $size [percent $size $file_pgcnt] |
| 625 } |
| 626 if {$isCompressed} { |
| 627 puts "" |
| 628 titleline "Bytes of disk space used after compression" |
| 629 puts "" |
| 630 set csum 0 |
| 631 mem eval {SELECT tblname, |
| 632 int(sum(compressed_size)) + |
| 633 $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages) |
| 634 AS csize |
| 635 FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} { |
| 636 incr csum $csize |
| 637 statline [string toupper $tblname] $csize [percent $csize $true_file_size] |
| 638 } |
| 639 set overhead [expr {$true_file_size - $csum}] |
| 640 if {$overhead>0} { |
| 641 statline {Header and free space} $overhead [percent $overhead $true_file_siz
e] |
| 642 } |
| 643 } |
| 644 |
| 645 # Output subreports |
| 646 # |
| 647 if {$nindex>0} { |
| 648 subreport {All tables and indices} 1 0 |
| 649 } |
| 650 subreport {All tables} {NOT is_index} 0 |
| 651 if {$nindex>0} { |
| 652 subreport {All indices} {is_index} 0 |
| 653 } |
| 654 foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used |
| 655 ORDER BY name}] { |
| 656 set qn [quote $tbl] |
| 657 set name [string toupper $tbl] |
| 658 set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}] |
| 659 if {$n>1} { |
| 660 set idxlist [mem eval "SELECT name FROM space_used |
| 661 WHERE tblname='$qn' AND is_index |
| 662 ORDER BY 1"] |
| 663 subreport "Table $name and all its indices" "tblname='$qn'" 0 |
| 664 subreport "Table $name w/o any indices" "name='$qn'" 1 |
| 665 if {[llength $idxlist]>1} { |
| 666 subreport "Indices of table $name" "tblname='$qn' AND is_index" 0 |
| 667 } |
| 668 foreach idx $idxlist { |
| 669 set qidx [quote $idx] |
| 670 subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1 |
| 671 } |
| 672 } else { |
| 673 subreport "Table $name" "name='$qn'" 1 |
| 674 } |
| 675 } |
| 676 |
| 677 # Output instructions on what the numbers above mean. |
| 678 # |
| 679 puts "" |
| 680 titleline Definitions |
| 681 puts { |
| 682 Page size in bytes |
| 683 |
| 684 The number of bytes in a single page of the database file. |
| 685 Usually 1024. |
| 686 |
| 687 Number of pages in the whole file |
| 688 } |
| 689 puts " The number of $pageSize-byte pages that go into forming the complete |
| 690 database" |
| 691 puts { |
| 692 Pages that store data |
| 693 |
| 694 The number of pages that store data, either as primary B*Tree pages or |
| 695 as overflow pages. The number at the right is the data pages divided by |
| 696 the total number of pages in the file. |
| 697 |
| 698 Pages on the freelist |
| 699 |
| 700 The number of pages that are not currently in use but are reserved for |
| 701 future use. The percentage at the right is the number of freelist pages |
| 702 divided by the total number of pages in the file. |
| 703 |
| 704 Pages of auto-vacuum overhead |
| 705 |
| 706 The number of pages that store data used by the database to facilitate |
| 707 auto-vacuum. This is zero for databases that do not support auto-vacuum. |
| 708 |
| 709 Number of tables in the database |
| 710 |
| 711 The number of tables in the database, including the SQLITE_MASTER table |
| 712 used to store schema information. |
| 713 |
| 714 Number of indices |
| 715 |
| 716 The total number of indices in the database. |
| 717 |
| 718 Number of defined indices |
| 719 |
| 720 The number of indices created using an explicit CREATE INDEX statement. |
| 721 |
| 722 Number of implied indices |
| 723 |
| 724 The number of indices used to implement PRIMARY KEY or UNIQUE constraints |
| 725 on tables. |
| 726 |
| 727 Size of the file in bytes |
| 728 |
| 729 The total amount of disk space used by the entire database files. |
| 730 |
| 731 Bytes of user payload stored |
| 732 |
| 733 The total number of bytes of user payload stored in the database. The |
| 734 schema information in the SQLITE_MASTER table is not counted when |
| 735 computing this number. The percentage at the right shows the payload |
| 736 divided by the total file size. |
| 737 |
| 738 Percentage of total database |
| 739 |
| 740 The amount of the complete database file that is devoted to storing |
| 741 information described by this category. |
| 742 |
| 743 Number of entries |
| 744 |
| 745 The total number of B-Tree key/value pairs stored under this category. |
| 746 |
| 747 Bytes of storage consumed |
| 748 |
| 749 The total amount of disk space required to store all B-Tree entries |
| 750 under this category. The is the total number of pages used times |
| 751 the pages size. |
| 752 |
| 753 Bytes of payload |
| 754 |
| 755 The amount of payload stored under this category. Payload is the data |
| 756 part of table entries and the key part of index entries. The percentage |
| 757 at the right is the bytes of payload divided by the bytes of storage |
| 758 consumed. |
| 759 |
| 760 Average payload per entry |
| 761 |
| 762 The average amount of payload on each entry. This is just the bytes of |
| 763 payload divided by the number of entries. |
| 764 |
| 765 Average unused bytes per entry |
| 766 |
| 767 The average amount of free space remaining on all pages under this |
| 768 category on a per-entry basis. This is the number of unused bytes on |
| 769 all pages divided by the number of entries. |
| 770 |
| 771 Non-sequential pages |
| 772 |
| 773 The number of pages in the table or index that are out of sequence. |
| 774 Many filesystems are optimized for sequential file access so a small |
| 775 number of non-sequential pages might result in faster queries, |
| 776 especially for larger database files that do not fit in the disk cache. |
| 777 Note that after running VACUUM, the root page of each table or index is |
| 778 at the beginning of the database file and all other pages are in a |
| 779 separate part of the database file, resulting in a single non- |
| 780 sequential page. |
| 781 |
| 782 Maximum payload per entry |
| 783 |
| 784 The largest payload size of any entry. |
| 785 |
| 786 Entries that use overflow |
| 787 |
| 788 The number of entries that user one or more overflow pages. |
| 789 |
| 790 Total pages used |
| 791 |
| 792 This is the number of pages used to hold all information in the current |
| 793 category. This is the sum of index, primary, and overflow pages. |
| 794 |
| 795 Index pages used |
| 796 |
| 797 This is the number of pages in a table B-tree that hold only key (rowid) |
| 798 information and no data. |
| 799 |
| 800 Primary pages used |
| 801 |
| 802 This is the number of B-tree pages that hold both key and data. |
| 803 |
| 804 Overflow pages used |
| 805 |
| 806 The total number of overflow pages used for this category. |
| 807 |
| 808 Unused bytes on index pages |
| 809 |
| 810 The total number of bytes of unused space on all index pages. The |
| 811 percentage at the right is the number of unused bytes divided by the |
| 812 total number of bytes on index pages. |
| 813 |
| 814 Unused bytes on primary pages |
| 815 |
| 816 The total number of bytes of unused space on all primary pages. The |
| 817 percentage at the right is the number of unused bytes divided by the |
| 818 total number of bytes on primary pages. |
| 819 |
| 820 Unused bytes on overflow pages |
| 821 |
| 822 The total number of bytes of unused space on all overflow pages. The |
| 823 percentage at the right is the number of unused bytes divided by the |
| 824 total number of bytes on overflow pages. |
| 825 |
| 826 Unused bytes on all pages |
| 827 |
| 828 The total number of bytes of unused space on all primary and overflow |
| 829 pages. The percentage at the right is the number of unused bytes |
| 830 divided by the total number of bytes. |
| 831 } |
| 832 |
| 833 # Output a dump of the in-memory database. This can be used for more |
| 834 # complex offline analysis. |
| 835 # |
| 836 titleline {} |
| 837 puts "The entire text of this report can be sourced into any SQL database" |
| 838 puts "engine for further analysis. All of the text above is an SQL comment." |
| 839 puts "The data used to generate this report follows:" |
| 840 puts "*/" |
| 841 puts "BEGIN;" |
| 842 puts $tabledef |
| 843 unset -nocomplain x |
| 844 mem eval {SELECT * FROM space_used} x { |
| 845 puts -nonewline "INSERT INTO space_used VALUES" |
| 846 set sep ( |
| 847 foreach col $x(*) { |
| 848 set v $x($col) |
| 849 if {$v=="" || ![string is double $v]} {set v '[quote $v]'} |
| 850 puts -nonewline $sep$v |
| 851 set sep , |
| 852 } |
| 853 puts ");" |
| 854 } |
| 855 puts "COMMIT;" |
| 856 |
| 857 } err]} { |
| 858 puts "ERROR: $err" |
| 859 puts $errorInfo |
| 860 exit 1 |
| 861 } |
OLD | NEW |