| OLD | NEW |
| (Empty) |
| 1 # Run this TCL script using "testfixture" in order get a report that shows | |
| 2 # how much disk space is used by a particular data to actually store data | |
| 3 # versus how much space is unused. | |
| 4 # | |
| 5 | |
| 6 if {[catch { | |
| 7 | |
| 8 # Get the name of the database to analyze | |
| 9 # | |
| 10 #set argv $argv0 | |
| 11 if {[llength $argv]!=1} { | |
| 12 puts stderr "Usage: $argv0 database-name" | |
| 13 exit 1 | |
| 14 } | |
| 15 set file_to_analyze [lindex $argv 0] | |
| 16 if {![file exists $file_to_analyze]} { | |
| 17 puts stderr "No such file: $file_to_analyze" | |
| 18 exit 1 | |
| 19 } | |
| 20 if {![file readable $file_to_analyze]} { | |
| 21 puts stderr "File is not readable: $file_to_analyze" | |
| 22 exit 1 | |
| 23 } | |
| 24 if {[file size $file_to_analyze]<512} { | |
| 25 puts stderr "Empty or malformed database: $file_to_analyze" | |
| 26 exit 1 | |
| 27 } | |
| 28 | |
| 29 # Maximum distance between pages before we consider it a "gap" | |
| 30 # | |
| 31 set MAXGAP 3 | |
| 32 | |
| 33 # Open the database | |
| 34 # | |
| 35 sqlite3 db [lindex $argv 0] | |
| 36 set DB [btree_open [lindex $argv 0] 1000 0] | |
| 37 | |
| 38 # In-memory database for collecting statistics. This script loops through | |
| 39 # the tables and indices in the database being analyzed, adding a row for each | |
| 40 # to an in-memory database (for which the schema is shown below). It then | |
| 41 # queries the in-memory db to produce the space-analysis report. | |
| 42 # | |
| 43 sqlite3 mem :memory: | |
| 44 set tabledef\ | |
| 45 {CREATE TABLE space_used( | |
| 46 name clob, -- Name of a table or index in the database file | |
| 47 tblname clob, -- Name of associated table | |
| 48 is_index boolean, -- TRUE if it is an index, false for a table | |
| 49 nentry int, -- Number of entries in the BTree | |
| 50 leaf_entries int, -- Number of leaf entries | |
| 51 payload int, -- Total amount of data stored in this table or index | |
| 52 ovfl_payload int, -- Total amount of data stored on overflow pages | |
| 53 ovfl_cnt int, -- Number of entries that use overflow | |
| 54 mx_payload int, -- Maximum payload size | |
| 55 int_pages int, -- Number of interior pages used | |
| 56 leaf_pages int, -- Number of leaf pages used | |
| 57 ovfl_pages int, -- Number of overflow pages used | |
| 58 int_unused int, -- Number of unused bytes on interior pages | |
| 59 leaf_unused int, -- Number of unused bytes on primary pages | |
| 60 ovfl_unused int, -- Number of unused bytes on overflow pages | |
| 61 gap_cnt int -- Number of gaps in the page layout | |
| 62 );} | |
| 63 mem eval $tabledef | |
| 64 | |
| 65 proc integerify {real} { | |
| 66 if {[string is double -strict $real]} { | |
| 67 return [expr {int($real)}] | |
| 68 } else { | |
| 69 return 0 | |
| 70 } | |
| 71 } | |
| 72 mem function int integerify | |
| 73 | |
| 74 # Quote a string for use in an SQL query. Examples: | |
| 75 # | |
| 76 # [quote {hello world}] == {'hello world'} | |
| 77 # [quote {hello world's}] == {'hello world''s'} | |
| 78 # | |
| 79 proc quote {txt} { | |
| 80 regsub -all ' $txt '' q | |
| 81 return '$q' | |
| 82 } | |
| 83 | |
| 84 # This proc is a wrapper around the btree_cursor_info command. The | |
| 85 # second argument is an open btree cursor returned by [btree_cursor]. | |
| 86 # The first argument is the name of an array variable that exists in | |
| 87 # the scope of the caller. If the third argument is non-zero, then | |
| 88 # info is returned for the page that lies $up entries upwards in the | |
| 89 # tree-structure. (i.e. $up==1 returns the parent page, $up==2 the | |
| 90 # grandparent etc.) | |
| 91 # | |
| 92 # The following entries in that array are filled in with information retrieved | |
| 93 # using [btree_cursor_info]: | |
| 94 # | |
| 95 # $arrayvar(page_no) = The page number | |
| 96 # $arrayvar(entry_no) = The entry number | |
| 97 # $arrayvar(page_entries) = Total number of entries on this page | |
| 98 # $arrayvar(cell_size) = Cell size (local payload + header) | |
| 99 # $arrayvar(page_freebytes) = Number of free bytes on this page | |
| 100 # $arrayvar(page_freeblocks) = Number of free blocks on the page | |
| 101 # $arrayvar(payload_bytes) = Total payload size (local + overflow) | |
| 102 # $arrayvar(header_bytes) = Header size in bytes | |
| 103 # $arrayvar(local_payload_bytes) = Local payload size | |
| 104 # $arrayvar(parent) = Parent page number | |
| 105 # | |
| 106 proc cursor_info {arrayvar csr {up 0}} { | |
| 107 upvar $arrayvar a | |
| 108 foreach [list a(page_no) \ | |
| 109 a(entry_no) \ | |
| 110 a(page_entries) \ | |
| 111 a(cell_size) \ | |
| 112 a(page_freebytes) \ | |
| 113 a(page_freeblocks) \ | |
| 114 a(payload_bytes) \ | |
| 115 a(header_bytes) \ | |
| 116 a(local_payload_bytes) \ | |
| 117 a(parent) \ | |
| 118 a(first_ovfl) ] [btree_cursor_info $csr $up] break | |
| 119 } | |
| 120 | |
| 121 # Determine the page-size of the database. This global variable is used | |
| 122 # throughout the script. | |
| 123 # | |
| 124 set pageSize [db eval {PRAGMA page_size}] | |
| 125 | |
| 126 # Analyze every table in the database, one at a time. | |
| 127 # | |
| 128 # The following query returns the name and root-page of each table in the | |
| 129 # database, including the sqlite_master table. | |
| 130 # | |
| 131 set sql { | |
| 132 SELECT name, rootpage FROM sqlite_master | |
| 133 WHERE type='table' AND rootpage>0 | |
| 134 UNION ALL | |
| 135 SELECT 'sqlite_master', 1 | |
| 136 ORDER BY 1 | |
| 137 } | |
| 138 set wideZero [expr {10000000000 - 10000000000}] | |
| 139 foreach {name rootpage} [db eval $sql] { | |
| 140 puts stderr "Analyzing table $name..." | |
| 141 | |
| 142 # Code below traverses the table being analyzed (table name $name), using the | |
| 143 # btree cursor $cursor. Statistics related to table $name are accumulated in | |
| 144 # the following variables: | |
| 145 # | |
| 146 set total_payload $wideZero ;# Payload space used by all entries | |
| 147 set total_ovfl $wideZero ;# Payload space on overflow pages | |
| 148 set unused_int $wideZero ;# Unused space on interior nodes | |
| 149 set unused_leaf $wideZero ;# Unused space on leaf nodes | |
| 150 set unused_ovfl $wideZero ;# Unused space on overflow pages | |
| 151 set cnt_ovfl $wideZero ;# Number of entries that use overflows | |
| 152 set cnt_leaf_entry $wideZero ;# Number of leaf entries | |
| 153 set cnt_int_entry $wideZero ;# Number of interor entries | |
| 154 set mx_payload $wideZero ;# Maximum payload size | |
| 155 set ovfl_pages $wideZero ;# Number of overflow pages used | |
| 156 set leaf_pages $wideZero ;# Number of leaf pages | |
| 157 set int_pages $wideZero ;# Number of interior pages | |
| 158 set gap_cnt 0 ;# Number of holes in the page sequence | |
| 159 set prev_pgno 0 ;# Last page number seen | |
| 160 | |
| 161 # As the btree is traversed, the array variable $seen($pgno) is set to 1 | |
| 162 # the first time page $pgno is encountered. | |
| 163 # | |
| 164 catch {unset seen} | |
| 165 | |
| 166 # The following loop runs once for each entry in table $name. The table | |
| 167 # is traversed using the btree cursor stored in variable $csr | |
| 168 # | |
| 169 set csr [btree_cursor $DB $rootpage 0] | |
| 170 for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} { | |
| 171 incr cnt_leaf_entry | |
| 172 | |
| 173 # Retrieve information about the entry the btree-cursor points to into | |
| 174 # the array variable $ci (cursor info). | |
| 175 # | |
| 176 cursor_info ci $csr | |
| 177 | |
| 178 # Check if the payload of this entry is greater than the current | |
| 179 # $mx_payload statistic for the table. Also increase the $total_payload | |
| 180 # statistic. | |
| 181 # | |
| 182 if {$ci(payload_bytes)>$mx_payload} {set mx_payload $ci(payload_bytes)} | |
| 183 incr total_payload $ci(payload_bytes) | |
| 184 | |
| 185 # If this entry uses overflow pages, then update the $cnt_ovfl, | |
| 186 # $total_ovfl, $ovfl_pages and $unused_ovfl statistics. | |
| 187 # | |
| 188 set ovfl [expr {$ci(payload_bytes)-$ci(local_payload_bytes)}] | |
| 189 if {$ovfl} { | |
| 190 incr cnt_ovfl | |
| 191 incr total_ovfl $ovfl | |
| 192 set n [expr {int(ceil($ovfl/($pageSize-4.0)))}] | |
| 193 incr ovfl_pages $n | |
| 194 incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}] | |
| 195 set pglist [btree_ovfl_info $DB $csr] | |
| 196 } else { | |
| 197 set pglist {} | |
| 198 } | |
| 199 | |
| 200 # If this is the first table entry analyzed for the page, then update | |
| 201 # the page-related statistics $leaf_pages and $unused_leaf. Also, if | |
| 202 # this page has a parent page that has not been analyzed, retrieve | |
| 203 # info for the parent and update statistics for it too. | |
| 204 # | |
| 205 if {![info exists seen($ci(page_no))]} { | |
| 206 set seen($ci(page_no)) 1 | |
| 207 incr leaf_pages | |
| 208 incr unused_leaf $ci(page_freebytes) | |
| 209 set pglist "$ci(page_no) $pglist" | |
| 210 | |
| 211 # Now check if the page has a parent that has not been analyzed. If | |
| 212 # so, update the $int_pages, $cnt_int_entry and $unused_int statistics | |
| 213 # accordingly. Then check if the parent page has a parent that has | |
| 214 # not yet been analyzed etc. | |
| 215 # | |
| 216 # set parent $ci(parent_page_no) | |
| 217 for {set up 1} \ | |
| 218 {$ci(parent)!=0 && ![info exists seen($ci(parent))]} {incr up} \ | |
| 219 { | |
| 220 # Mark the parent as seen. | |
| 221 # | |
| 222 set seen($ci(parent)) 1 | |
| 223 | |
| 224 # Retrieve info for the parent and update statistics. | |
| 225 cursor_info ci $csr $up | |
| 226 incr int_pages | |
| 227 incr cnt_int_entry $ci(page_entries) | |
| 228 incr unused_int $ci(page_freebytes) | |
| 229 | |
| 230 # parent pages come before their first child | |
| 231 set pglist "$ci(page_no) $pglist" | |
| 232 } | |
| 233 } | |
| 234 | |
| 235 # Check the page list for fragmentation | |
| 236 # | |
| 237 foreach pg $pglist { | |
| 238 if {$pg!=$prev_pgno+1 && $prev_pgno>0} { | |
| 239 incr gap_cnt | |
| 240 } | |
| 241 set prev_pgno $pg | |
| 242 } | |
| 243 } | |
| 244 btree_close_cursor $csr | |
| 245 | |
| 246 # Handle the special case where a table contains no data. In this case | |
| 247 # all statistics are zero, except for the number of leaf pages (1) and | |
| 248 # the unused bytes on leaf pages ($pageSize - 8). | |
| 249 # | |
| 250 # An exception to the above is the sqlite_master table. If it is empty | |
| 251 # then all statistics are zero except for the number of leaf pages (1), | |
| 252 # and the number of unused bytes on leaf pages ($pageSize - 112). | |
| 253 # | |
| 254 if {[llength [array names seen]]==0} { | |
| 255 set leaf_pages 1 | |
| 256 if {$rootpage==1} { | |
| 257 set unused_leaf [expr {$pageSize-112}] | |
| 258 } else { | |
| 259 set unused_leaf [expr {$pageSize-8}] | |
| 260 } | |
| 261 } | |
| 262 | |
| 263 # Insert the statistics for the table analyzed into the in-memory database. | |
| 264 # | |
| 265 set sql "INSERT INTO space_used VALUES(" | |
| 266 append sql [quote $name] | |
| 267 append sql ",[quote $name]" | |
| 268 append sql ",0" | |
| 269 append sql ",[expr {$cnt_leaf_entry+$cnt_int_entry}]" | |
| 270 append sql ",$cnt_leaf_entry" | |
| 271 append sql ",$total_payload" | |
| 272 append sql ",$total_ovfl" | |
| 273 append sql ",$cnt_ovfl" | |
| 274 append sql ",$mx_payload" | |
| 275 append sql ",$int_pages" | |
| 276 append sql ",$leaf_pages" | |
| 277 append sql ",$ovfl_pages" | |
| 278 append sql ",$unused_int" | |
| 279 append sql ",$unused_leaf" | |
| 280 append sql ",$unused_ovfl" | |
| 281 append sql ",$gap_cnt" | |
| 282 append sql ); | |
| 283 mem eval $sql | |
| 284 } | |
| 285 | |
| 286 # Analyze every index in the database, one at a time. | |
| 287 # | |
| 288 # The query below returns the name, associated table and root-page number | |
| 289 # for every index in the database. | |
| 290 # | |
| 291 set sql { | |
| 292 SELECT name, tbl_name, rootpage FROM sqlite_master WHERE type='index' | |
| 293 ORDER BY 2, 1 | |
| 294 } | |
| 295 foreach {name tbl_name rootpage} [db eval $sql] { | |
| 296 puts stderr "Analyzing index $name of table $tbl_name..." | |
| 297 | |
| 298 # Code below traverses the index being analyzed (index name $name), using the | |
| 299 # btree cursor $cursor. Statistics related to index $name are accumulated in | |
| 300 # the following variables: | |
| 301 # | |
| 302 set total_payload $wideZero ;# Payload space used by all entries | |
| 303 set total_ovfl $wideZero ;# Payload space on overflow pages | |
| 304 set unused_leaf $wideZero ;# Unused space on leaf nodes | |
| 305 set unused_ovfl $wideZero ;# Unused space on overflow pages | |
| 306 set cnt_ovfl $wideZero ;# Number of entries that use overflows | |
| 307 set cnt_leaf_entry $wideZero ;# Number of leaf entries | |
| 308 set mx_payload $wideZero ;# Maximum payload size | |
| 309 set ovfl_pages $wideZero ;# Number of overflow pages used | |
| 310 set leaf_pages $wideZero ;# Number of leaf pages | |
| 311 set gap_cnt 0 ;# Number of holes in the page sequence | |
| 312 set prev_pgno 0 ;# Last page number seen | |
| 313 | |
| 314 # As the btree is traversed, the array variable $seen($pgno) is set to 1 | |
| 315 # the first time page $pgno is encountered. | |
| 316 # | |
| 317 catch {unset seen} | |
| 318 | |
| 319 # The following loop runs once for each entry in index $name. The index | |
| 320 # is traversed using the btree cursor stored in variable $csr | |
| 321 # | |
| 322 set csr [btree_cursor $DB $rootpage 0] | |
| 323 for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} { | |
| 324 incr cnt_leaf_entry | |
| 325 | |
| 326 # Retrieve information about the entry the btree-cursor points to into | |
| 327 # the array variable $ci (cursor info). | |
| 328 # | |
| 329 cursor_info ci $csr | |
| 330 | |
| 331 # Check if the payload of this entry is greater than the current | |
| 332 # $mx_payload statistic for the table. Also increase the $total_payload | |
| 333 # statistic. | |
| 334 # | |
| 335 set payload [btree_keysize $csr] | |
| 336 if {$payload>$mx_payload} {set mx_payload $payload} | |
| 337 incr total_payload $payload | |
| 338 | |
| 339 # If this entry uses overflow pages, then update the $cnt_ovfl, | |
| 340 # $total_ovfl, $ovfl_pages and $unused_ovfl statistics. | |
| 341 # | |
| 342 set ovfl [expr {$payload-$ci(local_payload_bytes)}] | |
| 343 if {$ovfl} { | |
| 344 incr cnt_ovfl | |
| 345 incr total_ovfl $ovfl | |
| 346 set n [expr {int(ceil($ovfl/($pageSize-4.0)))}] | |
| 347 incr ovfl_pages $n | |
| 348 incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}] | |
| 349 } | |
| 350 | |
| 351 # If this is the first table entry analyzed for the page, then update | |
| 352 # the page-related statistics $leaf_pages and $unused_leaf. | |
| 353 # | |
| 354 if {![info exists seen($ci(page_no))]} { | |
| 355 set seen($ci(page_no)) 1 | |
| 356 incr leaf_pages | |
| 357 incr unused_leaf $ci(page_freebytes) | |
| 358 set pg $ci(page_no) | |
| 359 if {$prev_pgno>0 && $pg!=$prev_pgno+1} { | |
| 360 incr gap_cnt | |
| 361 } | |
| 362 set prev_pgno $ci(page_no) | |
| 363 } | |
| 364 } | |
| 365 btree_close_cursor $csr | |
| 366 | |
| 367 # Handle the special case where a index contains no data. In this case | |
| 368 # all statistics are zero, except for the number of leaf pages (1) and | |
| 369 # the unused bytes on leaf pages ($pageSize - 8). | |
| 370 # | |
| 371 if {[llength [array names seen]]==0} { | |
| 372 set leaf_pages 1 | |
| 373 set unused_leaf [expr {$pageSize-8}] | |
| 374 } | |
| 375 | |
| 376 # Insert the statistics for the index analyzed into the in-memory database. | |
| 377 # | |
| 378 set sql "INSERT INTO space_used VALUES(" | |
| 379 append sql [quote $name] | |
| 380 append sql ",[quote $tbl_name]" | |
| 381 append sql ",1" | |
| 382 append sql ",$cnt_leaf_entry" | |
| 383 append sql ",$cnt_leaf_entry" | |
| 384 append sql ",$total_payload" | |
| 385 append sql ",$total_ovfl" | |
| 386 append sql ",$cnt_ovfl" | |
| 387 append sql ",$mx_payload" | |
| 388 append sql ",0" | |
| 389 append sql ",$leaf_pages" | |
| 390 append sql ",$ovfl_pages" | |
| 391 append sql ",0" | |
| 392 append sql ",$unused_leaf" | |
| 393 append sql ",$unused_ovfl" | |
| 394 append sql ",$gap_cnt" | |
| 395 append sql ); | |
| 396 mem eval $sql | |
| 397 } | |
| 398 | |
| 399 # Generate a single line of output in the statistics section of the | |
| 400 # report. | |
| 401 # | |
| 402 proc statline {title value {extra {}}} { | |
| 403 set len [string length $title] | |
| 404 set dots [string range {......................................} $len end] | |
| 405 set len [string length $value] | |
| 406 set sp2 [string range { } $len end] | |
| 407 if {$extra ne ""} { | |
| 408 set extra " $extra" | |
| 409 } | |
| 410 puts "$title$dots $value$sp2$extra" | |
| 411 } | |
| 412 | |
| 413 # Generate a formatted percentage value for $num/$denom | |
| 414 # | |
| 415 proc percent {num denom {of {}}} { | |
| 416 if {$denom==0.0} {return ""} | |
| 417 set v [expr {$num*100.0/$denom}] | |
| 418 set of {} | |
| 419 if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} { | |
| 420 return [format {%5.1f%% %s} $v $of] | |
| 421 } elseif {$v<0.1 || $v>99.9} { | |
| 422 return [format {%7.3f%% %s} $v $of] | |
| 423 } else { | |
| 424 return [format {%6.2f%% %s} $v $of] | |
| 425 } | |
| 426 } | |
| 427 | |
| 428 proc divide {num denom} { | |
| 429 if {$denom==0} {return 0.0} | |
| 430 return [format %.2f [expr double($num)/double($denom)]] | |
| 431 } | |
| 432 | |
| 433 # Generate a subreport that covers some subset of the database. | |
| 434 # the $where clause determines which subset to analyze. | |
| 435 # | |
| 436 proc subreport {title where} { | |
| 437 global pageSize file_pgcnt | |
| 438 | |
| 439 # Query the in-memory database for the sum of various statistics | |
| 440 # for the subset of tables/indices identified by the WHERE clause in | |
| 441 # $where. Note that even if the WHERE clause matches no rows, the | |
| 442 # following query returns exactly one row (because it is an aggregate). | |
| 443 # | |
| 444 # The results of the query are stored directly by SQLite into local | |
| 445 # variables (i.e. $nentry, $nleaf etc.). | |
| 446 # | |
| 447 mem eval " | |
| 448 SELECT | |
| 449 int(sum(nentry)) AS nentry, | |
| 450 int(sum(leaf_entries)) AS nleaf, | |
| 451 int(sum(payload)) AS payload, | |
| 452 int(sum(ovfl_payload)) AS ovfl_payload, | |
| 453 max(mx_payload) AS mx_payload, | |
| 454 int(sum(ovfl_cnt)) as ovfl_cnt, | |
| 455 int(sum(leaf_pages)) AS leaf_pages, | |
| 456 int(sum(int_pages)) AS int_pages, | |
| 457 int(sum(ovfl_pages)) AS ovfl_pages, | |
| 458 int(sum(leaf_unused)) AS leaf_unused, | |
| 459 int(sum(int_unused)) AS int_unused, | |
| 460 int(sum(ovfl_unused)) AS ovfl_unused, | |
| 461 int(sum(gap_cnt)) AS gap_cnt | |
| 462 FROM space_used WHERE $where" {} {} | |
| 463 | |
| 464 # Output the sub-report title, nicely decorated with * characters. | |
| 465 # | |
| 466 puts "" | |
| 467 set len [string length $title] | |
| 468 set stars [string repeat * [expr 65-$len]] | |
| 469 puts "*** $title $stars" | |
| 470 puts "" | |
| 471 | |
| 472 # Calculate statistics and store the results in TCL variables, as follows: | |
| 473 # | |
| 474 # total_pages: Database pages consumed. | |
| 475 # total_pages_percent: Pages consumed as a percentage of the file. | |
| 476 # storage: Bytes consumed. | |
| 477 # payload_percent: Payload bytes used as a percentage of $storage. | |
| 478 # total_unused: Unused bytes on pages. | |
| 479 # avg_payload: Average payload per btree entry. | |
| 480 # avg_fanout: Average fanout for internal pages. | |
| 481 # avg_unused: Average unused bytes per btree entry. | |
| 482 # ovfl_cnt_percent: Percentage of btree entries that use overflow pages. | |
| 483 # | |
| 484 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] | |
| 485 set total_pages_percent [percent $total_pages $file_pgcnt] | |
| 486 set storage [expr {$total_pages*$pageSize}] | |
| 487 set payload_percent [percent $payload $storage {of storage consumed}] | |
| 488 set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}] | |
| 489 set avg_payload [divide $payload $nleaf] | |
| 490 set avg_unused [divide $total_unused $nleaf] | |
| 491 if {$int_pages>0} { | |
| 492 # TODO: Is this formula correct? | |
| 493 set nTab [mem eval " | |
| 494 SELECT count(*) FROM ( | |
| 495 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 | |
| 496 ) | |
| 497 "] | |
| 498 set avg_fanout [mem eval " | |
| 499 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used | |
| 500 WHERE $where AND is_index = 0 | |
| 501 "] | |
| 502 set avg_fanout [format %.2f $avg_fanout] | |
| 503 } | |
| 504 set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}] | |
| 505 | |
| 506 # Print out the sub-report statistics. | |
| 507 # | |
| 508 statline {Percentage of total database} $total_pages_percent | |
| 509 statline {Number of entries} $nleaf | |
| 510 statline {Bytes of storage consumed} $storage | |
| 511 statline {Bytes of payload} $payload $payload_percent | |
| 512 statline {Average payload per entry} $avg_payload | |
| 513 statline {Average unused bytes per entry} $avg_unused | |
| 514 if {[info exists avg_fanout]} { | |
| 515 statline {Average fanout} $avg_fanout | |
| 516 } | |
| 517 if {$total_pages>1} { | |
| 518 set fragmentation [percent $gap_cnt [expr {$total_pages-1}] {fragmentation}] | |
| 519 statline {Fragmentation} $fragmentation | |
| 520 } | |
| 521 statline {Maximum payload per entry} $mx_payload | |
| 522 statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent | |
| 523 if {$int_pages>0} { | |
| 524 statline {Index pages used} $int_pages | |
| 525 } | |
| 526 statline {Primary pages used} $leaf_pages | |
| 527 statline {Overflow pages used} $ovfl_pages | |
| 528 statline {Total pages used} $total_pages | |
| 529 if {$int_unused>0} { | |
| 530 set int_unused_percent \ | |
| 531 [percent $int_unused [expr {$int_pages*$pageSize}] {of index space}] | |
| 532 statline "Unused bytes on index pages" $int_unused $int_unused_percent | |
| 533 } | |
| 534 statline "Unused bytes on primary pages" $leaf_unused \ | |
| 535 [percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}] | |
| 536 statline "Unused bytes on overflow pages" $ovfl_unused \ | |
| 537 [percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}] | |
| 538 statline "Unused bytes on all pages" $total_unused \ | |
| 539 [percent $total_unused $storage {of all space}] | |
| 540 return 1 | |
| 541 } | |
| 542 | |
| 543 # Calculate the overhead in pages caused by auto-vacuum. | |
| 544 # | |
| 545 # This procedure calculates and returns the number of pages used by the | |
| 546 # auto-vacuum 'pointer-map'. If the database does not support auto-vacuum, | |
| 547 # then 0 is returned. The two arguments are the size of the database file in | |
| 548 # pages and the page size used by the database (in bytes). | |
| 549 proc autovacuum_overhead {filePages pageSize} { | |
| 550 | |
| 551 # Read the value of meta 4. If non-zero, then the database supports | |
| 552 # auto-vacuum. It would be possible to use "PRAGMA auto_vacuum" instead, | |
| 553 # but that would not work if the SQLITE_OMIT_PRAGMA macro was defined | |
| 554 # when the library was built. | |
| 555 set meta4 [lindex [btree_get_meta $::DB] 4] | |
| 556 | |
| 557 # If the database is not an auto-vacuum database or the file consists | |
| 558 # of one page only then there is no overhead for auto-vacuum. Return zero. | |
| 559 if {0==$meta4 || $filePages==1} { | |
| 560 return 0 | |
| 561 } | |
| 562 | |
| 563 # The number of entries on each pointer map page. The layout of the | |
| 564 # database file is one pointer-map page, followed by $ptrsPerPage other | |
| 565 # pages, followed by a pointer-map page etc. The first pointer-map page | |
| 566 # is the second page of the file overall. | |
| 567 set ptrsPerPage [expr double($pageSize/5)] | |
| 568 | |
| 569 # Return the number of pointer map pages in the database. | |
| 570 return [expr int(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))] | |
| 571 } | |
| 572 | |
| 573 | |
| 574 # Calculate the summary statistics for the database and store the results | |
| 575 # in TCL variables. They are output below. Variables are as follows: | |
| 576 # | |
| 577 # pageSize: Size of each page in bytes. | |
| 578 # file_bytes: File size in bytes. | |
| 579 # file_pgcnt: Number of pages in the file. | |
| 580 # file_pgcnt2: Number of pages in the file (calculated). | |
| 581 # av_pgcnt: Pages consumed by the auto-vacuum pointer-map. | |
| 582 # av_percent: Percentage of the file consumed by auto-vacuum pointer-map. | |
| 583 # inuse_pgcnt: Data pages in the file. | |
| 584 # inuse_percent: Percentage of pages used to store data. | |
| 585 # free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>) | |
| 586 # free_pgcnt2: Free pages in the file according to the file header. | |
| 587 # free_percent: Percentage of file consumed by free pages (calculated). | |
| 588 # free_percent2: Percentage of file consumed by free pages (header). | |
| 589 # ntable: Number of tables in the db. | |
| 590 # nindex: Number of indices in the db. | |
| 591 # nautoindex: Number of indices created automatically. | |
| 592 # nmanindex: Number of indices created manually. | |
| 593 # user_payload: Number of bytes of payload in table btrees | |
| 594 # (not including sqlite_master) | |
| 595 # user_percent: $user_payload as a percentage of total file size. | |
| 596 | |
| 597 set file_bytes [file size $file_to_analyze] | |
| 598 set file_pgcnt [expr {$file_bytes/$pageSize}] | |
| 599 | |
| 600 set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize] | |
| 601 set av_percent [percent $av_pgcnt $file_pgcnt] | |
| 602 | |
| 603 set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used} | |
| 604 set inuse_pgcnt [expr int([mem eval $sql])] | |
| 605 set inuse_percent [percent $inuse_pgcnt $file_pgcnt] | |
| 606 | |
| 607 set free_pgcnt [expr $file_pgcnt-$inuse_pgcnt-$av_pgcnt] | |
| 608 set free_percent [percent $free_pgcnt $file_pgcnt] | |
| 609 set free_pgcnt2 [lindex [btree_get_meta $DB] 0] | |
| 610 set free_percent2 [percent $free_pgcnt2 $file_pgcnt] | |
| 611 | |
| 612 set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}] | |
| 613 | |
| 614 set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}] | |
| 615 set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}] | |
| 616 set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'} | |
| 617 set nautoindex [db eval $sql] | |
| 618 set nmanindex [expr {$nindex-$nautoindex}] | |
| 619 | |
| 620 # set total_payload [mem eval "SELECT sum(payload) FROM space_used"] | |
| 621 set user_payload [mem one {SELECT int(sum(payload)) FROM space_used | |
| 622 WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}] | |
| 623 set user_percent [percent $user_payload $file_bytes] | |
| 624 | |
| 625 # Output the summary statistics calculated above. | |
| 626 # | |
| 627 puts "/** Disk-Space Utilization Report For $file_to_analyze" | |
| 628 catch { | |
| 629 puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]" | |
| 630 } | |
| 631 puts "" | |
| 632 statline {Page size in bytes} $pageSize | |
| 633 statline {Pages in the whole file (measured)} $file_pgcnt | |
| 634 statline {Pages in the whole file (calculated)} $file_pgcnt2 | |
| 635 statline {Pages that store data} $inuse_pgcnt $inuse_percent | |
| 636 statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2 | |
| 637 statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent | |
| 638 statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent | |
| 639 statline {Number of tables in the database} $ntable | |
| 640 statline {Number of indices} $nindex | |
| 641 statline {Number of named indices} $nmanindex | |
| 642 statline {Automatically generated indices} $nautoindex | |
| 643 statline {Size of the file in bytes} $file_bytes | |
| 644 statline {Bytes of user payload stored} $user_payload $user_percent | |
| 645 | |
| 646 # Output table rankings | |
| 647 # | |
| 648 puts "" | |
| 649 puts "*** Page counts for all tables with their indices ********************" | |
| 650 puts "" | |
| 651 mem eval {SELECT tblname, count(*) AS cnt, | |
| 652 int(sum(int_pages+leaf_pages+ovfl_pages)) AS size | |
| 653 FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} { | |
| 654 statline [string toupper $tblname] $size [percent $size $file_pgcnt] | |
| 655 } | |
| 656 | |
| 657 # Output subreports | |
| 658 # | |
| 659 if {$nindex>0} { | |
| 660 subreport {All tables and indices} 1 | |
| 661 } | |
| 662 subreport {All tables} {NOT is_index} | |
| 663 if {$nindex>0} { | |
| 664 subreport {All indices} {is_index} | |
| 665 } | |
| 666 foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index | |
| 667 ORDER BY name}] { | |
| 668 regsub ' $tbl '' qn | |
| 669 set name [string toupper $tbl] | |
| 670 set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"] | |
| 671 if {$n>1} { | |
| 672 subreport "Table $name and all its indices" "tblname='$qn'" | |
| 673 subreport "Table $name w/o any indices" "name='$qn'" | |
| 674 subreport "Indices of table $name" "tblname='$qn' AND is_index" | |
| 675 } else { | |
| 676 subreport "Table $name" "name='$qn'" | |
| 677 } | |
| 678 } | |
| 679 | |
| 680 # Output instructions on what the numbers above mean. | |
| 681 # | |
| 682 puts { | |
| 683 *** Definitions ****************************************************** | |
| 684 | |
| 685 Page size in bytes | |
| 686 | |
| 687 The number of bytes in a single page of the database file. | |
| 688 Usually 1024. | |
| 689 | |
| 690 Number of pages in the whole file | |
| 691 } | |
| 692 puts \ | |
| 693 " The number of $pageSize-byte pages that go into forming the complete | |
| 694 database" | |
| 695 puts \ | |
| 696 { | |
| 697 Pages that store data | |
| 698 | |
| 699 The number of pages that store data, either as primary B*Tree pages or | |
| 700 as overflow pages. The number at the right is the data pages divided by | |
| 701 the total number of pages in the file. | |
| 702 | |
| 703 Pages on the freelist | |
| 704 | |
| 705 The number of pages that are not currently in use but are reserved for | |
| 706 future use. The percentage at the right is the number of freelist pages | |
| 707 divided by the total number of pages in the file. | |
| 708 | |
| 709 Pages of auto-vacuum overhead | |
| 710 | |
| 711 The number of pages that store data used by the database to facilitate | |
| 712 auto-vacuum. This is zero for databases that do not support auto-vacuum. | |
| 713 | |
| 714 Number of tables in the database | |
| 715 | |
| 716 The number of tables in the database, including the SQLITE_MASTER table | |
| 717 used to store schema information. | |
| 718 | |
| 719 Number of indices | |
| 720 | |
| 721 The total number of indices in the database. | |
| 722 | |
| 723 Number of named indices | |
| 724 | |
| 725 The number of indices created using an explicit CREATE INDEX statement. | |
| 726 | |
| 727 Automatically generated indices | |
| 728 | |
| 729 The number of indices used to implement PRIMARY KEY or UNIQUE constraints | |
| 730 on tables. | |
| 731 | |
| 732 Size of the file in bytes | |
| 733 | |
| 734 The total amount of disk space used by the entire database files. | |
| 735 | |
| 736 Bytes of user payload stored | |
| 737 | |
| 738 The total number of bytes of user payload stored in the database. The | |
| 739 schema information in the SQLITE_MASTER table is not counted when | |
| 740 computing this number. The percentage at the right shows the payload | |
| 741 divided by the total file size. | |
| 742 | |
| 743 Percentage of total database | |
| 744 | |
| 745 The amount of the complete database file that is devoted to storing | |
| 746 information described by this category. | |
| 747 | |
| 748 Number of entries | |
| 749 | |
| 750 The total number of B-Tree key/value pairs stored under this category. | |
| 751 | |
| 752 Bytes of storage consumed | |
| 753 | |
| 754 The total amount of disk space required to store all B-Tree entries | |
| 755 under this category. The is the total number of pages used times | |
| 756 the pages size. | |
| 757 | |
| 758 Bytes of payload | |
| 759 | |
| 760 The amount of payload stored under this category. Payload is the data | |
| 761 part of table entries and the key part of index entries. The percentage | |
| 762 at the right is the bytes of payload divided by the bytes of storage | |
| 763 consumed. | |
| 764 | |
| 765 Average payload per entry | |
| 766 | |
| 767 The average amount of payload on each entry. This is just the bytes of | |
| 768 payload divided by the number of entries. | |
| 769 | |
| 770 Average unused bytes per entry | |
| 771 | |
| 772 The average amount of free space remaining on all pages under this | |
| 773 category on a per-entry basis. This is the number of unused bytes on | |
| 774 all pages divided by the number of entries. | |
| 775 | |
| 776 Fragmentation | |
| 777 | |
| 778 The percentage of pages in the table or index that are not | |
| 779 consecutive in the disk file. Many filesystems are optimized | |
| 780 for sequential file access so smaller fragmentation numbers | |
| 781 sometimes result in faster queries, especially for larger | |
| 782 database files that do not fit in the disk cache. | |
| 783 | |
| 784 Maximum payload per entry | |
| 785 | |
| 786 The largest payload size of any entry. | |
| 787 | |
| 788 Entries that use overflow | |
| 789 | |
| 790 The number of entries that user one or more overflow pages. | |
| 791 | |
| 792 Total pages used | |
| 793 | |
| 794 This is the number of pages used to hold all information in the current | |
| 795 category. This is the sum of index, primary, and overflow pages. | |
| 796 | |
| 797 Index pages used | |
| 798 | |
| 799 This is the number of pages in a table B-tree that hold only key (rowid) | |
| 800 information and no data. | |
| 801 | |
| 802 Primary pages used | |
| 803 | |
| 804 This is the number of B-tree pages that hold both key and data. | |
| 805 | |
| 806 Overflow pages used | |
| 807 | |
| 808 The total number of overflow pages used for this category. | |
| 809 | |
| 810 Unused bytes on index pages | |
| 811 | |
| 812 The total number of bytes of unused space on all index pages. The | |
| 813 percentage at the right is the number of unused bytes divided by the | |
| 814 total number of bytes on index pages. | |
| 815 | |
| 816 Unused bytes on primary pages | |
| 817 | |
| 818 The total number of bytes of unused space on all primary pages. The | |
| 819 percentage at the right is the number of unused bytes divided by the | |
| 820 total number of bytes on primary pages. | |
| 821 | |
| 822 Unused bytes on overflow pages | |
| 823 | |
| 824 The total number of bytes of unused space on all overflow pages. The | |
| 825 percentage at the right is the number of unused bytes divided by the | |
| 826 total number of bytes on overflow pages. | |
| 827 | |
| 828 Unused bytes on all pages | |
| 829 | |
| 830 The total number of bytes of unused space on all primary and overflow | |
| 831 pages. The percentage at the right is the number of unused bytes | |
| 832 divided by the total number of bytes. | |
| 833 } | |
| 834 | |
| 835 # Output a dump of the in-memory database. This can be used for more | |
| 836 # complex offline analysis. | |
| 837 # | |
| 838 puts "**********************************************************************" | |
| 839 puts "The entire text of this report can be sourced into any SQL database" | |
| 840 puts "engine for further analysis. All of the text above is an SQL comment." | |
| 841 puts "The data used to generate this report follows:" | |
| 842 puts "*/" | |
| 843 puts "BEGIN;" | |
| 844 puts $tabledef | |
| 845 unset -nocomplain x | |
| 846 mem eval {SELECT * FROM space_used} x { | |
| 847 puts -nonewline "INSERT INTO space_used VALUES" | |
| 848 set sep ( | |
| 849 foreach col $x(*) { | |
| 850 set v $x($col) | |
| 851 if {$v=="" || ![string is double $v]} {set v [quote $v]} | |
| 852 puts -nonewline $sep$v | |
| 853 set sep , | |
| 854 } | |
| 855 puts ");" | |
| 856 } | |
| 857 puts "COMMIT;" | |
| 858 | |
| 859 } err]} { | |
| 860 puts "ERROR: $err" | |
| 861 puts $errorInfo | |
| 862 exit 1 | |
| 863 } | |
| OLD | NEW |