OLD | NEW |
1 # Run this TCL script using "testfixture" in order get a report that shows | 1 # Run this TCL script using "testfixture" in order get a report that shows |
2 # how much disk space is used by a particular data to actually store data | 2 # how much disk space is used by a particular data to actually store data |
3 # versus how much space is unused. | 3 # versus how much space is unused. |
4 # | 4 # |
5 | 5 |
6 if {[catch { | 6 if {[catch { |
| 7 |
| 8 # Argument $tname is the name of a table within the database opened by |
| 9 # database handle [db]. Return true if it is a WITHOUT ROWID table, or |
| 10 # false otherwise. |
| 11 # |
| 12 proc is_without_rowid {tname} { |
| 13 set t [string map {' ''} $tname] |
| 14 db eval "PRAGMA index_list = '$t'" o { |
| 15 if {$o(origin) == "pk"} { |
| 16 set n $o(name) |
| 17 if {0==[db one { SELECT count(*) FROM sqlite_master WHERE name=$n }]} { |
| 18 return 1 |
| 19 } |
| 20 } |
| 21 } |
| 22 return 0 |
| 23 } |
| 24 |
7 # Get the name of the database to analyze | 25 # Get the name of the database to analyze |
8 # | 26 # |
9 proc usage {} { | 27 proc usage {} { |
10 set argv0 [file rootname [file tail [info nameofexecutable]]] | 28 set argv0 [file rootname [file tail [info nameofexecutable]]] |
11 puts stderr "Usage: $argv0 database-name" | 29 puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename" |
| 30 puts stderr { |
| 31 Analyze the SQLite3 database file specified by the "database-filename" |
| 32 argument and output a report detailing size and storage efficiency |
| 33 information for the database and its constituent tables and indexes. |
| 34 |
| 35 Options: |
| 36 |
| 37 --stats Output SQL text that creates a new database containing |
| 38 statistics about the database that was analyzed |
| 39 |
| 40 --pageinfo Show how each page of the database-file is used |
| 41 } |
12 exit 1 | 42 exit 1 |
13 } | 43 } |
14 set file_to_analyze {} | 44 set file_to_analyze {} |
15 set flags(-pageinfo) 0 | 45 set flags(-pageinfo) 0 |
16 set flags(-stats) 0 | 46 set flags(-stats) 0 |
17 append argv {} | 47 append argv {} |
18 foreach arg $argv { | 48 foreach arg $argv { |
19 if {[regexp {^-+pageinfo$} $arg]} { | 49 if {[regexp {^-+pageinfo$} $arg]} { |
20 set flags(-pageinfo) 1 | 50 set flags(-pageinfo) 1 |
21 } elseif {[regexp {^-+stats$} $arg]} { | 51 } elseif {[regexp {^-+stats$} $arg]} { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
63 incr true_file_size [file size $f] | 93 incr true_file_size [file size $f] |
64 } | 94 } |
65 } | 95 } |
66 | 96 |
67 # Open the database | 97 # Open the database |
68 # | 98 # |
69 if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} { | 99 if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} { |
70 puts stderr "error trying to open $file_to_analyze: $msg" | 100 puts stderr "error trying to open $file_to_analyze: $msg" |
71 exit 1 | 101 exit 1 |
72 } | 102 } |
73 register_dbstat_vtab db | |
74 | 103 |
75 db eval {SELECT count(*) FROM sqlite_master} | 104 db eval {SELECT count(*) FROM sqlite_master} |
76 set pageSize [expr {wide([db one {PRAGMA page_size}])}] | 105 set pageSize [expr {wide([db one {PRAGMA page_size}])}] |
77 | 106 |
78 if {$flags(-pageinfo)} { | 107 if {$flags(-pageinfo)} { |
79 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} | 108 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
80 db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} { | 109 db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} { |
81 puts "$pageno $name $path" | 110 puts "$pageno $name $path" |
82 } | 111 } |
83 exit 0 | 112 exit 0 |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
118 # to an in-memory database (for which the schema is shown below). It then | 147 # to an in-memory database (for which the schema is shown below). It then |
119 # queries the in-memory db to produce the space-analysis report. | 148 # queries the in-memory db to produce the space-analysis report. |
120 # | 149 # |
121 sqlite3 mem :memory: | 150 sqlite3 mem :memory: |
122 set tabledef {CREATE TABLE space_used( | 151 set tabledef {CREATE TABLE space_used( |
123 name clob, -- Name of a table or index in the database file | 152 name clob, -- Name of a table or index in the database file |
124 tblname clob, -- Name of associated table | 153 tblname clob, -- Name of associated table |
125 is_index boolean, -- TRUE if it is an index, false for a table | 154 is_index boolean, -- TRUE if it is an index, false for a table |
126 nentry int, -- Number of entries in the BTree | 155 nentry int, -- Number of entries in the BTree |
127 leaf_entries int, -- Number of leaf entries | 156 leaf_entries int, -- Number of leaf entries |
| 157 depth int, -- Depth of the b-tree |
128 payload int, -- Total amount of data stored in this table or index | 158 payload int, -- Total amount of data stored in this table or index |
129 ovfl_payload int, -- Total amount of data stored on overflow pages | 159 ovfl_payload int, -- Total amount of data stored on overflow pages |
130 ovfl_cnt int, -- Number of entries that use overflow | 160 ovfl_cnt int, -- Number of entries that use overflow |
131 mx_payload int, -- Maximum payload size | 161 mx_payload int, -- Maximum payload size |
132 int_pages int, -- Number of interior pages used | 162 int_pages int, -- Number of interior pages used |
133 leaf_pages int, -- Number of leaf pages used | 163 leaf_pages int, -- Number of leaf pages used |
134 ovfl_pages int, -- Number of overflow pages used | 164 ovfl_pages int, -- Number of overflow pages used |
135 int_unused int, -- Number of unused bytes on interior pages | 165 int_unused int, -- Number of unused bytes on interior pages |
136 leaf_unused int, -- Number of unused bytes on primary pages | 166 leaf_unused int, -- Number of unused bytes on primary pages |
137 ovfl_unused int, -- Number of unused bytes on overflow pages | 167 ovfl_unused int, -- Number of unused bytes on overflow pages |
138 gap_cnt int, -- Number of gaps in the page layout | 168 gap_cnt int, -- Number of gaps in the page layout |
139 compressed_size int -- Total bytes stored on disk | 169 compressed_size int -- Total bytes stored on disk |
140 );} | 170 );} |
141 mem eval $tabledef | 171 mem eval $tabledef |
142 | 172 |
143 # Create a temporary "dbstat" virtual table. | 173 # Create a temporary "dbstat" virtual table. |
144 # | 174 # |
145 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} | 175 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} |
146 db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat | 176 db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat |
147 ORDER BY name, path} | 177 ORDER BY name, path} |
148 db eval {DROP TABLE temp.stat} | 178 db eval {DROP TABLE temp.stat} |
149 | 179 |
150 proc isleaf {pagetype is_index} { | |
151 return [expr {$pagetype == "leaf" || ($pagetype == "internal" && $is_index)}] | |
152 } | |
153 proc isoverflow {pagetype is_index} { | |
154 return [expr {$pagetype == "overflow"}] | |
155 } | |
156 proc isinternal {pagetype is_index} { | |
157 return [expr {$pagetype == "internal" && $is_index==0}] | |
158 } | |
159 | |
160 db func isleaf isleaf | |
161 db func isinternal isinternal | |
162 db func isoverflow isoverflow | |
163 | |
164 set isCompressed 0 | 180 set isCompressed 0 |
165 set compressOverhead 0 | 181 set compressOverhead 0 |
| 182 set depth 0 |
166 set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 } | 183 set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 } |
167 foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { | 184 foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { |
168 | 185 |
169 set is_index [expr {$name!=$tblname}] | 186 set is_index [expr {$name!=$tblname}] |
| 187 set idx_btree [expr {$is_index || [is_without_rowid $name]}] |
170 db eval { | 188 db eval { |
171 SELECT | 189 SELECT |
172 sum(ncell) AS nentry, | 190 sum(ncell) AS nentry, |
173 sum(isleaf(pagetype, $is_index) * ncell) AS leaf_entries, | 191 sum((pagetype=='leaf')*ncell) AS leaf_entries, |
174 sum(payload) AS payload, | 192 sum(payload) AS payload, |
175 sum(isoverflow(pagetype, $is_index) * payload) AS ovfl_payload, | 193 sum((pagetype=='overflow') * payload) AS ovfl_payload, |
176 sum(path LIKE '%+000000') AS ovfl_cnt, | 194 sum(path LIKE '%+000000') AS ovfl_cnt, |
177 max(mx_payload) AS mx_payload, | 195 max(mx_payload) AS mx_payload, |
178 sum(isinternal(pagetype, $is_index)) AS int_pages, | 196 sum(pagetype=='internal') AS int_pages, |
179 sum(isleaf(pagetype, $is_index)) AS leaf_pages, | 197 sum(pagetype=='leaf') AS leaf_pages, |
180 sum(isoverflow(pagetype, $is_index)) AS ovfl_pages, | 198 sum(pagetype=='overflow') AS ovfl_pages, |
181 sum(isinternal(pagetype, $is_index) * unused) AS int_unused, | 199 sum((pagetype=='internal') * unused) AS int_unused, |
182 sum(isleaf(pagetype, $is_index) * unused) AS leaf_unused, | 200 sum((pagetype=='leaf') * unused) AS leaf_unused, |
183 sum(isoverflow(pagetype, $is_index) * unused) AS ovfl_unused, | 201 sum((pagetype=='overflow') * unused) AS ovfl_unused, |
184 sum(pgsize) AS compressed_size | 202 sum(pgsize) AS compressed_size, |
| 203 max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4) |
| 204 AS depth |
185 FROM temp.dbstat WHERE name = $name | 205 FROM temp.dbstat WHERE name = $name |
186 } break | 206 } break |
187 | 207 |
188 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] | 208 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] |
189 set storage [expr {$total_pages*$pageSize}] | 209 set storage [expr {$total_pages*$pageSize}] |
190 if {!$isCompressed && $storage>$compressed_size} { | 210 if {!$isCompressed && $storage>$compressed_size} { |
191 set isCompressed 1 | 211 set isCompressed 1 |
192 set compressOverhead 14 | 212 set compressOverhead 14 |
193 } | 213 } |
194 | 214 |
(...skipping 15 matching lines...) Expand all Loading... |
210 } | 230 } |
211 set prev $pageno | 231 set prev $pageno |
212 } | 232 } |
213 mem eval { | 233 mem eval { |
214 INSERT INTO space_used VALUES( | 234 INSERT INTO space_used VALUES( |
215 $name, | 235 $name, |
216 $tblname, | 236 $tblname, |
217 $is_index, | 237 $is_index, |
218 $nentry, | 238 $nentry, |
219 $leaf_entries, | 239 $leaf_entries, |
| 240 $depth, |
220 $payload, | 241 $payload, |
221 $ovfl_payload, | 242 $ovfl_payload, |
222 $ovfl_cnt, | 243 $ovfl_cnt, |
223 $mx_payload, | 244 $mx_payload, |
224 $int_pages, | 245 $int_pages, |
225 $leaf_pages, | 246 $leaf_pages, |
226 $ovfl_pages, | 247 $ovfl_pages, |
227 $int_unused, | 248 $int_unused, |
228 $leaf_unused, | 249 $leaf_unused, |
229 $ovfl_unused, | 250 $ovfl_unused, |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
319 int(sum(ovfl_payload)) AS ovfl_payload, | 340 int(sum(ovfl_payload)) AS ovfl_payload, |
320 max(mx_payload) AS mx_payload, | 341 max(mx_payload) AS mx_payload, |
321 int(sum(ovfl_cnt)) as ovfl_cnt, | 342 int(sum(ovfl_cnt)) as ovfl_cnt, |
322 int(sum(leaf_pages)) AS leaf_pages, | 343 int(sum(leaf_pages)) AS leaf_pages, |
323 int(sum(int_pages)) AS int_pages, | 344 int(sum(int_pages)) AS int_pages, |
324 int(sum(ovfl_pages)) AS ovfl_pages, | 345 int(sum(ovfl_pages)) AS ovfl_pages, |
325 int(sum(leaf_unused)) AS leaf_unused, | 346 int(sum(leaf_unused)) AS leaf_unused, |
326 int(sum(int_unused)) AS int_unused, | 347 int(sum(int_unused)) AS int_unused, |
327 int(sum(ovfl_unused)) AS ovfl_unused, | 348 int(sum(ovfl_unused)) AS ovfl_unused, |
328 int(sum(gap_cnt)) AS gap_cnt, | 349 int(sum(gap_cnt)) AS gap_cnt, |
329 int(sum(compressed_size)) AS compressed_size | 350 int(sum(compressed_size)) AS compressed_size, |
| 351 int(max(depth)) AS depth, |
| 352 count(*) AS cnt |
330 FROM space_used WHERE $where" {} {} | 353 FROM space_used WHERE $where" {} {} |
331 | 354 |
332 # Output the sub-report title, nicely decorated with * characters. | 355 # Output the sub-report title, nicely decorated with * characters. |
333 # | 356 # |
334 puts "" | 357 puts "" |
335 titleline $title | 358 titleline $title |
336 puts "" | 359 puts "" |
337 | 360 |
338 # Calculate statistics and store the results in TCL variables, as follows: | 361 # Calculate statistics and store the results in TCL variables, as follows: |
339 # | 362 # |
(...skipping 16 matching lines...) Expand all Loading... |
356 set avg_unused [divide $total_unused $nleaf] | 379 set avg_unused [divide $total_unused $nleaf] |
357 if {$int_pages>0} { | 380 if {$int_pages>0} { |
358 # TODO: Is this formula correct? | 381 # TODO: Is this formula correct? |
359 set nTab [mem eval " | 382 set nTab [mem eval " |
360 SELECT count(*) FROM ( | 383 SELECT count(*) FROM ( |
361 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 | 384 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 |
362 ) | 385 ) |
363 "] | 386 "] |
364 set avg_fanout [mem eval " | 387 set avg_fanout [mem eval " |
365 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used | 388 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used |
366 WHERE $where AND is_index = 0 | 389 WHERE $where |
367 "] | 390 "] |
368 set avg_fanout [format %.2f $avg_fanout] | 391 set avg_fanout [format %.2f $avg_fanout] |
369 } | 392 } |
370 set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}] | 393 set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}] |
371 | 394 |
372 # Print out the sub-report statistics. | 395 # Print out the sub-report statistics. |
373 # | 396 # |
374 statline {Percentage of total database} $total_pages_percent | 397 statline {Percentage of total database} $total_pages_percent |
375 statline {Number of entries} $nleaf | 398 statline {Number of entries} $nleaf |
376 statline {Bytes of storage consumed} $storage | 399 statline {Bytes of storage consumed} $storage |
377 if {$compressed_size!=$storage} { | 400 if {$compressed_size!=$storage} { |
378 set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}] | 401 set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}] |
379 set pct [expr {$compressed_size*100.0/$storage}] | 402 set pct [expr {$compressed_size*100.0/$storage}] |
380 set pct [format {%5.1f%%} $pct] | 403 set pct [format {%5.1f%%} $pct] |
381 statline {Bytes used after compression} $compressed_size $pct | 404 statline {Bytes used after compression} $compressed_size $pct |
382 } | 405 } |
383 statline {Bytes of payload} $payload $payload_percent | 406 statline {Bytes of payload} $payload $payload_percent |
| 407 if {$cnt==1} {statline {B-tree depth} $depth} |
384 statline {Average payload per entry} $avg_payload | 408 statline {Average payload per entry} $avg_payload |
385 statline {Average unused bytes per entry} $avg_unused | 409 statline {Average unused bytes per entry} $avg_unused |
386 if {[info exists avg_fanout]} { | 410 if {[info exists avg_fanout]} { |
387 statline {Average fanout} $avg_fanout | 411 statline {Average fanout} $avg_fanout |
388 } | 412 } |
389 if {$showFrag && $total_pages>1} { | 413 if {$showFrag && $total_pages>1} { |
390 set fragmentation [percent $gap_cnt [expr {$total_pages-1}]] | 414 set fragmentation [percent $gap_cnt [expr {$total_pages-1}]] |
391 statline {Non-sequential pages} $gap_cnt $fragmentation | 415 statline {Non-sequential pages} $gap_cnt $fragmentation |
392 } | 416 } |
393 statline {Maximum payload per entry} $mx_payload | 417 statline {Maximum payload per entry} $mx_payload |
(...skipping 378 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
772 } | 796 } |
773 puts ");" | 797 puts ");" |
774 } | 798 } |
775 puts "COMMIT;" | 799 puts "COMMIT;" |
776 | 800 |
777 } err]} { | 801 } err]} { |
778 puts "ERROR: $err" | 802 puts "ERROR: $err" |
779 puts $errorInfo | 803 puts $errorInfo |
780 exit 1 | 804 exit 1 |
781 } | 805 } |
OLD | NEW |