OLD | NEW |
| (Empty) |
1 /* | |
2 ** This program is a debugging and analysis utility that displays | |
3 ** information about an FTS3 or FTS4 index. | |
4 ** | |
5 ** Link this program against the SQLite3 amalgamation with the | |
6 ** SQLITE_ENABLE_FTS4 compile-time option. Then run it as: | |
7 ** | |
8 ** fts3view DATABASE | |
9 ** | |
10 ** to get a list of all FTS3/4 tables in DATABASE, or do | |
11 ** | |
12 ** fts3view DATABASE TABLE COMMAND .... | |
13 ** | |
14 ** to see various aspects of the TABLE table. Type fts3view with no | |
15 ** arguments for a list of available COMMANDs. | |
16 */ | |
17 #include <stdio.h> | |
18 #include <stdarg.h> | |
19 #include <stdlib.h> | |
20 #include <string.h> | |
21 #include <ctype.h> | |
22 #include "sqlite3.h" | |
23 | |
/*
** Extra command-line arguments: the arguments that follow the COMMAND
** word.  findOption() removes the options it consumes from this array
** and keeps nExtra in sync.
*/
int nExtra;
char **azExtra;

/*
** Look for a command-line option named zName among the extra arguments.
** Any number of leading '-' characters on an argument are ignored when
** comparing against zName.
**
** If hasArg is zero the option is a simple flag and the return value is
** the (non-NULL) text of the flag itself.  If hasArg is non-zero the
** option takes a value and the argument that follows it is returned.
** If a value-taking option is the final argument, so that no value is
** available, zDefault is returned.
**
** When the option is found, it (and its value, if any) is removed from
** azExtra[] and nExtra is reduced accordingly, so that repeated lookups
** never walk past the end of the remaining arguments.
**
** If the option is not present, zDefault is returned and the argument
** list is left unchanged.
*/
const char *findOption(const char *zName, int hasArg, const char *zDefault){
  int i;
  for(i=0; i<nExtra; i++){
    const char *z = azExtra[i];
    const char *zResult;
    int nRemove;
    int k;
    if( z==0 ) continue;
    while( z[0]=='-' ) z++;
    if( strcmp(z, zName)!=0 ) continue;

    if( hasArg==0 ){
      zResult = azExtra[i];
      nRemove = 1;
    }else if( i+1<nExtra ){
      zResult = azExtra[i+1];
      nRemove = 2;
    }else{
      /* Option requires a value but none was supplied */
      zResult = zDefault;
      nRemove = 1;
    }

    /* Close the gap, reading only entries inside the current array */
    for(k=i; k+nRemove<nExtra; k++){
      azExtra[k] = azExtra[k+nRemove];
    }
    nExtra -= nRemove;
    return zResult;
  }
  return zDefault;
}
52 | |
53 | |
54 /* | |
55 ** Prepare an SQL query | |
56 */ | |
57 static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){ | |
58 va_list ap; | |
59 char *zSql; | |
60 sqlite3_stmt *pStmt; | |
61 int rc; | |
62 | |
63 va_start(ap, zFormat); | |
64 zSql = sqlite3_vmprintf(zFormat, ap); | |
65 va_end(ap); | |
66 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); | |
67 if( rc ){ | |
68 fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zSql); | |
69 exit(1); | |
70 } | |
71 sqlite3_free(zSql); | |
72 return pStmt; | |
73 } | |
74 | |
75 /* | |
76 ** Run an SQL statement | |
77 */ | |
78 static int runSql(sqlite3 *db, const char *zFormat, ...){ | |
79 va_list ap; | |
80 char *zSql; | |
81 int rc; | |
82 | |
83 va_start(ap, zFormat); | |
84 zSql = sqlite3_vmprintf(zFormat, ap); | |
85 rc = sqlite3_exec(db, zSql, 0, 0, 0); | |
86 va_end(ap); | |
87 return rc; | |
88 } | |
89 | |
90 /* | |
91 ** Show the table schema | |
92 */ | |
93 static void showSchema(sqlite3 *db, const char *zTab){ | |
94 sqlite3_stmt *pStmt; | |
95 pStmt = prepare(db, | |
96 "SELECT sql FROM sqlite_master" | |
97 " WHERE name LIKE '%q%%'" | |
98 " ORDER BY 1", | |
99 zTab); | |
100 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
101 printf("%s;\n", sqlite3_column_text(pStmt, 0)); | |
102 } | |
103 sqlite3_finalize(pStmt); | |
104 pStmt = prepare(db, "PRAGMA page_size"); | |
105 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
106 printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0)); | |
107 } | |
108 sqlite3_finalize(pStmt); | |
109 pStmt = prepare(db, "PRAGMA journal_mode"); | |
110 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
111 printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0)); | |
112 } | |
113 sqlite3_finalize(pStmt); | |
114 pStmt = prepare(db, "PRAGMA auto_vacuum"); | |
115 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
116 const char *zType = "???"; | |
117 switch( sqlite3_column_int(pStmt, 0) ){ | |
118 case 0: zType = "OFF"; break; | |
119 case 1: zType = "FULL"; break; | |
120 case 2: zType = "INCREMENTAL"; break; | |
121 } | |
122 printf("PRAGMA auto_vacuum=%s;\n", zType); | |
123 } | |
124 sqlite3_finalize(pStmt); | |
125 pStmt = prepare(db, "PRAGMA encoding"); | |
126 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
127 printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0)); | |
128 } | |
129 sqlite3_finalize(pStmt); | |
130 } | |
131 | |
132 /* | |
133 ** Read a 64-bit variable-length integer from memory starting at p[0]. | |
134 ** Return the number of bytes read, or 0 on error. | |
135 ** The value is stored in *v. | |
136 */ | |
137 int getVarint(const unsigned char *p, sqlite_int64 *v){ | |
138 const unsigned char *q = p; | |
139 sqlite_uint64 x = 0, y = 1; | |
140 while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){ | |
141 x += y * (*q++ & 0x7f); | |
142 y <<= 7; | |
143 } | |
144 x += y * (*q++); | |
145 *v = (sqlite_int64) x; | |
146 return (int) (q - (unsigned char *)p); | |
147 } | |
148 | |
149 | |
150 /* Show the content of the %_stat table | |
151 */ | |
152 static void showStat(sqlite3 *db, const char *zTab){ | |
153 sqlite3_stmt *pStmt; | |
154 pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab); | |
155 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
156 printf("stat[%d] =", sqlite3_column_int(pStmt, 0)); | |
157 switch( sqlite3_column_type(pStmt, 1) ){ | |
158 case SQLITE_INTEGER: { | |
159 printf(" %d\n", sqlite3_column_int(pStmt, 1)); | |
160 break; | |
161 } | |
162 case SQLITE_BLOB: { | |
163 unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1); | |
164 int len = sqlite3_column_bytes(pStmt, 1); | |
165 int i = 0; | |
166 sqlite3_int64 v; | |
167 while( i<len ){ | |
168 i += getVarint(x, &v); | |
169 printf(" %lld", v); | |
170 } | |
171 printf("\n"); | |
172 break; | |
173 } | |
174 } | |
175 } | |
176 sqlite3_finalize(pStmt); | |
177 } | |
178 | |
179 /* | |
180 ** Report on the vocabulary. This creates an fts4aux table with a random | |
181 ** name, but deletes it in the end. | |
182 */ | |
183 static void showVocabulary(sqlite3 *db, const char *zTab){ | |
184 char *zAux; | |
185 sqlite3_uint64 r; | |
186 sqlite3_stmt *pStmt; | |
187 int nDoc = 0; | |
188 int nToken = 0; | |
189 int nOccurrence = 0; | |
190 int nTop; | |
191 int n, i; | |
192 | |
193 sqlite3_randomness(sizeof(r), &r); | |
194 zAux = sqlite3_mprintf("viewer_%llx", zTab, r); | |
195 runSql(db, "BEGIN"); | |
196 pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab); | |
197 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
198 nDoc = sqlite3_column_int(pStmt, 0); | |
199 } | |
200 sqlite3_finalize(pStmt); | |
201 printf("Number of documents...................... %9d\n", nDoc); | |
202 | |
203 runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab); | |
204 pStmt = prepare(db, | |
205 "SELECT count(*), sum(occurrences) FROM %s WHERE col='*'", | |
206 zAux); | |
207 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
208 nToken = sqlite3_column_int(pStmt, 0); | |
209 nOccurrence = sqlite3_column_int(pStmt, 1); | |
210 } | |
211 sqlite3_finalize(pStmt); | |
212 printf("Total tokens in all documents............ %9d\n", nOccurrence); | |
213 printf("Total number of distinct tokens.......... %9d\n", nToken); | |
214 if( nToken==0 ) goto end_vocab; | |
215 | |
216 n = 0; | |
217 pStmt = prepare(db, "SELECT count(*) FROM %s" | |
218 " WHERE col='*' AND occurrences==1", zAux); | |
219 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
220 n = sqlite3_column_int(pStmt, 0); | |
221 } | |
222 sqlite3_finalize(pStmt); | |
223 printf("Tokens used exactly once................. %9d %5.2f%%\n", | |
224 n, n*100.0/nToken); | |
225 | |
226 n = 0; | |
227 pStmt = prepare(db, "SELECT count(*) FROM %s" | |
228 " WHERE col='*' AND documents==1", zAux); | |
229 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
230 n = sqlite3_column_int(pStmt, 0); | |
231 } | |
232 sqlite3_finalize(pStmt); | |
233 printf("Tokens used in only one document......... %9d %5.2f%%\n", | |
234 n, n*100.0/nToken); | |
235 | |
236 if( nDoc>=2000 ){ | |
237 n = 0; | |
238 pStmt = prepare(db, "SELECT count(*) FROM %s" | |
239 " WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000); | |
240 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
241 n = sqlite3_column_int(pStmt, 0); | |
242 } | |
243 sqlite3_finalize(pStmt); | |
244 printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n", | |
245 n, n*100.0/nToken); | |
246 } | |
247 | |
248 if( nDoc>=200 ){ | |
249 n = 0; | |
250 pStmt = prepare(db, "SELECT count(*) FROM %s" | |
251 " WHERE col='*' AND occurrences<=%d", zAux, nDoc/100); | |
252 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
253 n = sqlite3_column_int(pStmt, 0); | |
254 } | |
255 sqlite3_finalize(pStmt); | |
256 printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n", | |
257 n, n*100.0/nToken); | |
258 } | |
259 | |
260 nTop = atoi(findOption("top", 1, "25")); | |
261 printf("The %d most common tokens:\n", nTop); | |
262 pStmt = prepare(db, | |
263 "SELECT term, documents FROM %s" | |
264 " WHERE col='*'" | |
265 " ORDER BY documents DESC, term" | |
266 " LIMIT %d", zAux, nTop); | |
267 i = 0; | |
268 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
269 i++; | |
270 n = sqlite3_column_int(pStmt, 1); | |
271 printf(" %2d. %-30s %9d docs %5.2f%%\n", i, | |
272 sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc); | |
273 } | |
274 sqlite3_finalize(pStmt); | |
275 | |
276 end_vocab: | |
277 runSql(db, "ROLLBACK"); | |
278 sqlite3_free(zAux); | |
279 } | |
280 | |
281 /* | |
282 ** Report on the number and sizes of segments | |
283 */ | |
284 static void showSegmentStats(sqlite3 *db, const char *zTab){ | |
285 sqlite3_stmt *pStmt; | |
286 int nSeg = 0; | |
287 sqlite3_int64 szSeg = 0, mxSeg = 0; | |
288 int nIdx = 0; | |
289 sqlite3_int64 szIdx = 0, mxIdx = 0; | |
290 int nRoot = 0; | |
291 sqlite3_int64 szRoot = 0, mxRoot = 0; | |
292 sqlite3_int64 mx; | |
293 int nLeaf; | |
294 int n; | |
295 int pgsz; | |
296 int mxLevel; | |
297 int i; | |
298 | |
299 pStmt = prepare(db, | |
300 "SELECT count(*), sum(length(block)), max(length(block))" | |
301 " FROM '%q_segments'", | |
302 zTab); | |
303 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
304 nSeg = sqlite3_column_int(pStmt, 0); | |
305 szSeg = sqlite3_column_int64(pStmt, 1); | |
306 mxSeg = sqlite3_column_int64(pStmt, 2); | |
307 } | |
308 sqlite3_finalize(pStmt); | |
309 pStmt = prepare(db, | |
310 "SELECT count(*), sum(length(block)), max(length(block))" | |
311 " FROM '%q_segments' a JOIN '%q_segdir' b" | |
312 " WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block", | |
313 zTab, zTab); | |
314 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
315 nIdx = sqlite3_column_int(pStmt, 0); | |
316 szIdx = sqlite3_column_int64(pStmt, 1); | |
317 mxIdx = sqlite3_column_int64(pStmt, 2); | |
318 } | |
319 sqlite3_finalize(pStmt); | |
320 pStmt = prepare(db, | |
321 "SELECT count(*), sum(length(root)), max(length(root))" | |
322 " FROM '%q_segdir'", | |
323 zTab); | |
324 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
325 nRoot = sqlite3_column_int(pStmt, 0); | |
326 szRoot = sqlite3_column_int64(pStmt, 1); | |
327 mxRoot = sqlite3_column_int64(pStmt, 2); | |
328 } | |
329 sqlite3_finalize(pStmt); | |
330 | |
331 printf("Number of segments....................... %9d\n", nSeg+nRoot); | |
332 printf("Number of leaf segments.................. %9d\n", nSeg-nIdx); | |
333 printf("Number of index segments................. %9d\n", nIdx); | |
334 printf("Number of root segments.................. %9d\n", nRoot); | |
335 printf("Total size of all segments............... %9lld\n", szSeg+szRoot); | |
336 printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx); | |
337 printf("Total size of all index segments......... %9lld\n", szIdx); | |
338 printf("Total size of all root segments.......... %9lld\n", szRoot); | |
339 if( nSeg>0 ){ | |
340 printf("Average size of all segments............. %11.1f\n", | |
341 (double)(szSeg+szRoot)/(double)(nSeg+nRoot)); | |
342 printf("Average size of leaf segments............ %11.1f\n", | |
343 (double)(szSeg-szIdx)/(double)(nSeg-nIdx)); | |
344 } | |
345 if( nIdx>0 ){ | |
346 printf("Average size of index segments........... %11.1f\n", | |
347 (double)szIdx/(double)nIdx); | |
348 } | |
349 if( nRoot>0 ){ | |
350 printf("Average size of root segments............ %11.1f\n", | |
351 (double)szRoot/(double)nRoot); | |
352 } | |
353 mx = mxSeg; | |
354 if( mx<mxRoot ) mx = mxRoot; | |
355 printf("Maximum segment size..................... %9lld\n", mx); | |
356 printf("Maximum index segment size............... %9lld\n", mxIdx); | |
357 printf("Maximum root segment size................ %9lld\n", mxRoot); | |
358 | |
359 pStmt = prepare(db, "PRAGMA page_size"); | |
360 pgsz = 1024; | |
361 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
362 pgsz = sqlite3_column_int(pStmt, 0); | |
363 } | |
364 sqlite3_finalize(pStmt); | |
365 printf("Database page size....................... %9d\n", pgsz); | |
366 pStmt = prepare(db, | |
367 "SELECT count(*)" | |
368 " FROM '%q_segments' a JOIN '%q_segdir' b" | |
369 " WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block" | |
370 " AND length(a.block)>%d", | |
371 zTab, zTab, pgsz-45); | |
372 n = 0; | |
373 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
374 n = sqlite3_column_int(pStmt, 0); | |
375 } | |
376 sqlite3_finalize(pStmt); | |
377 nLeaf = nSeg - nIdx; | |
378 printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n", | |
379 pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0); | |
380 | |
381 pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab); | |
382 mxLevel = 0; | |
383 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
384 mxLevel = sqlite3_column_int(pStmt, 0); | |
385 } | |
386 sqlite3_finalize(pStmt); | |
387 | |
388 for(i=0; i<=mxLevel; i++){ | |
389 pStmt = prepare(db, | |
390 "SELECT count(*), sum(len), avg(len), max(len), sum(len>%d)," | |
391 " count(distinct idx)" | |
392 " FROM (SELECT length(a.block) AS len, idx" | |
393 " FROM '%q_segments' a JOIN '%q_segdir' b" | |
394 " WHERE (a.blockid BETWEEN b.start_block" | |
395 " AND b.leaves_end_block)" | |
396 " AND (b.level%%1024)==%d)", | |
397 pgsz-45, zTab, zTab, i); | |
398 if( sqlite3_step(pStmt)==SQLITE_ROW | |
399 && (nLeaf = sqlite3_column_int(pStmt, 0))>0 | |
400 ){ | |
401 int nIdx = sqlite3_column_int(pStmt, 5); | |
402 sqlite3_int64 sz; | |
403 printf("For level %d:\n", i); | |
404 printf(" Number of indexes...................... %9d\n", nIdx); | |
405 printf(" Number of leaf segments................ %9d\n", nLeaf); | |
406 if( nIdx>1 ){ | |
407 printf(" Average leaf segments per index........ %11.1f\n", | |
408 (double)nLeaf/(double)nIdx); | |
409 } | |
410 printf(" Total size of all leaf segments........ %9lld\n", | |
411 (sz = sqlite3_column_int64(pStmt, 1))); | |
412 printf(" Average size of leaf segments.......... %11.1f\n", | |
413 sqlite3_column_double(pStmt, 2)); | |
414 if( nIdx>1 ){ | |
415 printf(" Average leaf segment size per index.... %11.1f\n", | |
416 (double)sz/(double)nIdx); | |
417 } | |
418 printf(" Maximum leaf segment size.............. %9lld\n", | |
419 sqlite3_column_int64(pStmt, 3)); | |
420 n = sqlite3_column_int(pStmt, 4); | |
421 printf(" Leaf segments larger than %5d bytes.. %9d %5.2f%%\n", | |
422 pgsz-45, n, n*100.0/nLeaf); | |
423 } | |
424 sqlite3_finalize(pStmt); | |
425 } | |
426 } | |
427 | |
428 /* | |
429 ** Print a single "tree" line of the segdir map output. | |
430 */ | |
431 static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){ | |
432 printf(" tree %9lld", iLower); | |
433 if( iUpper>iLower ){ | |
434 printf(" thru %9lld (%lld blocks)", iUpper, iUpper-iLower+1); | |
435 } | |
436 printf("\n"); | |
437 } | |
438 | |
439 /* | |
440 ** Check to see if the block of a %_segments entry is NULL. | |
441 */ | |
442 static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){ | |
443 sqlite3_stmt *pStmt; | |
444 int rc = 1; | |
445 | |
446 pStmt = prepare(db, "SELECT block IS NULL FROM '%q_segments'" | |
447 " WHERE blockid=%lld", zTab, iBlockId); | |
448 if( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
449 rc = sqlite3_column_int(pStmt, 0); | |
450 } | |
451 sqlite3_finalize(pStmt); | |
452 return rc; | |
453 } | |
454 | |
455 /* | |
456 ** Show a map of segments derived from the %_segdir table. | |
457 */ | |
458 static void showSegdirMap(sqlite3 *db, const char *zTab){ | |
459 int mxIndex, iIndex; | |
460 sqlite3_stmt *pStmt = 0; | |
461 sqlite3_stmt *pStmt2 = 0; | |
462 int prevLevel; | |
463 | |
464 pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab); | |
465 if( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
466 mxIndex = sqlite3_column_int(pStmt, 0); | |
467 }else{ | |
468 mxIndex = 0; | |
469 } | |
470 sqlite3_finalize(pStmt); | |
471 | |
472 printf("Number of inverted indices............... %3d\n", mxIndex+1); | |
473 pStmt = prepare(db, | |
474 "SELECT level, idx, start_block, leaves_end_block, end_block, rowid" | |
475 " FROM '%q_segdir'" | |
476 " WHERE level/1024==?" | |
477 " ORDER BY level DESC, idx", | |
478 zTab); | |
479 pStmt2 = prepare(db, | |
480 "SELECT blockid FROM '%q_segments'" | |
481 " WHERE blockid BETWEEN ? AND ? ORDER BY blockid", | |
482 zTab); | |
483 for(iIndex=0; iIndex<=mxIndex; iIndex++){ | |
484 if( mxIndex>0 ){ | |
485 printf("**************************** Index %d " | |
486 "****************************\n", iIndex); | |
487 } | |
488 sqlite3_bind_int(pStmt, 1, iIndex); | |
489 prevLevel = -1; | |
490 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
491 int iLevel = sqlite3_column_int(pStmt, 0)%1024; | |
492 int iIdx = sqlite3_column_int(pStmt, 1); | |
493 sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2); | |
494 sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3); | |
495 sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4); | |
496 char rtag[20]; | |
497 if( iLevel!=prevLevel ){ | |
498 printf("level %2d idx %2d", iLevel, iIdx); | |
499 prevLevel = iLevel; | |
500 }else{ | |
501 printf(" idx %2d", iIdx); | |
502 } | |
503 sqlite3_snprintf(sizeof(rtag), rtag, "r%lld", | |
504 sqlite3_column_int64(pStmt,5)); | |
505 printf(" root %9s\n", rtag); | |
506 if( iLEnd>iStart ){ | |
507 sqlite3_int64 iLower, iPrev, iX; | |
508 if( iLEnd+1<=iEnd ){ | |
509 sqlite3_bind_int64(pStmt2, 1, iLEnd+1); | |
510 sqlite3_bind_int64(pStmt2, 2, iEnd); | |
511 iLower = -1; | |
512 while( sqlite3_step(pStmt2)==SQLITE_ROW ){ | |
513 iX = sqlite3_column_int64(pStmt2, 0); | |
514 if( iLower<0 ){ | |
515 iLower = iPrev = iX; | |
516 }else if( iX==iPrev+1 ){ | |
517 iPrev = iX; | |
518 }else{ | |
519 printTreeLine(iLower, iPrev); | |
520 iLower = iPrev = iX; | |
521 } | |
522 } | |
523 sqlite3_reset(pStmt2); | |
524 if( iLower>=0 ){ | |
525 if( iLower==iPrev && iLower==iEnd | |
526 && isNullSegment(db,zTab,iLower) | |
527 ){ | |
528 printf(" null %9lld\n", iLower); | |
529 }else{ | |
530 printTreeLine(iLower, iPrev); | |
531 } | |
532 } | |
533 } | |
534 printf(" leaves %9lld thru %9lld (%lld blocks)\n", | |
535 iStart, iLEnd, iLEnd - iStart + 1); | |
536 } | |
537 } | |
538 sqlite3_reset(pStmt); | |
539 } | |
540 sqlite3_finalize(pStmt); | |
541 sqlite3_finalize(pStmt2); | |
542 } | |
543 | |
544 /* | |
545 ** Decode a single segment block and display the results on stdout. | |
546 */ | |
547 static void decodeSegment( | |
548 const unsigned char *aData, /* Content to print */ | |
549 int nData /* Number of bytes of content */ | |
550 ){ | |
551 sqlite3_int64 iChild; | |
552 sqlite3_int64 iPrefix; | |
553 sqlite3_int64 nTerm; | |
554 sqlite3_int64 n; | |
555 sqlite3_int64 iDocsz; | |
556 int iHeight; | |
557 sqlite3_int64 i = 0; | |
558 int cnt = 0; | |
559 char zTerm[1000]; | |
560 | |
561 i += getVarint(aData, &n); | |
562 iHeight = (int)n; | |
563 printf("height: %d\n", iHeight); | |
564 if( iHeight>0 ){ | |
565 i += getVarint(aData+i, &iChild); | |
566 printf("left-child: %lld\n", iChild); | |
567 } | |
568 while( i<nData ){ | |
569 if( (cnt++)>0 ){ | |
570 i += getVarint(aData+i, &iPrefix); | |
571 }else{ | |
572 iPrefix = 0; | |
573 } | |
574 i += getVarint(aData+i, &nTerm); | |
575 if( iPrefix+nTerm+1 >= sizeof(zTerm) ){ | |
576 fprintf(stderr, "term to long\n"); | |
577 exit(1); | |
578 } | |
579 memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm); | |
580 zTerm[iPrefix+nTerm] = 0; | |
581 i += nTerm; | |
582 if( iHeight==0 ){ | |
583 i += getVarint(aData+i, &iDocsz); | |
584 printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i); | |
585 i += iDocsz; | |
586 }else{ | |
587 printf("term: %-25s child %lld\n", zTerm, ++iChild); | |
588 } | |
589 } | |
590 } | |
591 | |
592 | |
/*
** Print a blob as hex and ascii.
**
** Output is written to stdout, 16 bytes per line.  Each line starts
** with the offset of its first byte in hex (the field width depends on
** nData), followed by the bytes in hex and then the same bytes as
** characters, with non-printable bytes shown as '.'.  On a short final
** line the missing cells are padded with blanks; bytes at or beyond
** aData[nData] are never read.
*/
static void printBlob(
  const unsigned char *aData,   /* Content to print */
  int nData                     /* Number of bytes of content */
){
  int i, j;
  const char *zOfstFmt;
  const int perLine = 16;

  if( (nData&~0xfff)==0 ){
    zOfstFmt = " %03x: ";
  }else if( (nData&~0xffff)==0 ){
    zOfstFmt = " %04x: ";
  }else if( (nData&~0xfffff)==0 ){
    zOfstFmt = " %05x: ";
  }else if( (nData&~0xffffff)==0 ){
    zOfstFmt = " %06x: ";
  }else{
    zOfstFmt = " %08x: ";
  }

  for(i=0; i<nData; i += perLine){
    fprintf(stdout, zOfstFmt, i);
    for(j=0; j<perLine; j++){
      if( i+j>=nData ){
        /* Same width as one "%02x " cell so the text column lines up */
        fprintf(stdout, "   ");
      }else{
        fprintf(stdout,"%02x ", aData[i+j]);
      }
    }
    for(j=0; j<perLine; j++){
      if( i+j>=nData ){
        fprintf(stdout, " ");
      }else{
        fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
      }
    }
    fprintf(stdout,"\n");
  }
}
635 | |
636 /* | |
637 ** Convert text to a 64-bit integer | |
638 */ | |
639 static sqlite3_int64 atoi64(const char *z){ | |
640 sqlite3_int64 v = 0; | |
641 while( z[0]>='0' && z[0]<='9' ){ | |
642 v = v*10 + z[0] - '0'; | |
643 z++; | |
644 } | |
645 return v; | |
646 } | |
647 | |
648 /* | |
649 ** Return a prepared statement which, when stepped, will return in its | |
650 ** first column the blob associated with segment zId. If zId begins with | |
651 ** 'r' then it is a rowid of a %_segdir entry. Otherwise it is a | |
652 ** %_segment entry. | |
653 */ | |
654 static sqlite3_stmt *prepareToGetSegment( | |
655 sqlite3 *db, /* The database */ | |
656 const char *zTab, /* The FTS3/4 table name */ | |
657 const char *zId /* ID of the segment to open */ | |
658 ){ | |
659 sqlite3_stmt *pStmt; | |
660 if( zId[0]=='r' ){ | |
661 pStmt = prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld", | |
662 zTab, atoi64(zId+1)); | |
663 }else{ | |
664 pStmt = prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld", | |
665 zTab, atoi64(zId)); | |
666 } | |
667 return pStmt; | |
668 } | |
669 | |
670 /* | |
671 ** Print the content of a segment or of the root of a segdir. The segment | |
672 ** or root is identified by azExtra[0]. If the first character of azExtra[0] | |
673 ** is 'r' then the remainder is the integer rowid of the %_segdir entry. | |
674 ** If the first character of azExtra[0] is not 'r' then, then all of | |
675 ** azExtra[0] is an integer which is the block number. | |
676 ** | |
677 ** If the --raw option is present in azExtra, then a hex dump is provided. | |
678 ** Otherwise a decoding is shown. | |
679 */ | |
680 static void showSegment(sqlite3 *db, const char *zTab){ | |
681 const unsigned char *aData; | |
682 int nData; | |
683 sqlite3_stmt *pStmt; | |
684 | |
685 pStmt = prepareToGetSegment(db, zTab, azExtra[0]); | |
686 if( sqlite3_step(pStmt)!=SQLITE_ROW ){ | |
687 sqlite3_finalize(pStmt); | |
688 return; | |
689 } | |
690 nData = sqlite3_column_bytes(pStmt, 0); | |
691 aData = sqlite3_column_blob(pStmt, 0); | |
692 printf("Segment %s of size %d bytes:\n", azExtra[0], nData); | |
693 if( findOption("raw", 0, 0)!=0 ){ | |
694 printBlob(aData, nData); | |
695 }else{ | |
696 decodeSegment(aData, nData); | |
697 } | |
698 sqlite3_finalize(pStmt); | |
699 } | |
700 | |
701 /* | |
702 ** Decode a single doclist and display the results on stdout. | |
703 */ | |
704 static void decodeDoclist( | |
705 const unsigned char *aData, /* Content to print */ | |
706 int nData /* Number of bytes of content */ | |
707 ){ | |
708 sqlite3_int64 iPrevDocid = 0; | |
709 sqlite3_int64 iDocid; | |
710 sqlite3_int64 iPos; | |
711 sqlite3_int64 iPrevPos = 0; | |
712 sqlite3_int64 iCol; | |
713 int i = 0; | |
714 | |
715 while( i<nData ){ | |
716 i += getVarint(aData+i, &iDocid); | |
717 printf("docid %lld col0", iDocid+iPrevDocid); | |
718 iPrevDocid += iDocid; | |
719 iPrevPos = 0; | |
720 while( 1 ){ | |
721 i += getVarint(aData+i, &iPos); | |
722 if( iPos==1 ){ | |
723 i += getVarint(aData+i, &iCol); | |
724 printf(" col%lld", iCol); | |
725 iPrevPos = 0; | |
726 }else if( iPos==0 ){ | |
727 printf("\n"); | |
728 break; | |
729 }else{ | |
730 iPrevPos += iPos - 2; | |
731 printf(" %lld", iPrevPos); | |
732 } | |
733 } | |
734 } | |
735 } | |
736 | |
737 | |
738 /* | |
739 ** Print the content of a doclist. The segment or segdir-root is | |
740 ** identified by azExtra[0]. If the first character of azExtra[0] | |
741 ** is 'r' then the remainder is the integer rowid of the %_segdir entry. | |
742 ** If the first character of azExtra[0] is not 'r' then, then all of | |
743 ** azExtra[0] is an integer which is the block number. The offset | |
744 ** into the segment is identified by azExtra[1]. The size of the doclist | |
745 ** is azExtra[2]. | |
746 ** | |
747 ** If the --raw option is present in azExtra, then a hex dump is provided. | |
748 ** Otherwise a decoding is shown. | |
749 */ | |
750 static void showDoclist(sqlite3 *db, const char *zTab){ | |
751 const unsigned char *aData; | |
752 sqlite3_int64 offset; | |
753 int nData; | |
754 sqlite3_stmt *pStmt; | |
755 | |
756 offset = atoi64(azExtra[1]); | |
757 nData = atoi(azExtra[2]); | |
758 pStmt = prepareToGetSegment(db, zTab, azExtra[0]); | |
759 if( sqlite3_step(pStmt)!=SQLITE_ROW ){ | |
760 sqlite3_finalize(pStmt); | |
761 return; | |
762 } | |
763 aData = sqlite3_column_blob(pStmt, 0); | |
764 printf("Doclist at %s offset %lld of size %d bytes:\n", | |
765 azExtra[0], offset, nData); | |
766 if( findOption("raw", 0, 0)!=0 ){ | |
767 printBlob(aData+offset, nData); | |
768 }else{ | |
769 decodeDoclist(aData+offset, nData); | |
770 } | |
771 sqlite3_finalize(pStmt); | |
772 } | |
773 | |
774 /* | |
775 ** Show the top N largest segments | |
776 */ | |
777 static void listBigSegments(sqlite3 *db, const char *zTab){ | |
778 int nTop, i; | |
779 sqlite3_stmt *pStmt; | |
780 sqlite3_int64 sz; | |
781 sqlite3_int64 id; | |
782 | |
783 nTop = atoi(findOption("top", 1, "25")); | |
784 printf("The %d largest segments:\n", nTop); | |
785 pStmt = prepare(db, | |
786 "SELECT blockid, length(block) AS len FROM '%q_segments'" | |
787 " ORDER BY 2 DESC, 1" | |
788 " LIMIT %d", zTab, nTop); | |
789 i = 0; | |
790 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
791 i++; | |
792 id = sqlite3_column_int64(pStmt, 0); | |
793 sz = sqlite3_column_int64(pStmt, 1); | |
794 printf(" %2d. %9lld size %lld\n", i, id, sz); | |
795 } | |
796 sqlite3_finalize(pStmt); | |
797 } | |
798 | |
799 | |
800 | |
/*
** Print the command-line synopsis and the list of available commands on
** stderr, then terminate the process with exit status 1.  argv0 is the
** program name shown in the synopsis.
*/
static void usage(const char *argv0){
  FILE *out = stderr;

  /* Synopsis: the two ways of invoking the program */
  fprintf(out, "Usage: %s DATABASE\n"
               " or: %s DATABASE FTS3TABLE ARGS...\n", argv0, argv0);

  /* One line per command */
  fprintf(out,
      "ARGS:\n"
      " big-segments [--top N] show the largest segments\n"
      " doclist BLOCKID OFFSET SIZE [--raw] Decode a doclist\n"
      " schema FTS table schema\n"
      " segdir directory of segments\n"
      " segment BLOCKID [--raw] content of a segment\n"
      " segment-stats info on segment sizes\n"
      " stat the %%_stat table\n"
      " vocabulary [--top N] document vocabulary\n"
  );
  exit(1);
}
817 | |
818 int main(int argc, char **argv){ | |
819 sqlite3 *db; | |
820 int rc; | |
821 const char *zTab; | |
822 const char *zCmd; | |
823 | |
824 if( argc<2 ) usage(argv[0]); | |
825 rc = sqlite3_open(argv[1], &db); | |
826 if( rc ){ | |
827 fprintf(stderr, "Cannot open %s\n", argv[1]); | |
828 exit(1); | |
829 } | |
830 if( argc==2 ){ | |
831 sqlite3_stmt *pStmt; | |
832 int cnt = 0; | |
833 pStmt = prepare(db, "SELECT b.sql" | |
834 " FROM sqlite_master a, sqlite_master b" | |
835 " WHERE a.name GLOB '*_segdir'" | |
836 " AND b.name=substr(a.name,1,length(a.name)-7)" | |
837 " ORDER BY 1"); | |
838 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
839 cnt++; | |
840 printf("%s;\n", sqlite3_column_text(pStmt, 0)); | |
841 } | |
842 sqlite3_finalize(pStmt); | |
843 if( cnt==0 ){ | |
844 printf("/* No FTS3/4 tables found in database %s */\n", argv[1]); | |
845 } | |
846 return 0; | |
847 } | |
848 if( argc<4 ) usage(argv[0]); | |
849 zTab = argv[2]; | |
850 zCmd = argv[3]; | |
851 nExtra = argc-4; | |
852 azExtra = argv+4; | |
853 if( strcmp(zCmd,"big-segments")==0 ){ | |
854 listBigSegments(db, zTab); | |
855 }else if( strcmp(zCmd,"doclist")==0 ){ | |
856 if( argc<7 ) usage(argv[0]); | |
857 showDoclist(db, zTab); | |
858 }else if( strcmp(zCmd,"schema")==0 ){ | |
859 showSchema(db, zTab); | |
860 }else if( strcmp(zCmd,"segdir")==0 ){ | |
861 showSegdirMap(db, zTab); | |
862 }else if( strcmp(zCmd,"segment")==0 ){ | |
863 if( argc<5 ) usage(argv[0]); | |
864 showSegment(db, zTab); | |
865 }else if( strcmp(zCmd,"segment-stats")==0 ){ | |
866 showSegmentStats(db, zTab); | |
867 }else if( strcmp(zCmd,"stat")==0 ){ | |
868 showStat(db, zTab); | |
869 }else if( strcmp(zCmd,"vocabulary")==0 ){ | |
870 showVocabulary(db, zTab); | |
871 }else{ | |
872 usage(argv[0]); | |
873 } | |
874 return 0; | |
875 } | |
OLD | NEW |