| OLD | NEW |
| (Empty) |
/*
** This program is a debugging and analysis utility that displays
** information about an FTS3 or FTS4 index.
**
** Link this program against the SQLite3 amalgamation with the
** SQLITE_ENABLE_FTS4 compile-time option.  Then run it as:
**
**    fts3view DATABASE
**
** to get a list of all FTS3/4 tables in DATABASE, or do
**
**    fts3view DATABASE TABLE COMMAND ....
**
** to see various aspects of the TABLE table.  Type fts3view with no
** arguments for a list of available COMMANDs.
*/
| 17 #include <stdio.h> | |
| 18 #include <stdarg.h> | |
| 19 #include <stdlib.h> | |
| 20 #include <string.h> | |
| 21 #include <ctype.h> | |
| 22 #include "sqlite3.h" | |
| 23 | |
/*
** Extra command-line arguments: the tokens that follow the COMMAND word.
** findOption() removes the entries it consumes from azExtra[] and lowers
** nExtra to match.
*/
int nExtra;
char **azExtra;

/*
** Look for the command-line option zName among azExtra[0..nExtra-1].
** Any number of leading '-' characters on an argument is ignored, so
** "top", "-top" and "--top" all match zName=="top".
**
** Return value:
**
**   *  zDefault if the option is not present.
**   *  The option token itself (a non-NULL "found" marker) if the option
**      is present and hasArg is zero.
**   *  The argument following the option if hasArg is non-zero.  If the
**      option is the final argument and so has no value, the option token
**      itself is returned.
**
** The matched option, and its value when one is consumed, are deleted
** from azExtra[] by shifting later entries down, and nExtra is reduced by
** the number of entries deleted.  Only azExtra[0..nExtra-1] is ever read,
** so the array does not need a NULL terminator.
*/
const char *findOption(const char *zName, int hasArg, const char *zDefault){
  int i;
  for(i=0; i<nExtra; i++){
    const char *z = azExtra[i];
    const char *zResult;
    int nRemove;              /* Number of azExtra[] entries to delete */
    int k;

    if( z==0 ) continue;
    while( z[0]=='-' ) z++;
    if( strcmp(z, zName)!=0 ) continue;

    /* Consume the following argument only if one was asked for and exists */
    nRemove = (hasArg!=0 && i<nExtra-1) ? 2 : 1;
    zResult = azExtra[i+nRemove-1];
    for(k=i; k+nRemove<nExtra; k++){
      azExtra[k] = azExtra[k+nRemove];
    }
    nExtra -= nRemove;
    return zResult;
  }
  return zDefault;
}
| 52 | |
| 53 | |
| 54 /* | |
| 55 ** Prepare an SQL query | |
| 56 */ | |
| 57 static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){ | |
| 58 va_list ap; | |
| 59 char *zSql; | |
| 60 sqlite3_stmt *pStmt; | |
| 61 int rc; | |
| 62 | |
| 63 va_start(ap, zFormat); | |
| 64 zSql = sqlite3_vmprintf(zFormat, ap); | |
| 65 va_end(ap); | |
| 66 rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); | |
| 67 if( rc ){ | |
| 68 fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zSql); | |
| 69 exit(1); | |
| 70 } | |
| 71 sqlite3_free(zSql); | |
| 72 return pStmt; | |
| 73 } | |
| 74 | |
| 75 /* | |
| 76 ** Run an SQL statement | |
| 77 */ | |
| 78 static int runSql(sqlite3 *db, const char *zFormat, ...){ | |
| 79 va_list ap; | |
| 80 char *zSql; | |
| 81 int rc; | |
| 82 | |
| 83 va_start(ap, zFormat); | |
| 84 zSql = sqlite3_vmprintf(zFormat, ap); | |
| 85 rc = sqlite3_exec(db, zSql, 0, 0, 0); | |
| 86 va_end(ap); | |
| 87 return rc; | |
| 88 } | |
| 89 | |
| 90 /* | |
| 91 ** Show the table schema | |
| 92 */ | |
| 93 static void showSchema(sqlite3 *db, const char *zTab){ | |
| 94 sqlite3_stmt *pStmt; | |
| 95 pStmt = prepare(db, | |
| 96 "SELECT sql FROM sqlite_master" | |
| 97 " WHERE name LIKE '%q%%'" | |
| 98 " ORDER BY 1", | |
| 99 zTab); | |
| 100 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 101 printf("%s;\n", sqlite3_column_text(pStmt, 0)); | |
| 102 } | |
| 103 sqlite3_finalize(pStmt); | |
| 104 pStmt = prepare(db, "PRAGMA page_size"); | |
| 105 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 106 printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0)); | |
| 107 } | |
| 108 sqlite3_finalize(pStmt); | |
| 109 pStmt = prepare(db, "PRAGMA journal_mode"); | |
| 110 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 111 printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0)); | |
| 112 } | |
| 113 sqlite3_finalize(pStmt); | |
| 114 pStmt = prepare(db, "PRAGMA auto_vacuum"); | |
| 115 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 116 const char *zType = "???"; | |
| 117 switch( sqlite3_column_int(pStmt, 0) ){ | |
| 118 case 0: zType = "OFF"; break; | |
| 119 case 1: zType = "FULL"; break; | |
| 120 case 2: zType = "INCREMENTAL"; break; | |
| 121 } | |
| 122 printf("PRAGMA auto_vacuum=%s;\n", zType); | |
| 123 } | |
| 124 sqlite3_finalize(pStmt); | |
| 125 pStmt = prepare(db, "PRAGMA encoding"); | |
| 126 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 127 printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0)); | |
| 128 } | |
| 129 sqlite3_finalize(pStmt); | |
| 130 } | |
| 131 | |
| 132 /* | |
| 133 ** Read a 64-bit variable-length integer from memory starting at p[0]. | |
| 134 ** Return the number of bytes read, or 0 on error. | |
| 135 ** The value is stored in *v. | |
| 136 */ | |
| 137 int getVarint(const unsigned char *p, sqlite_int64 *v){ | |
| 138 const unsigned char *q = p; | |
| 139 sqlite_uint64 x = 0, y = 1; | |
| 140 while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){ | |
| 141 x += y * (*q++ & 0x7f); | |
| 142 y <<= 7; | |
| 143 } | |
| 144 x += y * (*q++); | |
| 145 *v = (sqlite_int64) x; | |
| 146 return (int) (q - (unsigned char *)p); | |
| 147 } | |
| 148 | |
| 149 | |
| 150 /* Show the content of the %_stat table | |
| 151 */ | |
| 152 static void showStat(sqlite3 *db, const char *zTab){ | |
| 153 sqlite3_stmt *pStmt; | |
| 154 pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab); | |
| 155 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 156 printf("stat[%d] =", sqlite3_column_int(pStmt, 0)); | |
| 157 switch( sqlite3_column_type(pStmt, 1) ){ | |
| 158 case SQLITE_INTEGER: { | |
| 159 printf(" %d\n", sqlite3_column_int(pStmt, 1)); | |
| 160 break; | |
| 161 } | |
| 162 case SQLITE_BLOB: { | |
| 163 unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1); | |
| 164 int len = sqlite3_column_bytes(pStmt, 1); | |
| 165 int i = 0; | |
| 166 sqlite3_int64 v; | |
| 167 while( i<len ){ | |
| 168 i += getVarint(x, &v); | |
| 169 printf(" %lld", v); | |
| 170 } | |
| 171 printf("\n"); | |
| 172 break; | |
| 173 } | |
| 174 } | |
| 175 } | |
| 176 sqlite3_finalize(pStmt); | |
| 177 } | |
| 178 | |
| 179 /* | |
| 180 ** Report on the vocabulary. This creates an fts4aux table with a random | |
| 181 ** name, but deletes it in the end. | |
| 182 */ | |
| 183 static void showVocabulary(sqlite3 *db, const char *zTab){ | |
| 184 char *zAux; | |
| 185 sqlite3_uint64 r; | |
| 186 sqlite3_stmt *pStmt; | |
| 187 int nDoc = 0; | |
| 188 int nToken = 0; | |
| 189 int nOccurrence = 0; | |
| 190 int nTop; | |
| 191 int n, i; | |
| 192 | |
| 193 sqlite3_randomness(sizeof(r), &r); | |
| 194 zAux = sqlite3_mprintf("viewer_%llx", zTab, r); | |
| 195 runSql(db, "BEGIN"); | |
| 196 pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab); | |
| 197 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 198 nDoc = sqlite3_column_int(pStmt, 0); | |
| 199 } | |
| 200 sqlite3_finalize(pStmt); | |
| 201 printf("Number of documents...................... %9d\n", nDoc); | |
| 202 | |
| 203 runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab); | |
| 204 pStmt = prepare(db, | |
| 205 "SELECT count(*), sum(occurrences) FROM %s WHERE col='*'", | |
| 206 zAux); | |
| 207 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 208 nToken = sqlite3_column_int(pStmt, 0); | |
| 209 nOccurrence = sqlite3_column_int(pStmt, 1); | |
| 210 } | |
| 211 sqlite3_finalize(pStmt); | |
| 212 printf("Total tokens in all documents............ %9d\n", nOccurrence); | |
| 213 printf("Total number of distinct tokens.......... %9d\n", nToken); | |
| 214 if( nToken==0 ) goto end_vocab; | |
| 215 | |
| 216 n = 0; | |
| 217 pStmt = prepare(db, "SELECT count(*) FROM %s" | |
| 218 " WHERE col='*' AND occurrences==1", zAux); | |
| 219 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 220 n = sqlite3_column_int(pStmt, 0); | |
| 221 } | |
| 222 sqlite3_finalize(pStmt); | |
| 223 printf("Tokens used exactly once................. %9d %5.2f%%\n", | |
| 224 n, n*100.0/nToken); | |
| 225 | |
| 226 n = 0; | |
| 227 pStmt = prepare(db, "SELECT count(*) FROM %s" | |
| 228 " WHERE col='*' AND documents==1", zAux); | |
| 229 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 230 n = sqlite3_column_int(pStmt, 0); | |
| 231 } | |
| 232 sqlite3_finalize(pStmt); | |
| 233 printf("Tokens used in only one document......... %9d %5.2f%%\n", | |
| 234 n, n*100.0/nToken); | |
| 235 | |
| 236 if( nDoc>=2000 ){ | |
| 237 n = 0; | |
| 238 pStmt = prepare(db, "SELECT count(*) FROM %s" | |
| 239 " WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000); | |
| 240 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 241 n = sqlite3_column_int(pStmt, 0); | |
| 242 } | |
| 243 sqlite3_finalize(pStmt); | |
| 244 printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n", | |
| 245 n, n*100.0/nToken); | |
| 246 } | |
| 247 | |
| 248 if( nDoc>=200 ){ | |
| 249 n = 0; | |
| 250 pStmt = prepare(db, "SELECT count(*) FROM %s" | |
| 251 " WHERE col='*' AND occurrences<=%d", zAux, nDoc/100); | |
| 252 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 253 n = sqlite3_column_int(pStmt, 0); | |
| 254 } | |
| 255 sqlite3_finalize(pStmt); | |
| 256 printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n", | |
| 257 n, n*100.0/nToken); | |
| 258 } | |
| 259 | |
| 260 nTop = atoi(findOption("top", 1, "25")); | |
| 261 printf("The %d most common tokens:\n", nTop); | |
| 262 pStmt = prepare(db, | |
| 263 "SELECT term, documents FROM %s" | |
| 264 " WHERE col='*'" | |
| 265 " ORDER BY documents DESC, term" | |
| 266 " LIMIT %d", zAux, nTop); | |
| 267 i = 0; | |
| 268 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 269 i++; | |
| 270 n = sqlite3_column_int(pStmt, 1); | |
| 271 printf(" %2d. %-30s %9d docs %5.2f%%\n", i, | |
| 272 sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc); | |
| 273 } | |
| 274 sqlite3_finalize(pStmt); | |
| 275 | |
| 276 end_vocab: | |
| 277 runSql(db, "ROLLBACK"); | |
| 278 sqlite3_free(zAux); | |
| 279 } | |
| 280 | |
| 281 /* | |
| 282 ** Report on the number and sizes of segments | |
| 283 */ | |
| 284 static void showSegmentStats(sqlite3 *db, const char *zTab){ | |
| 285 sqlite3_stmt *pStmt; | |
| 286 int nSeg = 0; | |
| 287 sqlite3_int64 szSeg = 0, mxSeg = 0; | |
| 288 int nIdx = 0; | |
| 289 sqlite3_int64 szIdx = 0, mxIdx = 0; | |
| 290 int nRoot = 0; | |
| 291 sqlite3_int64 szRoot = 0, mxRoot = 0; | |
| 292 sqlite3_int64 mx; | |
| 293 int nLeaf; | |
| 294 int n; | |
| 295 int pgsz; | |
| 296 int mxLevel; | |
| 297 int i; | |
| 298 | |
| 299 pStmt = prepare(db, | |
| 300 "SELECT count(*), sum(length(block)), max(length(block))" | |
| 301 " FROM '%q_segments'", | |
| 302 zTab); | |
| 303 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 304 nSeg = sqlite3_column_int(pStmt, 0); | |
| 305 szSeg = sqlite3_column_int64(pStmt, 1); | |
| 306 mxSeg = sqlite3_column_int64(pStmt, 2); | |
| 307 } | |
| 308 sqlite3_finalize(pStmt); | |
| 309 pStmt = prepare(db, | |
| 310 "SELECT count(*), sum(length(block)), max(length(block))" | |
| 311 " FROM '%q_segments' a JOIN '%q_segdir' b" | |
| 312 " WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block", | |
| 313 zTab, zTab); | |
| 314 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 315 nIdx = sqlite3_column_int(pStmt, 0); | |
| 316 szIdx = sqlite3_column_int64(pStmt, 1); | |
| 317 mxIdx = sqlite3_column_int64(pStmt, 2); | |
| 318 } | |
| 319 sqlite3_finalize(pStmt); | |
| 320 pStmt = prepare(db, | |
| 321 "SELECT count(*), sum(length(root)), max(length(root))" | |
| 322 " FROM '%q_segdir'", | |
| 323 zTab); | |
| 324 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 325 nRoot = sqlite3_column_int(pStmt, 0); | |
| 326 szRoot = sqlite3_column_int64(pStmt, 1); | |
| 327 mxRoot = sqlite3_column_int64(pStmt, 2); | |
| 328 } | |
| 329 sqlite3_finalize(pStmt); | |
| 330 | |
| 331 printf("Number of segments....................... %9d\n", nSeg+nRoot); | |
| 332 printf("Number of leaf segments.................. %9d\n", nSeg-nIdx); | |
| 333 printf("Number of index segments................. %9d\n", nIdx); | |
| 334 printf("Number of root segments.................. %9d\n", nRoot); | |
| 335 printf("Total size of all segments............... %9lld\n", szSeg+szRoot); | |
| 336 printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx); | |
| 337 printf("Total size of all index segments......... %9lld\n", szIdx); | |
| 338 printf("Total size of all root segments.......... %9lld\n", szRoot); | |
| 339 if( nSeg>0 ){ | |
| 340 printf("Average size of all segments............. %11.1f\n", | |
| 341 (double)(szSeg+szRoot)/(double)(nSeg+nRoot)); | |
| 342 printf("Average size of leaf segments............ %11.1f\n", | |
| 343 (double)(szSeg-szIdx)/(double)(nSeg-nIdx)); | |
| 344 } | |
| 345 if( nIdx>0 ){ | |
| 346 printf("Average size of index segments........... %11.1f\n", | |
| 347 (double)szIdx/(double)nIdx); | |
| 348 } | |
| 349 if( nRoot>0 ){ | |
| 350 printf("Average size of root segments............ %11.1f\n", | |
| 351 (double)szRoot/(double)nRoot); | |
| 352 } | |
| 353 mx = mxSeg; | |
| 354 if( mx<mxRoot ) mx = mxRoot; | |
| 355 printf("Maximum segment size..................... %9lld\n", mx); | |
| 356 printf("Maximum index segment size............... %9lld\n", mxIdx); | |
| 357 printf("Maximum root segment size................ %9lld\n", mxRoot); | |
| 358 | |
| 359 pStmt = prepare(db, "PRAGMA page_size"); | |
| 360 pgsz = 1024; | |
| 361 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 362 pgsz = sqlite3_column_int(pStmt, 0); | |
| 363 } | |
| 364 sqlite3_finalize(pStmt); | |
| 365 printf("Database page size....................... %9d\n", pgsz); | |
| 366 pStmt = prepare(db, | |
| 367 "SELECT count(*)" | |
| 368 " FROM '%q_segments' a JOIN '%q_segdir' b" | |
| 369 " WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block" | |
| 370 " AND length(a.block)>%d", | |
| 371 zTab, zTab, pgsz-45); | |
| 372 n = 0; | |
| 373 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 374 n = sqlite3_column_int(pStmt, 0); | |
| 375 } | |
| 376 sqlite3_finalize(pStmt); | |
| 377 nLeaf = nSeg - nIdx; | |
| 378 printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n", | |
| 379 pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0); | |
| 380 | |
| 381 pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab); | |
| 382 mxLevel = 0; | |
| 383 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 384 mxLevel = sqlite3_column_int(pStmt, 0); | |
| 385 } | |
| 386 sqlite3_finalize(pStmt); | |
| 387 | |
| 388 for(i=0; i<=mxLevel; i++){ | |
| 389 pStmt = prepare(db, | |
| 390 "SELECT count(*), sum(len), avg(len), max(len), sum(len>%d)," | |
| 391 " count(distinct idx)" | |
| 392 " FROM (SELECT length(a.block) AS len, idx" | |
| 393 " FROM '%q_segments' a JOIN '%q_segdir' b" | |
| 394 " WHERE (a.blockid BETWEEN b.start_block" | |
| 395 " AND b.leaves_end_block)" | |
| 396 " AND (b.level%%1024)==%d)", | |
| 397 pgsz-45, zTab, zTab, i); | |
| 398 if( sqlite3_step(pStmt)==SQLITE_ROW | |
| 399 && (nLeaf = sqlite3_column_int(pStmt, 0))>0 | |
| 400 ){ | |
| 401 nIdx = sqlite3_column_int(pStmt, 5); | |
| 402 sqlite3_int64 sz; | |
| 403 printf("For level %d:\n", i); | |
| 404 printf(" Number of indexes...................... %9d\n", nIdx); | |
| 405 printf(" Number of leaf segments................ %9d\n", nLeaf); | |
| 406 if( nIdx>1 ){ | |
| 407 printf(" Average leaf segments per index........ %11.1f\n", | |
| 408 (double)nLeaf/(double)nIdx); | |
| 409 } | |
| 410 printf(" Total size of all leaf segments........ %9lld\n", | |
| 411 (sz = sqlite3_column_int64(pStmt, 1))); | |
| 412 printf(" Average size of leaf segments.......... %11.1f\n", | |
| 413 sqlite3_column_double(pStmt, 2)); | |
| 414 if( nIdx>1 ){ | |
| 415 printf(" Average leaf segment size per index.... %11.1f\n", | |
| 416 (double)sz/(double)nIdx); | |
| 417 } | |
| 418 printf(" Maximum leaf segment size.............. %9lld\n", | |
| 419 sqlite3_column_int64(pStmt, 3)); | |
| 420 n = sqlite3_column_int(pStmt, 4); | |
| 421 printf(" Leaf segments larger than %5d bytes.. %9d %5.2f%%\n", | |
| 422 pgsz-45, n, n*100.0/nLeaf); | |
| 423 } | |
| 424 sqlite3_finalize(pStmt); | |
| 425 } | |
| 426 } | |
| 427 | |
| 428 /* | |
| 429 ** Print a single "tree" line of the segdir map output. | |
| 430 */ | |
| 431 static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){ | |
| 432 printf(" tree %9lld", iLower); | |
| 433 if( iUpper>iLower ){ | |
| 434 printf(" thru %9lld (%lld blocks)", iUpper, iUpper-iLower+1); | |
| 435 } | |
| 436 printf("\n"); | |
| 437 } | |
| 438 | |
| 439 /* | |
| 440 ** Check to see if the block of a %_segments entry is NULL. | |
| 441 */ | |
| 442 static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){ | |
| 443 sqlite3_stmt *pStmt; | |
| 444 int rc = 1; | |
| 445 | |
| 446 pStmt = prepare(db, "SELECT block IS NULL FROM '%q_segments'" | |
| 447 " WHERE blockid=%lld", zTab, iBlockId); | |
| 448 if( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 449 rc = sqlite3_column_int(pStmt, 0); | |
| 450 } | |
| 451 sqlite3_finalize(pStmt); | |
| 452 return rc; | |
| 453 } | |
| 454 | |
| 455 /* | |
| 456 ** Show a map of segments derived from the %_segdir table. | |
| 457 */ | |
| 458 static void showSegdirMap(sqlite3 *db, const char *zTab){ | |
| 459 int mxIndex, iIndex; | |
| 460 sqlite3_stmt *pStmt = 0; | |
| 461 sqlite3_stmt *pStmt2 = 0; | |
| 462 int prevLevel; | |
| 463 | |
| 464 pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab); | |
| 465 if( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 466 mxIndex = sqlite3_column_int(pStmt, 0); | |
| 467 }else{ | |
| 468 mxIndex = 0; | |
| 469 } | |
| 470 sqlite3_finalize(pStmt); | |
| 471 | |
| 472 printf("Number of inverted indices............... %3d\n", mxIndex+1); | |
| 473 pStmt = prepare(db, | |
| 474 "SELECT level, idx, start_block, leaves_end_block, end_block, rowid" | |
| 475 " FROM '%q_segdir'" | |
| 476 " WHERE level/1024==?" | |
| 477 " ORDER BY level DESC, idx", | |
| 478 zTab); | |
| 479 pStmt2 = prepare(db, | |
| 480 "SELECT blockid FROM '%q_segments'" | |
| 481 " WHERE blockid BETWEEN ? AND ? ORDER BY blockid", | |
| 482 zTab); | |
| 483 for(iIndex=0; iIndex<=mxIndex; iIndex++){ | |
| 484 if( mxIndex>0 ){ | |
| 485 printf("**************************** Index %d " | |
| 486 "****************************\n", iIndex); | |
| 487 } | |
| 488 sqlite3_bind_int(pStmt, 1, iIndex); | |
| 489 prevLevel = -1; | |
| 490 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 491 int iLevel = sqlite3_column_int(pStmt, 0)%1024; | |
| 492 int iIdx = sqlite3_column_int(pStmt, 1); | |
| 493 sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2); | |
| 494 sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3); | |
| 495 sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4); | |
| 496 char rtag[20]; | |
| 497 if( iLevel!=prevLevel ){ | |
| 498 printf("level %2d idx %2d", iLevel, iIdx); | |
| 499 prevLevel = iLevel; | |
| 500 }else{ | |
| 501 printf(" idx %2d", iIdx); | |
| 502 } | |
| 503 sqlite3_snprintf(sizeof(rtag), rtag, "r%lld", | |
| 504 sqlite3_column_int64(pStmt,5)); | |
| 505 printf(" root %9s\n", rtag); | |
| 506 if( iLEnd>iStart ){ | |
| 507 sqlite3_int64 iLower, iPrev = 0, iX; | |
| 508 if( iLEnd+1<=iEnd ){ | |
| 509 sqlite3_bind_int64(pStmt2, 1, iLEnd+1); | |
| 510 sqlite3_bind_int64(pStmt2, 2, iEnd); | |
| 511 iLower = -1; | |
| 512 while( sqlite3_step(pStmt2)==SQLITE_ROW ){ | |
| 513 iX = sqlite3_column_int64(pStmt2, 0); | |
| 514 if( iLower<0 ){ | |
| 515 iLower = iPrev = iX; | |
| 516 }else if( iX==iPrev+1 ){ | |
| 517 iPrev = iX; | |
| 518 }else{ | |
| 519 printTreeLine(iLower, iPrev); | |
| 520 iLower = iPrev = iX; | |
| 521 } | |
| 522 } | |
| 523 sqlite3_reset(pStmt2); | |
| 524 if( iLower>=0 ){ | |
| 525 if( iLower==iPrev && iLower==iEnd | |
| 526 && isNullSegment(db,zTab,iLower) | |
| 527 ){ | |
| 528 printf(" null %9lld\n", iLower); | |
| 529 }else{ | |
| 530 printTreeLine(iLower, iPrev); | |
| 531 } | |
| 532 } | |
| 533 } | |
| 534 printf(" leaves %9lld thru %9lld (%lld blocks)\n", | |
| 535 iStart, iLEnd, iLEnd - iStart + 1); | |
| 536 } | |
| 537 } | |
| 538 sqlite3_reset(pStmt); | |
| 539 } | |
| 540 sqlite3_finalize(pStmt); | |
| 541 sqlite3_finalize(pStmt2); | |
| 542 } | |
| 543 | |
| 544 /* | |
| 545 ** Decode a single segment block and display the results on stdout. | |
| 546 */ | |
| 547 static void decodeSegment( | |
| 548 const unsigned char *aData, /* Content to print */ | |
| 549 int nData /* Number of bytes of content */ | |
| 550 ){ | |
| 551 sqlite3_int64 iChild = 0; | |
| 552 sqlite3_int64 iPrefix; | |
| 553 sqlite3_int64 nTerm; | |
| 554 sqlite3_int64 n; | |
| 555 sqlite3_int64 iDocsz; | |
| 556 int iHeight; | |
| 557 sqlite3_int64 i = 0; | |
| 558 int cnt = 0; | |
| 559 char zTerm[1000]; | |
| 560 | |
| 561 i += getVarint(aData, &n); | |
| 562 iHeight = (int)n; | |
| 563 printf("height: %d\n", iHeight); | |
| 564 if( iHeight>0 ){ | |
| 565 i += getVarint(aData+i, &iChild); | |
| 566 printf("left-child: %lld\n", iChild); | |
| 567 } | |
| 568 while( i<nData ){ | |
| 569 if( (cnt++)>0 ){ | |
| 570 i += getVarint(aData+i, &iPrefix); | |
| 571 }else{ | |
| 572 iPrefix = 0; | |
| 573 } | |
| 574 i += getVarint(aData+i, &nTerm); | |
| 575 if( iPrefix+nTerm+1 >= sizeof(zTerm) ){ | |
| 576 fprintf(stderr, "term to long\n"); | |
| 577 exit(1); | |
| 578 } | |
| 579 memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm); | |
| 580 zTerm[iPrefix+nTerm] = 0; | |
| 581 i += nTerm; | |
| 582 if( iHeight==0 ){ | |
| 583 i += getVarint(aData+i, &iDocsz); | |
| 584 printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i); | |
| 585 i += iDocsz; | |
| 586 }else{ | |
| 587 printf("term: %-25s child %lld\n", zTerm, ++iChild); | |
| 588 } | |
| 589 } | |
| 590 } | |
| 591 | |
| 592 | |
/*
** Print a blob as hex and ascii, sixteen bytes per line.
**
** Each line starts with the offset of its first byte in hex (the width
** depends on nData), then the bytes in hex, then the same bytes as
** characters with '.' standing in for anything isprint() rejects.  A
** short final line is padded so that the character column stays aligned.
** Only aData[0..nData-1] is read.
*/
static void printBlob(
  const unsigned char *aData,   /* Content to print */
  int nData                     /* Number of bytes of content */
){
  int i, j;
  const char *zOfstFmt;
  const int perLine = 16;

  if( (nData&~0xfff)==0 ){
    zOfstFmt = " %03x: ";
  }else if( (nData&~0xffff)==0 ){
    zOfstFmt = " %04x: ";
  }else if( (nData&~0xfffff)==0 ){
    zOfstFmt = " %05x: ";
  }else if( (nData&~0xffffff)==0 ){
    zOfstFmt = " %06x: ";
  }else{
    zOfstFmt = " %08x: ";
  }

  for(i=0; i<nData; i += perLine){
    fprintf(stdout, zOfstFmt, i);
    for(j=0; j<perLine; j++){
      if( i+j>=nData ){
        /* Same width as the "%02x " cell of a real byte */
        fprintf(stdout, "   ");
      }else{
        fprintf(stdout,"%02x ", aData[i+j]);
      }
    }
    for(j=0; j<perLine; j++){
      if( i+j>=nData ){
        fprintf(stdout, " ");
      }else{
        fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
      }
    }
    fprintf(stdout,"\n");
  }
}
| 635 | |
| 636 /* | |
| 637 ** Convert text to a 64-bit integer | |
| 638 */ | |
| 639 static sqlite3_int64 atoi64(const char *z){ | |
| 640 sqlite3_int64 v = 0; | |
| 641 while( z[0]>='0' && z[0]<='9' ){ | |
| 642 v = v*10 + z[0] - '0'; | |
| 643 z++; | |
| 644 } | |
| 645 return v; | |
| 646 } | |
| 647 | |
| 648 /* | |
| 649 ** Return a prepared statement which, when stepped, will return in its | |
| 650 ** first column the blob associated with segment zId. If zId begins with | |
| 651 ** 'r' then it is a rowid of a %_segdir entry. Otherwise it is a | |
| 652 ** %_segment entry. | |
| 653 */ | |
| 654 static sqlite3_stmt *prepareToGetSegment( | |
| 655 sqlite3 *db, /* The database */ | |
| 656 const char *zTab, /* The FTS3/4 table name */ | |
| 657 const char *zId /* ID of the segment to open */ | |
| 658 ){ | |
| 659 sqlite3_stmt *pStmt; | |
| 660 if( zId[0]=='r' ){ | |
| 661 pStmt = prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld", | |
| 662 zTab, atoi64(zId+1)); | |
| 663 }else{ | |
| 664 pStmt = prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld", | |
| 665 zTab, atoi64(zId)); | |
| 666 } | |
| 667 return pStmt; | |
| 668 } | |
| 669 | |
| 670 /* | |
| 671 ** Print the content of a segment or of the root of a segdir. The segment | |
| 672 ** or root is identified by azExtra[0]. If the first character of azExtra[0] | |
| 673 ** is 'r' then the remainder is the integer rowid of the %_segdir entry. | |
| 674 ** If the first character of azExtra[0] is not 'r' then, then all of | |
| 675 ** azExtra[0] is an integer which is the block number. | |
| 676 ** | |
| 677 ** If the --raw option is present in azExtra, then a hex dump is provided. | |
| 678 ** Otherwise a decoding is shown. | |
| 679 */ | |
| 680 static void showSegment(sqlite3 *db, const char *zTab){ | |
| 681 const unsigned char *aData; | |
| 682 int nData; | |
| 683 sqlite3_stmt *pStmt; | |
| 684 | |
| 685 pStmt = prepareToGetSegment(db, zTab, azExtra[0]); | |
| 686 if( sqlite3_step(pStmt)!=SQLITE_ROW ){ | |
| 687 sqlite3_finalize(pStmt); | |
| 688 return; | |
| 689 } | |
| 690 nData = sqlite3_column_bytes(pStmt, 0); | |
| 691 aData = sqlite3_column_blob(pStmt, 0); | |
| 692 printf("Segment %s of size %d bytes:\n", azExtra[0], nData); | |
| 693 if( findOption("raw", 0, 0)!=0 ){ | |
| 694 printBlob(aData, nData); | |
| 695 }else{ | |
| 696 decodeSegment(aData, nData); | |
| 697 } | |
| 698 sqlite3_finalize(pStmt); | |
| 699 } | |
| 700 | |
| 701 /* | |
| 702 ** Decode a single doclist and display the results on stdout. | |
| 703 */ | |
| 704 static void decodeDoclist( | |
| 705 const unsigned char *aData, /* Content to print */ | |
| 706 int nData /* Number of bytes of content */ | |
| 707 ){ | |
| 708 sqlite3_int64 iPrevDocid = 0; | |
| 709 sqlite3_int64 iDocid; | |
| 710 sqlite3_int64 iPos; | |
| 711 sqlite3_int64 iPrevPos = 0; | |
| 712 sqlite3_int64 iCol; | |
| 713 int i = 0; | |
| 714 | |
| 715 while( i<nData ){ | |
| 716 i += getVarint(aData+i, &iDocid); | |
| 717 printf("docid %lld col0", iDocid+iPrevDocid); | |
| 718 iPrevDocid += iDocid; | |
| 719 iPrevPos = 0; | |
| 720 while( 1 ){ | |
| 721 i += getVarint(aData+i, &iPos); | |
| 722 if( iPos==1 ){ | |
| 723 i += getVarint(aData+i, &iCol); | |
| 724 printf(" col%lld", iCol); | |
| 725 iPrevPos = 0; | |
| 726 }else if( iPos==0 ){ | |
| 727 printf("\n"); | |
| 728 break; | |
| 729 }else{ | |
| 730 iPrevPos += iPos - 2; | |
| 731 printf(" %lld", iPrevPos); | |
| 732 } | |
| 733 } | |
| 734 } | |
| 735 } | |
| 736 | |
| 737 | |
| 738 /* | |
| 739 ** Print the content of a doclist. The segment or segdir-root is | |
| 740 ** identified by azExtra[0]. If the first character of azExtra[0] | |
| 741 ** is 'r' then the remainder is the integer rowid of the %_segdir entry. | |
| 742 ** If the first character of azExtra[0] is not 'r' then, then all of | |
| 743 ** azExtra[0] is an integer which is the block number. The offset | |
| 744 ** into the segment is identified by azExtra[1]. The size of the doclist | |
| 745 ** is azExtra[2]. | |
| 746 ** | |
| 747 ** If the --raw option is present in azExtra, then a hex dump is provided. | |
| 748 ** Otherwise a decoding is shown. | |
| 749 */ | |
| 750 static void showDoclist(sqlite3 *db, const char *zTab){ | |
| 751 const unsigned char *aData; | |
| 752 sqlite3_int64 offset; | |
| 753 int nData; | |
| 754 sqlite3_stmt *pStmt; | |
| 755 | |
| 756 offset = atoi64(azExtra[1]); | |
| 757 nData = atoi(azExtra[2]); | |
| 758 pStmt = prepareToGetSegment(db, zTab, azExtra[0]); | |
| 759 if( sqlite3_step(pStmt)!=SQLITE_ROW ){ | |
| 760 sqlite3_finalize(pStmt); | |
| 761 return; | |
| 762 } | |
| 763 aData = sqlite3_column_blob(pStmt, 0); | |
| 764 printf("Doclist at %s offset %lld of size %d bytes:\n", | |
| 765 azExtra[0], offset, nData); | |
| 766 if( findOption("raw", 0, 0)!=0 ){ | |
| 767 printBlob(aData+offset, nData); | |
| 768 }else{ | |
| 769 decodeDoclist(aData+offset, nData); | |
| 770 } | |
| 771 sqlite3_finalize(pStmt); | |
| 772 } | |
| 773 | |
| 774 /* | |
| 775 ** Show the top N largest segments | |
| 776 */ | |
| 777 static void listBigSegments(sqlite3 *db, const char *zTab){ | |
| 778 int nTop, i; | |
| 779 sqlite3_stmt *pStmt; | |
| 780 sqlite3_int64 sz; | |
| 781 sqlite3_int64 id; | |
| 782 | |
| 783 nTop = atoi(findOption("top", 1, "25")); | |
| 784 printf("The %d largest segments:\n", nTop); | |
| 785 pStmt = prepare(db, | |
| 786 "SELECT blockid, length(block) AS len FROM '%q_segments'" | |
| 787 " ORDER BY 2 DESC, 1" | |
| 788 " LIMIT %d", zTab, nTop); | |
| 789 i = 0; | |
| 790 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 791 i++; | |
| 792 id = sqlite3_column_int64(pStmt, 0); | |
| 793 sz = sqlite3_column_int64(pStmt, 1); | |
| 794 printf(" %2d. %9lld size %lld\n", i, id, sz); | |
| 795 } | |
| 796 sqlite3_finalize(pStmt); | |
| 797 } | |
| 798 | |
| 799 | |
| 800 | |
/*
** Write the command-line synopsis and the list of commands to stderr,
** then exit with status 1.  argv0 is the program name to show.
*/
static void usage(const char *argv0){
  fprintf(stderr,
    "Usage: %s DATABASE\n"
    " or: %s DATABASE FTS3TABLE ARGS...\n"
    "ARGS:\n"
    " big-segments [--top N] show the largest segments\n"
    " doclist BLOCKID OFFSET SIZE [--raw] Decode a doclist\n"
    " schema FTS table schema\n"
    " segdir directory of segments\n"
    " segment BLOCKID [--raw] content of a segment\n"
    " segment-stats info on segment sizes\n"
    " stat the %%_stat table\n"
    " vocabulary [--top N] document vocabulary\n",
    argv0, argv0
  );
  exit(1);
}
| 817 | |
| 818 int main(int argc, char **argv){ | |
| 819 sqlite3 *db; | |
| 820 int rc; | |
| 821 const char *zTab; | |
| 822 const char *zCmd; | |
| 823 | |
| 824 if( argc<2 ) usage(argv[0]); | |
| 825 rc = sqlite3_open(argv[1], &db); | |
| 826 if( rc ){ | |
| 827 fprintf(stderr, "Cannot open %s\n", argv[1]); | |
| 828 exit(1); | |
| 829 } | |
| 830 if( argc==2 ){ | |
| 831 sqlite3_stmt *pStmt; | |
| 832 int cnt = 0; | |
| 833 pStmt = prepare(db, "SELECT b.sql" | |
| 834 " FROM sqlite_master a, sqlite_master b" | |
| 835 " WHERE a.name GLOB '*_segdir'" | |
| 836 " AND b.name=substr(a.name,1,length(a.name)-7)" | |
| 837 " ORDER BY 1"); | |
| 838 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
| 839 cnt++; | |
| 840 printf("%s;\n", sqlite3_column_text(pStmt, 0)); | |
| 841 } | |
| 842 sqlite3_finalize(pStmt); | |
| 843 if( cnt==0 ){ | |
| 844 printf("/* No FTS3/4 tables found in database %s */\n", argv[1]); | |
| 845 } | |
| 846 return 0; | |
| 847 } | |
| 848 if( argc<4 ) usage(argv[0]); | |
| 849 zTab = argv[2]; | |
| 850 zCmd = argv[3]; | |
| 851 nExtra = argc-4; | |
| 852 azExtra = argv+4; | |
| 853 if( strcmp(zCmd,"big-segments")==0 ){ | |
| 854 listBigSegments(db, zTab); | |
| 855 }else if( strcmp(zCmd,"doclist")==0 ){ | |
| 856 if( argc<7 ) usage(argv[0]); | |
| 857 showDoclist(db, zTab); | |
| 858 }else if( strcmp(zCmd,"schema")==0 ){ | |
| 859 showSchema(db, zTab); | |
| 860 }else if( strcmp(zCmd,"segdir")==0 ){ | |
| 861 showSegdirMap(db, zTab); | |
| 862 }else if( strcmp(zCmd,"segment")==0 ){ | |
| 863 if( argc<5 ) usage(argv[0]); | |
| 864 showSegment(db, zTab); | |
| 865 }else if( strcmp(zCmd,"segment-stats")==0 ){ | |
| 866 showSegmentStats(db, zTab); | |
| 867 }else if( strcmp(zCmd,"stat")==0 ){ | |
| 868 showStat(db, zTab); | |
| 869 }else if( strcmp(zCmd,"vocabulary")==0 ){ | |
| 870 showVocabulary(db, zTab); | |
| 871 }else{ | |
| 872 usage(argv[0]); | |
| 873 } | |
| 874 return 0; | |
| 875 } | |
| OLD | NEW |