third_party/sqlite/recover.patch - Issue 885473002: [sql] Rewrite sqlite patching "system".

Side by Side Diff: third_party/sqlite/recover.patch

Issue 885473002: [sql] Rewrite sqlite patching "system". (Closed) Base URL: http://chromium.googlesource.com/chromium/src.git@master

Patch Set: Fixed typo in readme. Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « third_party/sqlite/patches/0023-fts2-Fix-numerous-out-of-bounds-bugs-reading-corrupt.patch ('k') | third_party/sqlite/separate_cache_pool.patch » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 Add new virtual table 'recover' to src/ and the amalgamation.

2

3 Since recover.c is in somewhat active development, it is possible that

4 the patch below will not reliably re-create the file.

5

6 shess@chromium.org

7

8 Generated with:

9 git diff --cached --relative=third_party/sqlite/src --src-prefix='' --dst-prefix='' > third_party/sqlite/recover.patch

10 [--cached because otherwise the diff adding recover.c wasn't generated.]

11

12 diff --git Makefile.in Makefile.in

13 index f3239f3..216742c 100644

14 --- Makefile.in

15 +++ Makefile.in

16 @@ -251,6 +251,7 @@ SRC = \

17 $(TOP)/src/prepare.c \

18 $(TOP)/src/printf.c \

19 $(TOP)/src/random.c \

20 + $(TOP)/src/recover.c \

21 $(TOP)/src/resolve.c \

22 $(TOP)/src/rowset.c \

23 $(TOP)/src/select.c \

24 diff --git src/sqlite.h.in src/sqlite.h.in

25 index 62b9326..fb76659 100644

26 --- src/sqlite.h.in

27 +++ src/sqlite.h.in

28 @@ -6403,6 +6403,17 @@ int sqlite3_wal_checkpoint_v2(

29 #define SQLITE_CHECKPOINT_RESTART 2

30

31

32 +/* Begin recover.patch for Chromium */

33 +/*

34 +** Call to initialize the recover virtual-table modules (see recover.c).

35 +**

36 +** This could be loaded by default in main.c, but that would make the

37 +** virtual table available to Web SQL. Breaking it out allows only

38 +** selected users to enable it (currently sql/recovery.cc).

39 +*/

40 +int recoverVtableInit(sqlite3 *db);

41 +/* End recover.patch for Chromium */

42 +

43 /*

44 ** Undo the hack that converts floating point types to integer for

45 ** builds on processors without floating point support.

46 diff --git tool/mksqlite3c.tcl tool/mksqlite3c.tcl

47 index fa99f2d..df2df07 100644

48 --- tool/mksqlite3c.tcl

49 +++ tool/mksqlite3c.tcl

50 @@ -293,6 +293,8 @@ foreach file {

51 main.c

52 notify.c

53

54 + recover.c

55 +

56 fts3.c

57 fts3_aux.c

58 fts3_expr.c

59 diff --git src/recover.c src/recover.c

60 new file mode 100644

61 index 0000000..6430c8b

62 --- /dev/null

63 +++ src/recover.c

64 @@ -0,0 +1,2130 @@

65 +/*

66 +** 2012 Jan 11

67 +**

68 +** The author disclaims copyright to this source code. In place of

69 +** a legal notice, here is a blessing:

70 +**

71 +** May you do good and not evil.

72 +** May you find forgiveness for yourself and forgive others.

73 +** May you share freely, never taking more than you give.

74 +*/

75 +/* TODO(shess): THIS MODULE IS STILL EXPERIMENTAL. DO NOT USE IT. */

76 +/* Implements a virtual table "recover" which can be used to recover

77 + * data from a corrupt table. The table is walked manually, with

78 + * corrupt items skipped. Additionally, any errors while reading will

79 + * be skipped.

80 + *

81 + * Given a table with this definition:

82 + *

83 + * CREATE TABLE Stuff (

84 + * name TEXT PRIMARY KEY,

85 + * value TEXT NOT NULL

86 + * );

87 + *

88 + * to recover the data from the table, you could do something like:

89 + *

90 + * -- Attach another database, the original is not trustworthy.

91 + * ATTACH DATABASE '/tmp/db.db' AS rdb;

92 + * -- Create a new version of the table.

93 + * CREATE TABLE rdb.Stuff (

94 + * name TEXT PRIMARY KEY,

95 + * value TEXT NOT NULL

96 + * );

97 + * -- This will read the original table's data.

98 + * CREATE VIRTUAL TABLE temp.recover_Stuff using recover(

99 + * main.Stuff,

100 + * name TEXT STRICT NOT NULL, -- only real TEXT data allowed

101 + * value TEXT STRICT NOT NULL

102 + * );

103 + * -- Corruption means the UNIQUE constraint may no longer hold for

104 + * -- Stuff, so either OR REPLACE or OR IGNORE must be used.

105 + * INSERT OR REPLACE INTO rdb.Stuff (rowid, name, value )

106 + * SELECT rowid, name, value FROM temp.recover_Stuff;

107 + * DROP TABLE temp.recover_Stuff;

108 + * DETACH DATABASE rdb;

109 + * -- Move db.db to replace original db in filesystem.

110 + *

111 + *

112 + * Usage

113 + *

114 + * Given the goal of dealing with corruption, it would not be safe to

115 + * create a recovery table in the database being recovered. So

116 + * recovery tables must be created in the temp database. They are not

117 + * appropriate to persist, in any case. [As a bonus, sqlite_master

118 + * tables can be recovered. Perhaps more cute than useful, though.]

119 + *

120 + * The parameters are a specifier for the table to read, and a column

121 + * definition for each bit of data stored in that table. The named

122 + * table must be convertible to a root page number by reading the

123 + * sqlite_master table. Bare table names are assumed to be in

124 + * database 0 ("main"), other databases can be specified in db.table

125 + * fashion.

126 + *

127 + * Column definitions are similar to BUT NOT THE SAME AS those

128 + * provided to CREATE statements:

129 + * column-def: column-name [type-name [STRICT] [NOT NULL]]

130 + * type-name: (ANY|ROWID|INTEGER|FLOAT|NUMERIC|TEXT|BLOB)

131 + *

132 + * Only those exact type names are accepted, there is no type

133 + * intuition. The only constraints accepted are STRICT (see below)

134 + * and NOT NULL. Anything unexpected will cause the create to fail.

135 + *

136 + * ANY is a convenience to indicate that manifest typing is desired.

137 + * It is equivalent to not specifying a type at all. The results for

138 + * such columns will have the type of the data's storage. The exposed

139 + * schema will contain no type for that column.

140 + *

141 + * ROWID is used for columns representing aliases to the rowid

142 + * (INTEGER PRIMARY KEY, with or without AUTOINCREMENT), to make the

143 + * concept explicit. Such columns are actually stored as NULL, so

144 + * they cannot be simply ignored. The exposed schema will be INTEGER

145 + * for that column.

146 + *

147 + * NOT NULL causes rows with a NULL in that column to be skipped. It

148 + * also adds NOT NULL to the column in the exposed schema. If the

149 + * table has ever had columns added using ALTER TABLE, then those

150 + * columns implicitly contain NULL for rows which have not been

151 + * updated. [Workaround using COALESCE() in your SELECT statement.]

152 + *

153 + * The created table is read-only, with no indices. Any SELECT will

154 + * be a full-table scan, returning each valid row read from the

155 + * storage of the backing table. The rowid will be the rowid of the

156 + * row from the backing table. "Valid" means:

157 + * - The cell metadata for the row is well-formed. Mainly this means that

158 + * the cell header info describes a payload of the size indicated by

159 + * the cell's payload size.

160 + * - The cell does not run off the page.

161 + * - The cell does not overlap any other cell on the page.

162 + * - The cell doesn't contain too many columns.

163 + * - The types of the serialized data match the indicated types (see below).

164 + *

165 + *

166 + * Type affinity versus type storage.

167 + *

168 + * http://www.sqlite.org/datatype3.html describes SQLite's type

169 + * affinity system. The system provides for automated coercion of

170 + * types in certain cases, transparently enough that many developers

171 + * do not realize that it is happening. Importantly, it implies that

172 + * the raw data stored in the database may not have the obvious type.

173 + *

174 + * Differences between the stored data types and the expected data

175 + * types may be a signal of corruption. This module makes some

176 + * allowances for automatic coercion. It is important to be conscious

177 + * of the difference between the schema exposed by the module, and the

178 + * data types read from storage. The following table describes how

179 + * the module interprets things:

180 + *

181 + * type schema data STRICT

182 + * ---- ------ ---- ------

183 + * ANY <none> any any

184 + * ROWID INTEGER n/a n/a

185 + * INTEGER INTEGER integer integer

186 + * FLOAT FLOAT integer or float float

187 + * NUMERIC NUMERIC integer, float, or text integer or float

188 + * TEXT TEXT text or blob text

189 + * BLOB BLOB blob blob

190 + *

191 + * type is the type provided to the recover module, schema is the

192 + * schema exposed by the module, data is the acceptable types of data

193 + * decoded from storage, and STRICT is a modification of that.

194 + *

195 + * A very loose recovery system might use ANY for all columns, then

196 + * use the appropriate sqlite3_column_*() calls to coerce to expected

197 + * types. This doesn't provide much protection if a page from a

198 + * different table with the same column count is linked into an

199 + * inappropriate btree.

200 + *

201 + * A very tight recovery system might use STRICT to enforce typing on

202 + * all columns, preferring to skip rows which are valid at the storage

203 + * level but don't contain the right types. Note that FLOAT STRICT is

204 + * almost certainly not appropriate, since integral values are

205 + * transparently stored as integers, when that is more efficient.

206 + *

207 + * Another option is to use ANY for all columns and inspect each

208 + * result manually (using sqlite3_column_*). This should only be

209 + * necessary in cases where developers have used manifest typing (test

210 + * to make sure before you decide that you aren't using manifest

211 + * typing!).

212 + *

213 + *

214 + * Caveats

215 + *

216 + * Leaf pages not referenced by interior nodes will not be found.

217 + *

218 + * Leaf pages referenced from interior nodes of other tables will not

219 + * be resolved.

220 + *

221 + * Rows referencing invalid overflow pages will be skipped.

222 + *

223 + * SQLite rows have a header which describes how to interpret the rest

224 + * of the payload. The header can be valid in cases where the rest of

225 + * the record is actually corrupt (in the sense that the data is not

226 + * the intended data). This can especially happen WRT overflow pages,

227 + * as lack of atomic updates between pages is the primary form of

228 + * corruption I have seen in the wild.

229 + */

230 +/* The implementation is via a series of cursors. The cursor

231 + * implementations follow the pattern:

232 + *

233 + * // Creates the cursor using various initialization info.

234 + * int cursorCreate(...);

235 + *

236 + * // Returns 1 if there is no more data, 0 otherwise.

237 + * int cursorEOF(Cursor *pCursor);

238 + *

239 + * // Various accessors can be used if not at EOF.

240 + *

241 + * // Move to the next item.

242 + * int cursorNext(Cursor *pCursor);

243 + *

244 + * // Destroy the memory associated with the cursor.

245 + * void cursorDestroy(Cursor *pCursor);

246 + *

247 + * References in the following are to sections at

248 + * http://www.sqlite.org/fileformat2.html .

249 + *

250 + * RecoverLeafCursor iterates the records in a leaf table node

251 + * described in section 1.5 "B-tree Pages". When the node is

252 + * exhausted, an interior cursor is used to get the next leaf node,

253 + * and iteration continues there.

254 + *

255 + * RecoverInteriorCursor iterates the child pages in an interior table

256 + * node described in section 1.5 "B-tree Pages". When the node is

257 + * exhausted, a parent interior cursor is used to get the next

258 + * interior node at the same level, and iteration continues there.

259 + *

260 + * Together these record the path from the leaf level to the root of

261 + * the tree. Iteration happens from the leaves rather than the root

262 + * both for efficiency and because putting the special case at the

263 + * front of the list is easier to implement.

264 + *

265 + * RecoverCursor uses a RecoverLeafCursor to iterate the rows of a

266 + * table, returning results via the SQLite virtual table interface.

267 + */

268 +/* TODO(shess): It might be useful to allow DEFAULT in types to

269 + * specify what to do for NULL when an ALTER TABLE case comes up.

270 + * Unfortunately, simply adding it to the exposed schema and using

271 + * sqlite3_result_null() does not cause the default to be generated.

272 + * Handling it ourselves seems hard, unfortunately.

273 + */

274 +

275 +#include <assert.h>

276 +#include <ctype.h>

277 +#include <stdio.h>

278 +#include <string.h>

279 +

280 +/* Internal SQLite things that are used:

281 + * u32, u64, i64 types.

282 + * Btree, Pager, and DbPage structs.

283 + * DbPage.pData, .pPager, and .pgno

284 + * sqlite3 struct.

285 + * sqlite3BtreePager() and sqlite3BtreeGetPageSize()

286 + * sqlite3PagerAcquire() and sqlite3PagerUnref()

287 + * getVarint().

288 + */

289 +#include "sqliteInt.h"

290 +

291 +/* For debugging. */

292 +#if 0

293 +#define FNENTRY() fprintf(stderr, "In %s\n", __FUNCTION__)

294 +#else

295 +#define FNENTRY()

296 +#endif

297 +

298 +/* Generic constants and helper functions. */

299 +

300 +static const unsigned char kTableLeafPage = 0x0D;

301 +static const unsigned char kTableInteriorPage = 0x05;

302 +

303 +/* From section 1.5. */

304 +static const unsigned kiPageTypeOffset = 0;

305 +static const unsigned kiPageFreeBlockOffset = 1;

306 +static const unsigned kiPageCellCountOffset = 3;

307 +static const unsigned kiPageCellContentOffset = 5;

308 +static const unsigned kiPageFragmentedBytesOffset = 7;

309 +static const unsigned knPageLeafHeaderBytes = 8;

310 +/* Interior pages contain an additional field. */

311 +static const unsigned kiPageRightChildOffset = 8;

312 +static const unsigned kiPageInteriorHeaderBytes = 12;

313 +

314 +/* Accepted types are specified by a mask. */

315 +#define MASK_ROWID (1<<0)

316 +#define MASK_INTEGER (1<<1)

317 +#define MASK_FLOAT (1<<2)

318 +#define MASK_TEXT (1<<3)

319 +#define MASK_BLOB (1<<4)

320 +#define MASK_NULL (1<<5)

321 +

322 +/* Helpers to decode fixed-size fields. */

323 +static u32 decodeUnsigned16(const unsigned char *pData){

324 + return (pData[0]<<8) + pData[1];

325 +}

326 +static u32 decodeUnsigned32(const unsigned char *pData){

327 + return (decodeUnsigned16(pData)<<16) + decodeUnsigned16(pData+2);

328 +}

329 +static i64 decodeSigned(const unsigned char *pData, unsigned nBytes){

330 + i64 r = (char)(*pData);

331 + while( --nBytes ){

332 + r <<= 8;

333 + r += *(++pData);

334 + }

335 + return r;

336 +}

337 +/* Derived from vdbeaux.c, sqlite3VdbeSerialGet(), case 7. */

338 +/* TODO(shess): Determine if swapMixedEndianFloat() applies. */

339 +static double decodeFloat64(const unsigned char *pData){

340 +#if !defined(NDEBUG)

341 + static const u64 t1 = ((u64)0x3ff00000)<<32;

342 + static const double r1 = 1.0;

343 + u64 t2 = t1;

344 + assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 );

345 +#endif

346 + i64 x = decodeSigned(pData, 8);

347 + double d;

348 + memcpy(&d, &x, sizeof(x));

349 + return d;

350 +}

351 +

352 +/* Return true if a varint can safely be read from pData/nData. */

353 +/* TODO(shess): DbPage points into the middle of a buffer which

354 + * contains the page data before DbPage. So code should always be

355 + * able to read a small number of varints safely. Consider whether to

356 + * trust that or not.

357 + */

358 +static int checkVarint(const unsigned char *pData, unsigned nData){

359 + unsigned i;

360 +

361 + /* In the worst case the decoder takes all 8 bits of the 9th byte. */

362 + if( nData>=9 ){

363 + return 1;

364 + }

365 +

366 + /* Look for a high-bit-clear byte in what's left. */

367 + for( i=0; i<nData; ++i ){

368 + if( !(pData[i]&0x80) ){

369 + return 1;

370 + }

371 + }

372 +

373 + /* Cannot decode in the space given. */

374 + return 0;

375 +}

376 +

377 +/* Return 1 if n varints can be read from pData/nData. */

378 +static int checkVarints(const unsigned char *pData, unsigned nData,

379 + unsigned n){

380 + unsigned nCur = 0; /* Byte offset within current varint. */

381 + unsigned nFound = 0; /* Number of varints found. */

382 + unsigned i;

383 +

384 + /* In the worst case the decoder takes all 8 bits of the 9th byte. */

385 + if( nData>=9*n ){

386 + return 1;

387 + }

388 +

389 + for( i=0; nFound<n && i<nData; ++i ){

390 + nCur++;

391 + if( nCur==9 || !(pData[i]&0x80) ){

392 + nFound++;

393 + nCur = 0;

394 + }

395 + }

396 +

397 + return nFound==n;

398 +}

399 +

400 +/* ctype and str[n]casecmp() can be affected by locale (eg, tr_TR).

401 + * These versions consider only the ASCII space.

402 + */

403 +/* TODO(shess): It may be reasonable to just remove the need for these

404 + * entirely. The module could require "TEXT STRICT NOT NULL", not

405 + * "Text Strict Not Null" or whatever the developer felt like typing

406 + * that day. Handling corrupt data is a PERFECT place to be pedantic.

407 + */

408 +static int ascii_isspace(char c){

409 + /* From fts3_expr.c */

410 + return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';

411 +}

412 +static int ascii_isalnum(int x){

413 + /* From fts3_tokenizer1.c */

414 + return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z');

415 +}

416 +static int ascii_tolower(int x){

417 + /* From fts3_tokenizer1.c */

418 + return (x>='A' && x<='Z') ? x-'A'+'a' : x;

419 +}

420 +/* TODO(shess): Consider sqlite3_strnicmp() */

421 +static int ascii_strncasecmp(const char *s1, const char *s2, size_t n){

422 + const unsigned char *us1 = (const unsigned char *)s1;

423 + const unsigned char *us2 = (const unsigned char *)s2;

424 + while( *us1 && *us2 && n && ascii_tolower(*us1)==ascii_tolower(*us2) ){

425 + us1++, us2++, n--;

426 + }

427 + return n ? ascii_tolower(*us1)-ascii_tolower(*us2) : 0;

428 +}

429 +static int ascii_strcasecmp(const char *s1, const char *s2){

430 + /* If s2 is equal through strlen(s1), will exit while() due to s1's

431 + * trailing NUL, and return NUL-s2[strlen(s1)].

432 + */

433 + return ascii_strncasecmp(s1, s2, strlen(s1)+1);

434 +}

435 +

436 +/* For some reason I kept making mistakes with offset calculations. */

437 +static const unsigned char *PageData(DbPage *pPage, unsigned iOffset){

438 + assert( iOffset<=pPage->nPageSize );

439 + return (unsigned char *)pPage->pData + iOffset;

440 +}

441 +

442 +/* The first page in the file contains a file header in the first 100

443 + * bytes. The page's header information comes after that. Note that

444 + * the offsets in the page's header information are relative to the

445 + * beginning of the page, NOT the end of the page header.

446 + */

447 +static const unsigned char *PageHeader(DbPage *pPage){

448 + if( pPage->pgno==1 ){

449 + const unsigned nDatabaseHeader = 100;

450 + return PageData(pPage, nDatabaseHeader);

451 + }else{

452 + return PageData(pPage, 0);

453 + }

454 +}

455 +

456 +/* Helper to fetch the pager and page size for the named database. */

457 +static int GetPager(sqlite3 *db, const char *zName,

458 + Pager **pPager, unsigned *pnPageSize){

459 + Btree *pBt = NULL;

460 + int i;

461 + for( i=0; i<db->nDb; ++i ){

462 + if( ascii_strcasecmp(db->aDb[i].zName, zName)==0 ){

463 + pBt = db->aDb[i].pBt;

464 + break;

465 + }

466 + }

467 + if( !pBt ){

468 + return SQLITE_ERROR;

469 + }

470 +

471 + *pPager = sqlite3BtreePager(pBt);

472 + *pnPageSize = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt);

473 + return SQLITE_OK;

474 +}

475 +

476 +/* iSerialType is a type read from a record header. See "2.1 Record Format".

477 + */

478 +

479 +/* Storage size of iSerialType in bytes. My interpretation of SQLite

480 + * documentation is that text and blob fields can have 32-bit length.

481 + * Values past 2^31-12 will need more than 32 bits to encode, which is

482 + * why iSerialType is u64.

483 + */

484 +static u32 SerialTypeLength(u64 iSerialType){

485 + switch( iSerialType ){

486 + case 0 : return 0; /* NULL */

487 + case 1 : return 1; /* Various integers. */

488 + case 2 : return 2;

489 + case 3 : return 3;

490 + case 4 : return 4;

491 + case 5 : return 6;

492 + case 6 : return 8;

493 + case 7 : return 8; /* 64-bit float. */

494 + case 8 : return 0; /* Constant 0. */

495 + case 9 : return 0; /* Constant 1. */

496 + case 10 : case 11 : assert( !"RESERVED TYPE"); return 0;

497 + }

498 + return (u32)((iSerialType>>1) - 6);

499 +}

500 +

501 +/* True if iSerialType refers to a blob. */

502 +static int SerialTypeIsBlob(u64 iSerialType){

503 + assert( iSerialType>=12 );

504 + return (iSerialType%2)==0;

505 +}

506 +

507 +/* Returns true if the serialized type represented by iSerialType is

508 + * compatible with the given type mask.

509 + */

510 +static int SerialTypeIsCompatible(u64 iSerialType, unsigned char mask){

511 + switch( iSerialType ){

512 + case 0 : return (mask&MASK_NULL)!=0;

513 + case 1 : return (mask&MASK_INTEGER)!=0;

514 + case 2 : return (mask&MASK_INTEGER)!=0;

515 + case 3 : return (mask&MASK_INTEGER)!=0;

516 + case 4 : return (mask&MASK_INTEGER)!=0;

517 + case 5 : return (mask&MASK_INTEGER)!=0;

518 + case 6 : return (mask&MASK_INTEGER)!=0;

519 + case 7 : return (mask&MASK_FLOAT)!=0;

520 + case 8 : return (mask&MASK_INTEGER)!=0;

521 + case 9 : return (mask&MASK_INTEGER)!=0;

522 + case 10 : assert( !"RESERVED TYPE"); return 0;

523 + case 11 : assert( !"RESERVED TYPE"); return 0;

524 + }

525 + return (mask&(SerialTypeIsBlob(iSerialType) ? MASK_BLOB : MASK_TEXT));

526 +}

527 +

528 +/* Versions of strdup() with return values appropriate for

529 + * sqlite3_free(). malloc.c has sqlite3DbStrDup()/NDup(), but those

530 + * need sqlite3DbFree(), which seems intrusive.

531 + */

532 +static char *sqlite3_strndup(const char *z, unsigned n){

533 + char *zNew;

534 +

535 + if( z==NULL ){

536 + return NULL;

537 + }

538 +

539 + zNew = sqlite3_malloc(n+1);

540 + if( zNew!=NULL ){

541 + memcpy(zNew, z, n);

542 + zNew[n] = '\0';

543 + }

544 + return zNew;

545 +}

546 +static char *sqlite3_strdup(const char *z){

547 + if( z==NULL ){

548 + return NULL;

549 + }

550 + return sqlite3_strndup(z, strlen(z));

551 +}

552 +

553 +/* Fetch the page number of zTable in zDb from sqlite_master in zDb,

554 + * and put it in *piRootPage.

555 + */

556 +static int getRootPage(sqlite3 *db, const char *zDb, const char *zTable,

557 + u32 *piRootPage){

558 + char *zSql; /* SQL selecting root page of named element. */

559 + sqlite3_stmt *pStmt;

560 + int rc;

561 +

562 + if( strcmp(zTable, "sqlite_master")==0 ){

563 + *piRootPage = 1;

564 + return SQLITE_OK;

565 + }

566 +

567 + zSql = sqlite3_mprintf("SELECT rootpage FROM %s.sqlite_master "

568 + "WHERE type = 'table' AND tbl_name = %Q",

569 + zDb, zTable);

570 + if( !zSql ){

571 + return SQLITE_NOMEM;

572 + }

573 +

574 + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);

575 + sqlite3_free(zSql);

576 + if( rc!=SQLITE_OK ){

577 + return rc;

578 + }

579 +

580 + /* Require a result. */

581 + rc = sqlite3_step(pStmt);

582 + if( rc==SQLITE_DONE ){

583 + rc = SQLITE_CORRUPT;

584 + }else if( rc==SQLITE_ROW ){

585 + *piRootPage = sqlite3_column_int(pStmt, 0);

586 +

587 + /* Require only one result. */

588 + rc = sqlite3_step(pStmt);

589 + if( rc==SQLITE_DONE ){

590 + rc = SQLITE_OK;

591 + }else if( rc==SQLITE_ROW ){

592 + rc = SQLITE_CORRUPT;

593 + }

594 + }

595 + sqlite3_finalize(pStmt);

596 + return rc;

597 +}

598 +

599 +static int getEncoding(sqlite3 *db, const char *zDb, int* piEncoding){

600 + sqlite3_stmt *pStmt;

601 + int rc;

602 + char *zSql = sqlite3_mprintf("PRAGMA %s.encoding", zDb);

603 + if( !zSql ){

604 + return SQLITE_NOMEM;

605 + }

606 +

607 + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);

608 + sqlite3_free(zSql);

609 + if( rc!=SQLITE_OK ){

610 + return rc;

611 + }

612 +

613 + /* Require a result. */

614 + rc = sqlite3_step(pStmt);

615 + if( rc==SQLITE_DONE ){

616 + /* This case should not be possible. */

617 + rc = SQLITE_CORRUPT;

618 + }else if( rc==SQLITE_ROW ){

619 + if( sqlite3_column_type(pStmt, 0)==SQLITE_TEXT ){

620 + const char* z = (const char *)sqlite3_column_text(pStmt, 0);

621 + /* These strings match the literals in pragma.c. */

622 + if( !strcmp(z, "UTF-16le") ){

623 + *piEncoding = SQLITE_UTF16LE;

624 + }else if( !strcmp(z, "UTF-16be") ){

625 + *piEncoding = SQLITE_UTF16BE;

626 + }else if( !strcmp(z, "UTF-8") ){

627 + *piEncoding = SQLITE_UTF8;

628 + }else{

629 + /* This case should not be possible. */

630 + *piEncoding = SQLITE_UTF8;

631 + }

632 + }else{

633 + /* This case should not be possible. */

634 + *piEncoding = SQLITE_UTF8;

635 + }

636 +

637 + /* Require only one result. */

638 + rc = sqlite3_step(pStmt);

639 + if( rc==SQLITE_DONE ){

640 + rc = SQLITE_OK;

641 + }else if( rc==SQLITE_ROW ){

642 + /* This case should not be possible. */

643 + rc = SQLITE_CORRUPT;

644 + }

645 + }

646 + sqlite3_finalize(pStmt);

647 + return rc;

648 +}

649 +

650 +/* Cursor for iterating interior nodes. Interior page cells contain a

651 + * child page number and a rowid. The child page contains items left

652 + * of the rowid (less than). The rightmost page of the subtree is

653 + * stored in the page header.

654 + *

655 + * interiorCursorDestroy - release all resources associated with the

656 + * cursor and any parent cursors.

657 + * interiorCursorCreate - create a cursor with the given parent and page.

658 + * interiorCursorEOF - returns true if neither the cursor nor the

659 + * parent cursors can return any more data.

660 + * interiorCursorNextPage - fetch the next child page from the cursor.

661 + *

662 + * Logically, interiorCursorNextPage() returns the next child page

663 + * number from the page the cursor is currently reading, calling the

664 + * parent cursor as necessary to get new pages to read, until done.

665 + * SQLITE_ROW if a page is returned, SQLITE_DONE if out of pages,

666 + * error otherwise. Unfortunately, if the table is corrupted

667 + * unexpected pages can be returned. If any unexpected page is found,

668 + * leaf or otherwise, it is returned to the caller for processing,

669 + * with the interior cursor left empty. The next call to

670 + * interiorCursorNextPage() will recurse to the parent cursor until an

671 + * interior page to iterate is returned.

672 + *

673 + * Note that while interiorCursorNextPage() will refuse to follow

674 + * loops, it does not keep track of pages returned for purposes of

675 + * preventing duplication.

676 + *

677 + * Note that interiorCursorEOF() could return false (not at EOF), and

678 + * interiorCursorNextPage() could still return SQLITE_DONE. This

679 + * could happen if there are more cells to iterate in an interior

680 + * page, but those cells refer to invalid pages.

681 + */

682 +typedef struct RecoverInteriorCursor RecoverInteriorCursor;

683 +struct RecoverInteriorCursor {

684 + RecoverInteriorCursor *pParent; /* Parent node to this node. */

685 + DbPage *pPage; /* Reference to leaf page. */

686 + unsigned nPageSize; /* Size of page. */

687 + unsigned nChildren; /* Number of children on the page. */

688 + unsigned iChild; /* Index of next child to return. */

689 +};

690 +

691 +static void interiorCursorDestroy(RecoverInteriorCursor *pCursor){

692 + /* Destroy all the cursors to the root. */

693 + while( pCursor ){

694 + RecoverInteriorCursor *p = pCursor;

695 + pCursor = pCursor->pParent;

696 +

697 + if( p->pPage ){

698 + sqlite3PagerUnref(p->pPage);

699 + p->pPage = NULL;

700 + }

701 +

702 + memset(p, 0xA5, sizeof(*p));

703 + sqlite3_free(p);

704 + }

705 +}

706 +

707 +/* Internal helper. Reset storage in preparation for iterating pPage. */

708 +static void interiorCursorSetPage(RecoverInteriorCursor *pCursor,

709 + DbPage *pPage){

710 + assert( PageHeader(pPage)[kiPageTypeOffset]==kTableInteriorPage );

711 +

712 + if( pCursor->pPage ){

713 + sqlite3PagerUnref(pCursor->pPage);

714 + pCursor->pPage = NULL;

715 + }

716 + pCursor->pPage = pPage;

717 + pCursor->iChild = 0;

718 +

719 + /* A child for each cell, plus one in the header. */

720 + /* TODO(shess): Sanity-check the count? Page header plus per-cell

721 + * cost of 16-bit offset, 32-bit page number, and one varint

722 + * (minimum 1 byte).

723 + */

724 + pCursor->nChildren = decodeUnsigned16(PageHeader(pPage) +

725 + kiPageCellCountOffset) + 1;

726 +}

727 +

728 +static int interiorCursorCreate(RecoverInteriorCursor *pParent,

729 + DbPage *pPage, int nPageSize,

730 + RecoverInteriorCursor **ppCursor){

731 + RecoverInteriorCursor *pCursor =

732 + sqlite3_malloc(sizeof(RecoverInteriorCursor));

733 + if( !pCursor ){

734 + return SQLITE_NOMEM;

735 + }

736 +

737 + memset(pCursor, 0, sizeof(*pCursor));

738 + pCursor->pParent = pParent;

739 + pCursor->nPageSize = nPageSize;

740 + interiorCursorSetPage(pCursor, pPage);

741 + *ppCursor = pCursor;

742 + return SQLITE_OK;

743 +}

744 +

745 +/* Internal helper. Return the child page number at iChild. */

746 +static unsigned interiorCursorChildPage(RecoverInteriorCursor *pCursor){

747 + const unsigned char *pPageHeader; /* Header of the current page. */

748 + const unsigned char *pCellOffsets; /* Offset to page's cell offsets. */

749 + unsigned iCellOffset; /* Offset of target cell. */

750 +

751 + assert( pCursor->iChild<pCursor->nChildren );

752 +

753 + /* Rightmost child is in the header. */

754 + pPageHeader = PageHeader(pCursor->pPage);

755 + if( pCursor->iChild==pCursor->nChildren-1 ){

756 + return decodeUnsigned32(pPageHeader + kiPageRightChildOffset);

757 + }

758 +

759 + /* Each cell is a 4-byte integer page number and a varint rowid

760 + * which is greater than the rowid of items in that sub-tree (this

761 + * module ignores ordering). The offset is from the beginning of the

762 + * page, not from the page header.

763 + */

764 + pCellOffsets = pPageHeader + kiPageInteriorHeaderBytes;

765 + iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iChild*2);

766 + if( iCellOffset<=pCursor->nPageSize-4 ){

767 + return decodeUnsigned32(PageData(pCursor->pPage, iCellOffset));

768 + }

769 +

770 + /* TODO(shess): Check for cell overlaps? Cells require 4 bytes plus

771 + * a varint. Check could be identical to leaf check (or even a

772 + * shared helper testing for "Cells starting in this range"?).

773 + */

774 +

775 + /* If the offset is broken, return an invalid page number. */

776 + return 0;

777 +}

778 +

779 +static int interiorCursorEOF(RecoverInteriorCursor *pCursor){

780 + /* Find a parent with remaining children. EOF if none found. */

781 + while( pCursor && pCursor->iChild>=pCursor->nChildren ){

782 + pCursor = pCursor->pParent;

783 + }

784 + return pCursor==NULL;

785 +}

786 +

787 +/* Internal helper. Used to detect if iPage would cause a loop. */

788 +static int interiorCursorPageInUse(RecoverInteriorCursor *pCursor,

789 + unsigned iPage){

790 + /* Find any parent using the indicated page. */

791 + while( pCursor && pCursor->pPage->pgno!=iPage ){

792 + pCursor = pCursor->pParent;

793 + }

794 + return pCursor!=NULL;

795 +}

796 +

797 +/* Get the next page from the interior cursor at *ppCursor. Returns

798 + * SQLITE_ROW with the page in *ppPage, or SQLITE_DONE if out of

799 + * pages, or the error SQLite returned.

800 + *

801 + * If the tree is uneven, then when the cursor attempts to get a new

802 + * interior page from the parent cursor, it may get a non-interior

803 + * page. In that case, the new page is returned, and *ppCursor is

804 + * updated to point to the parent cursor (this cursor is freed).

805 + */

806 +/* TODO(shess): I've tried to avoid recursion in most of this code,

807 + * but this case is more challenging because the recursive call is in

808 + * the middle of operation. One option for converting it without

809 + * adding memory management would be to retain the head pointer and

810 + * use a helper to "back up" as needed. Another option would be to

811 + * reverse the list during traversal.

812 + */

813 +static int interiorCursorNextPage(RecoverInteriorCursor **ppCursor,

814 + DbPage **ppPage){

815 + RecoverInteriorCursor *pCursor = *ppCursor;

816 + while( 1 ){

817 + int rc;

818 + const unsigned char *pPageHeader; /* Header of found page. */

819 +

820 + /* Find a valid child page which isn't on the stack. */

821 + while( pCursor->iChild<pCursor->nChildren ){

822 + const unsigned iPage = interiorCursorChildPage(pCursor);

823 + pCursor->iChild++;

824 + if( interiorCursorPageInUse(pCursor, iPage) ){

825 + fprintf(stderr, "Loop detected at %d\n", iPage);

826 + }else{

827 + int rc = sqlite3PagerAcquire(pCursor->pPage->pPager, iPage, ppPage, 0);

828 + if( rc==SQLITE_OK ){

829 + return SQLITE_ROW;

830 + }

831 + }

832 + }

833 +

834 + /* This page has no more children. Get next page from parent. */

835 + if( !pCursor->pParent ){

836 + return SQLITE_DONE;

837 + }

838 + rc = interiorCursorNextPage(&pCursor->pParent, ppPage);

839 + if( rc!=SQLITE_ROW ){

840 + return rc;

841 + }

842 +

843 + /* If a non-interior page is received, that either means that the

844 + * tree is uneven, or that a child was re-used (say as an overflow

845 + * page). Remove this cursor and let the caller handle the page.

846 + */

847 + pPageHeader = PageHeader(*ppPage);

848 + if( pPageHeader[kiPageTypeOffset]!=kTableInteriorPage ){

849 + *ppCursor = pCursor->pParent;

850 + pCursor->pParent = NULL;

851 + interiorCursorDestroy(pCursor);

852 + return SQLITE_ROW;

853 + }

854 +

855 + /* Iterate the new page. */

856 + interiorCursorSetPage(pCursor, *ppPage);

857 + *ppPage = NULL;

858 + }

859 +

860 + assert(NULL); /* NOTREACHED() */

861 + return SQLITE_CORRUPT;

862 +}

863 +

864 +/* Large rows are spilled to overflow pages. The row's main page

865 + * stores the overflow page number after the local payload, with a

866 + * linked list forward from there as necessary. overflowMaybeCreate()

867 + * and overflowGetSegment() provide an abstraction for accessing such

868 + * data while centralizing the code.

869 + *

870 + * overflowDestroy - releases all resources associated with the structure.

871 + * overflowMaybeCreate - create the overflow structure if it is needed

872 + * to represent the given record. See function comment.

873 + * overflowGetSegment - fetch a segment from the record, accounting

874 + * for overflow pages. Segments which are not

875 + * entirely contained with a page are constructed

876 + * into a buffer which is returned. See function comment.

877 + */

878 +typedef struct RecoverOverflow RecoverOverflow;

879 +struct RecoverOverflow {

880 + RecoverOverflow *pNextOverflow;

881 + DbPage *pPage;

882 + unsigned nPageSize;

883 +};

884 +

885 +static void overflowDestroy(RecoverOverflow *pOverflow){

886 + while( pOverflow ){

887 + RecoverOverflow *p = pOverflow;

888 + pOverflow = p->pNextOverflow;

889 +

890 + if( p->pPage ){

891 + sqlite3PagerUnref(p->pPage);

892 + p->pPage = NULL;

893 + }

894 +

895 + memset(p, 0xA5, sizeof(*p));

896 + sqlite3_free(p);

897 + }

898 +}

899 +

900 +/* Internal helper. Used to detect if iPage would cause a loop. */

901 +static int overflowPageInUse(RecoverOverflow *pOverflow, unsigned iPage){

902 + while( pOverflow && pOverflow->pPage->pgno!=iPage ){

903 + pOverflow = pOverflow->pNextOverflow;

904 + }

905 + return pOverflow!=NULL;

906 +}

907 +

908 +/* Setup to access an nRecordBytes record beginning at iRecordOffset

909 + * in pPage. If nRecordBytes can be satisfied entirely from pPage,

910 + * then no overflow pages are needed an *pnLocalRecordBytes is set to

911 + * nRecordBytes. Otherwise, *ppOverflow is set to the head of a list

912 + * of overflow pages, and *pnLocalRecordBytes is set to the number of

913 + * bytes local to pPage.

914 + *

915 + * overflowGetSegment() will do the right thing regardless of whether

916 + * those values are set to be in-page or not.

917 + */

918 +static int overflowMaybeCreate(DbPage *pPage, unsigned nPageSize,

919 + unsigned iRecordOffset, unsigned nRecordBytes,

920 + unsigned *pnLocalRecordBytes,

921 + RecoverOverflow **ppOverflow){

922 + unsigned nLocalRecordBytes; /* Record bytes in the leaf page. */

923 + unsigned iNextPage; /* Next page number for record data. */

924 + unsigned nBytes; /* Maximum record bytes as of current page. */

925 + int rc;

926 + RecoverOverflow *pFirstOverflow; /* First in linked list of pages. */

927 + RecoverOverflow *pLastOverflow; /* End of linked list. */

928 +

929 + /* Calculations from the "Table B-Tree Leaf Cell" part of section

930 + * 1.5 of http://www.sqlite.org/fileformat2.html . maxLocal and

931 + * minLocal to match naming in btree.c.

932 + */

933 + const unsigned maxLocal = nPageSize - 35;

934 + const unsigned minLocal = ((nPageSize-12)*32/255)-23; /* m */

935 +

936 + /* Always fit anything smaller than maxLocal. */

937 + if( nRecordBytes<=maxLocal ){

938 + *pnLocalRecordBytes = nRecordBytes;

939 + *ppOverflow = NULL;

940 + return SQLITE_OK;

941 + }

942 +

943 + /* Calculate the remainder after accounting for minLocal on the leaf

944 + * page and what packs evenly into overflow pages. If the remainder

945 + * does not fit into maxLocal, then a partially-full overflow page

946 + * will be required in any case, so store as little as possible locally.

947 + */

948 + nLocalRecordBytes = minLocal+((nRecordBytes-minLocal)%(nPageSize-4));

949 + if( maxLocal<nLocalRecordBytes ){

950 + nLocalRecordBytes = minLocal;

951 + }

952 +

953 + /* Don't read off the end of the page. */

954 + if( iRecordOffset+nLocalRecordBytes+4>nPageSize ){

955 + return SQLITE_CORRUPT;

956 + }

957 +

958 + /* First overflow page number is after the local bytes. */

959 + iNextPage =

960 + decodeUnsigned32(PageData(pPage, iRecordOffset + nLocalRecordBytes));

961 + nBytes = nLocalRecordBytes;

962 +

963 + /* While there are more pages to read, and more bytes are needed,

964 + * get another page.

965 + */

966 + pFirstOverflow = pLastOverflow = NULL;

967 + rc = SQLITE_OK;

968 + while( iNextPage && nBytes<nRecordBytes ){

969 + RecoverOverflow *pOverflow; /* New overflow page for the list. */

970 +

971 + rc = sqlite3PagerAcquire(pPage->pPager, iNextPage, &pPage, 0);

972 + if( rc!=SQLITE_OK ){

973 + break;

974 + }

975 +

976 + pOverflow = sqlite3_malloc(sizeof(RecoverOverflow));

977 + if( !pOverflow ){

978 + sqlite3PagerUnref(pPage);

979 + rc = SQLITE_NOMEM;

980 + break;

981 + }

982 + memset(pOverflow, 0, sizeof(*pOverflow));

983 + pOverflow->pPage = pPage;

984 + pOverflow->nPageSize = nPageSize;

985 +

986 + if( !pFirstOverflow ){

987 + pFirstOverflow = pOverflow;

988 + }else{

989 + pLastOverflow->pNextOverflow = pOverflow;

990 + }

991 + pLastOverflow = pOverflow;

992 +

993 + iNextPage = decodeUnsigned32(pPage->pData);

994 + nBytes += nPageSize-4;

995 +

996 + /* Avoid loops. */

997 + if( overflowPageInUse(pFirstOverflow, iNextPage) ){

998 + fprintf(stderr, "Overflow loop detected at %d\n", iNextPage);

999 + rc = SQLITE_CORRUPT;

1000 + break;

1001 + }

1002 + }

1003 +

1004 + /* If there were not enough pages, or too many, things are corrupt.

1005 + * Not having enough pages is an obvious problem, all the data

1006 + * cannot be read. Too many pages means that the contents of the

1007 + * row between the main page and the overflow page(s) is

1008 + * inconsistent (most likely one or more of the overflow pages does

1009 + * not really belong to this row).

1010 + */

1011 + if( rc==SQLITE_OK && (nBytes<nRecordBytes || iNextPage) ){

1012 + rc = SQLITE_CORRUPT;

1013 + }

1014 +

1015 + if( rc==SQLITE_OK ){

1016 + *ppOverflow = pFirstOverflow;

1017 + *pnLocalRecordBytes = nLocalRecordBytes;

1018 + }else if( pFirstOverflow ){

1019 + overflowDestroy(pFirstOverflow);

1020 + }

1021 + return rc;

1022 +}

1023 +

1024 +/* Use in concert with overflowMaybeCreate() to efficiently read parts

1025 + * of a potentially-overflowing record. pPage and iRecordOffset are

1026 + * the values passed into overflowMaybeCreate(), nLocalRecordBytes and

1027 + * pOverflow are the values returned by that call.

1028 + *

1029 + * On SQLITE_OK, *ppBase points to nRequestBytes of data at

1030 + * iRequestOffset within the record. If the data exists contiguously

1031 + * in a page, a direct pointer is returned, otherwise a buffer from

1032 + * sqlite3_malloc() is returned with the data. *pbFree is set true if

1033 + * sqlite3_free() should be called on *ppBase.

1034 + */

1035 +/* Operation of this function is subtle. At any time, pPage is the

1036 + * current page, with iRecordOffset and nLocalRecordBytes being record

1037 + * data within pPage, and pOverflow being the overflow page after

1038 + * pPage. This allows the code to handle both the initial leaf page

1039 + * and overflow pages consistently by adjusting the values

1040 + * appropriately.

1041 + */

1042 +static int overflowGetSegment(DbPage *pPage, unsigned iRecordOffset,

1043 + unsigned nLocalRecordBytes,

1044 + RecoverOverflow *pOverflow,

1045 + unsigned iRequestOffset, unsigned nRequestBytes,

1046 + unsigned char **ppBase, int *pbFree){

1047 + unsigned nBase; /* Amount of data currently collected. */

1048 + unsigned char *pBase; /* Buffer to collect record data into. */

1049 +

1050 + /* Skip to the page containing the start of the data. */

1051 + while( iRequestOffset>=nLocalRecordBytes && pOverflow ){

1052 + /* Factor out current page's contribution. */

1053 + iRequestOffset -= nLocalRecordBytes;

1054 +

1055 + /* Move forward to the next page in the list. */

1056 + pPage = pOverflow->pPage;

1057 + iRecordOffset = 4;

1058 + nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset;

1059 + pOverflow = pOverflow->pNextOverflow;

1060 + }

1061 +

1062 + /* If the requested data is entirely within this page, return a

1063 + * pointer into the page.

1064 + */

1065 + if( iRequestOffset+nRequestBytes<=nLocalRecordBytes ){

1066 + /* TODO(shess): "assignment discards qualifiers from pointer target type"

1067 + * Having ppBase be const makes sense, but sqlite3_free() takes non-const.

1068 + */

1069 + *ppBase = (unsigned char *)PageData(pPage, iRecordOffset + iRequestOffset);

1070 + *pbFree = 0;

1071 + return SQLITE_OK;

1072 + }

1073 +

1074 + /* The data range would require additional pages. */

1075 + if( !pOverflow ){

1076 + /* Should never happen, the range is outside the nRecordBytes

1077 + * passed to overflowMaybeCreate().

1078 + */

1079 + assert(NULL); /* NOTREACHED */

1080 + return SQLITE_ERROR;

1081 + }

1082 +

1083 + /* Get a buffer to construct into. */

1084 + nBase = 0;

1085 + pBase = sqlite3_malloc(nRequestBytes);

1086 + if( !pBase ){

1087 + return SQLITE_NOMEM;

1088 + }

1089 + while( nBase<nRequestBytes ){

1090 + /* Copy over data present on this page. */

1091 + unsigned nCopyBytes = nRequestBytes - nBase;

1092 + if( nLocalRecordBytes-iRequestOffset<nCopyBytes ){

1093 + nCopyBytes = nLocalRecordBytes - iRequestOffset;

1094 + }

1095 + memcpy(pBase + nBase, PageData(pPage, iRecordOffset + iRequestOffset),

1096 + nCopyBytes);

1097 + nBase += nCopyBytes;

1098 +

1099 + if( pOverflow ){

1100 + /* Copy from start of record data in future pages. */

1101 + iRequestOffset = 0;

1102 +

1103 + /* Move forward to the next page in the list. Should match

1104 + * first while() loop.

1105 + */

1106 + pPage = pOverflow->pPage;

1107 + iRecordOffset = 4;

1108 + nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset;

1109 + pOverflow = pOverflow->pNextOverflow;

1110 + }else if( nBase<nRequestBytes ){

1111 + /* Ran out of overflow pages with data left to deliver. Not

1112 + * possible if the requested range fits within nRecordBytes

1113 + * passed to overflowMaybeCreate() when creating pOverflow.

1114 + */

1115 + assert(NULL); /* NOTREACHED */

1116 + sqlite3_free(pBase);

1117 + return SQLITE_ERROR;

1118 + }

1119 + }

1120 + assert( nBase==nRequestBytes );

1121 + *ppBase = pBase;

1122 + *pbFree = 1;

1123 + return SQLITE_OK;

1124 +}

1125 +

1126 +/* Primary structure for iterating the contents of a table.

1127 + *

1128 + * leafCursorDestroy - release all resources associated with the cursor.

1129 + * leafCursorCreate - create a cursor to iterate items from tree at

1130 + * the provided root page.

1131 + * leafCursorNextValidCell - get the cursor ready to access data from

1132 + * the next valid cell in the table.

1133 + * leafCursorCellRowid - get the current cell's rowid.

1134 + * leafCursorCellColumns - get current cell's column count.

1135 + * leafCursorCellColInfo - get type and data for a column in current cell.

1136 + *

1137 + * leafCursorNextValidCell skips cells which fail simple integrity

1138 + * checks, such as overlapping other cells, or being located at

1139 + * impossible offsets, or where header data doesn't correctly describe

1140 + * payload data. Returns SQLITE_ROW if a valid cell is found,

1141 + * SQLITE_DONE if all pages in the tree were exhausted.

1142 + *

1143 + * leafCursorCellColInfo() accounts for overflow pages in the style of

1144 + * overflowGetSegment().

1145 + */

1146 +typedef struct RecoverLeafCursor RecoverLeafCursor;

1147 +struct RecoverLeafCursor {

1148 + RecoverInteriorCursor *pParent; /* Parent node to this node. */

1149 + DbPage *pPage; /* Reference to leaf page. */

1150 + unsigned nPageSize; /* Size of pPage. */

1151 + unsigned nCells; /* Number of cells in pPage. */

1152 + unsigned iCell; /* Current cell. */

1153 +

1154 + /* Info parsed from data in iCell. */

1155 + i64 iRowid; /* rowid parsed. */

1156 + unsigned nRecordCols; /* how many items in the record. */

1157 + u64 iRecordOffset; /* offset to record data. */

1158 + /* TODO(shess): nRecordBytes and nRecordHeaderBytes are used in

1159 + * leafCursorCellColInfo() to prevent buffer overruns.

1160 + * leafCursorCellDecode() already verified that the cell is valid, so

1161 + * those checks should be redundant.

1162 + */

1163 + u64 nRecordBytes; /* Size of record data. */

1164 + unsigned nLocalRecordBytes; /* Amount of record data in-page. */

1165 + unsigned nRecordHeaderBytes; /* Size of record header data. */

1166 + unsigned char *pRecordHeader; /* Pointer to record header data. */

1167 + int bFreeRecordHeader; /* True if record header requires free. */

1168 + RecoverOverflow *pOverflow; /* Cell overflow info, if needed. */

1169 +};

1170 +

1171 +/* Internal helper shared between next-page and create-cursor. If

1172 + * pPage is a leaf page, it will be stored in the cursor and state

1173 + * initialized for reading cells.

1174 + *

1175 + * If pPage is an interior page, a new parent cursor is created and

1176 + * injected on the stack. This is necessary to handle trees with

1177 + * uneven depth, but also is used during initial setup.

1178 + *

1179 + * If pPage is not a table page at all, it is discarded.

1180 + *

1181 + * If SQLITE_OK is returned, the caller no longer owns pPage,

1182 + * otherwise the caller is responsible for discarding it.

1183 + */

1184 +static int leafCursorLoadPage(RecoverLeafCursor *pCursor, DbPage *pPage){

1185 + const unsigned char *pPageHeader; /* Header of pPage */

1186 +

1187 + /* Release the current page. */

1188 + if( pCursor->pPage ){

1189 + sqlite3PagerUnref(pCursor->pPage);

1190 + pCursor->pPage = NULL;

1191 + pCursor->iCell = pCursor->nCells = 0;

1192 + }

1193 +

1194 + /* If the page is an unexpected interior node, inject a new stack

1195 + * layer and try again from there.

1196 + */

1197 + pPageHeader = PageHeader(pPage);

1198 + if( pPageHeader[kiPageTypeOffset]==kTableInteriorPage ){

1199 + RecoverInteriorCursor *pParent;

1200 + int rc = interiorCursorCreate(pCursor->pParent, pPage, pCursor->nPageSize,

1201 + &pParent);

1202 + if( rc!=SQLITE_OK ){

1203 + return rc;

1204 + }

1205 + pCursor->pParent = pParent;

1206 + return SQLITE_OK;

1207 + }

1208 +

1209 + /* Not a leaf page, skip it. */

1210 + if( pPageHeader[kiPageTypeOffset]!=kTableLeafPage ){

1211 + sqlite3PagerUnref(pPage);

1212 + return SQLITE_OK;

1213 + }

1214 +

1215 + /* Take ownership of the page and start decoding. */

1216 + pCursor->pPage = pPage;

1217 + pCursor->iCell = 0;

1218 + pCursor->nCells = decodeUnsigned16(pPageHeader + kiPageCellCountOffset);

1219 + return SQLITE_OK;

1220 +}

1221 +

1222 +/* Get the next leaf-level page in the tree. Returns SQLITE_ROW when

1223 + * a leaf page is found, SQLITE_DONE when no more leaves exist, or any

1224 + * error which occurred.

1225 + */

1226 +static int leafCursorNextPage(RecoverLeafCursor *pCursor){

1227 + if( !pCursor->pParent ){

1228 + return SQLITE_DONE;

1229 + }

1230 +

1231 + /* Repeatedly load the parent's next child page until a leaf is found. */

1232 + do {

1233 + DbPage *pNextPage;

1234 + int rc = interiorCursorNextPage(&pCursor->pParent, &pNextPage);

1235 + if( rc!=SQLITE_ROW ){

1236 + assert( rc==SQLITE_DONE );

1237 + return rc;

1238 + }

1239 +

1240 + rc = leafCursorLoadPage(pCursor, pNextPage);

1241 + if( rc!=SQLITE_OK ){

1242 + sqlite3PagerUnref(pNextPage);

1243 + return rc;

1244 + }

1245 + } while( !pCursor->pPage );

1246 +

1247 + return SQLITE_ROW;

1248 +}

1249 +

1250 +static void leafCursorDestroyCellData(RecoverLeafCursor *pCursor){

1251 + if( pCursor->bFreeRecordHeader ){

1252 + sqlite3_free(pCursor->pRecordHeader);

1253 + }

1254 + pCursor->bFreeRecordHeader = 0;

1255 + pCursor->pRecordHeader = NULL;

1256 +

1257 + if( pCursor->pOverflow ){

1258 + overflowDestroy(pCursor->pOverflow);

1259 + pCursor->pOverflow = NULL;

1260 + }

1261 +}

1262 +

1263 +static void leafCursorDestroy(RecoverLeafCursor *pCursor){

1264 + leafCursorDestroyCellData(pCursor);

1265 +

1266 + if( pCursor->pParent ){

1267 + interiorCursorDestroy(pCursor->pParent);

1268 + pCursor->pParent = NULL;

1269 + }

1270 +

1271 + if( pCursor->pPage ){

1272 + sqlite3PagerUnref(pCursor->pPage);

1273 + pCursor->pPage = NULL;

1274 + }

1275 +

1276 + memset(pCursor, 0xA5, sizeof(*pCursor));

1277 + sqlite3_free(pCursor);

1278 +}

1279 +

1280 +/* Create a cursor to iterate the rows from the leaf pages of a table

1281 + * rooted at iRootPage.

1282 + */

1283 +/* TODO(shess): recoverOpen() calls this to setup the cursor, and I

1284 + * think that recoverFilter() may make a hard assumption that the

1285 + * cursor returned will turn up at least one valid cell.

1286 + *

1287 + * The cases I can think of which break this assumption are:

1288 + * - pPage is a valid leaf page with no valid cells.

1289 + * - pPage is a valid interior page with no valid leaves.

1290 + * - pPage is a valid interior page who's leaves contain no valid cells.

1291 + * - pPage is not a valid leaf or interior page.

1292 + */

1293 +static int leafCursorCreate(Pager *pPager, unsigned nPageSize,

1294 + u32 iRootPage, RecoverLeafCursor **ppCursor){

1295 + DbPage *pPage; /* Reference to page at iRootPage. */

1296 + RecoverLeafCursor *pCursor; /* Leaf cursor being constructed. */

1297 + int rc;

1298 +

1299 + /* Start out with the root page. */

1300 + rc = sqlite3PagerAcquire(pPager, iRootPage, &pPage, 0);

1301 + if( rc!=SQLITE_OK ){

1302 + return rc;

1303 + }

1304 +

1305 + pCursor = sqlite3_malloc(sizeof(RecoverLeafCursor));

1306 + if( !pCursor ){

1307 + sqlite3PagerUnref(pPage);

1308 + return SQLITE_NOMEM;

1309 + }

1310 + memset(pCursor, 0, sizeof(*pCursor));

1311 +

1312 + pCursor->nPageSize = nPageSize;

1313 +

1314 + rc = leafCursorLoadPage(pCursor, pPage);

1315 + if( rc!=SQLITE_OK ){

1316 + sqlite3PagerUnref(pPage);

1317 + leafCursorDestroy(pCursor);

1318 + return rc;

1319 + }

1320 +

1321 + /* pPage wasn't a leaf page, find the next leaf page. */

1322 + if( !pCursor->pPage ){

1323 + rc = leafCursorNextPage(pCursor);

1324 + if( rc!=SQLITE_DONE && rc!=SQLITE_ROW ){

1325 + leafCursorDestroy(pCursor);

1326 + return rc;

1327 + }

1328 + }

1329 +

1330 + *ppCursor = pCursor;

1331 + return SQLITE_OK;

1332 +}

1333 +

1334 +/* Useful for setting breakpoints. */

1335 +static int ValidateError(){

1336 + return SQLITE_ERROR;

1337 +}

1338 +

1339 +/* Setup the cursor for reading the information from cell iCell. */

1340 +static int leafCursorCellDecode(RecoverLeafCursor *pCursor){

1341 + const unsigned char *pPageHeader; /* Header of current page. */

1342 + const unsigned char *pCellOffsets; /* Pointer to page's cell offsets. */

1343 + unsigned iCellOffset; /* Offset of current cell (iCell). */

1344 + const unsigned char *pCell; /* Pointer to data at iCellOffset. */

1345 + unsigned nCellMaxBytes; /* Maximum local size of iCell. */

1346 + unsigned iEndOffset; /* End of iCell's in-page data. */

1347 + u64 nRecordBytes; /* Expected size of cell, w/overflow. */

1348 + u64 iRowid; /* iCell's rowid (in table). */

1349 + unsigned nRead; /* Amount of cell read. */

1350 + unsigned nRecordHeaderRead; /* Header data read. */

1351 + u64 nRecordHeaderBytes; /* Header size expected. */

1352 + unsigned nRecordCols; /* Columns read from header. */

1353 + u64 nRecordColBytes; /* Bytes in payload for those columns. */

1354 + unsigned i;

1355 + int rc;

1356 +

1357 + assert( pCursor->iCell<pCursor->nCells );

1358 +

1359 + leafCursorDestroyCellData(pCursor);

1360 +

1361 + /* Find the offset to the row. */

1362 + pPageHeader = PageHeader(pCursor->pPage);

1363 + pCellOffsets = pPageHeader + knPageLeafHeaderBytes;

1364 + iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iCell*2);

1365 + if( iCellOffset>=pCursor->nPageSize ){

1366 + return ValidateError();

1367 + }

1368 +

1369 + pCell = PageData(pCursor->pPage, iCellOffset);

1370 + nCellMaxBytes = pCursor->nPageSize - iCellOffset;

1371 +

1372 + /* B-tree leaf cells lead with varint record size, varint rowid and

1373 + * varint header size.

1374 + */

1375 + /* TODO(shess): The smallest page size is 512 bytes, which has an m

1376 + * of 39. Three varints need at most 27 bytes to encode. I think.

1377 + */

1378 + if( !checkVarints(pCell, nCellMaxBytes, 3) ){

1379 + return ValidateError();

1380 + }

1381 +

1382 + nRead = getVarint(pCell, &nRecordBytes);

1383 + assert( iCellOffset+nRead<=pCursor->nPageSize );

1384 + pCursor->nRecordBytes = nRecordBytes;

1385 +

1386 + nRead += getVarint(pCell + nRead, &iRowid);

1387 + assert( iCellOffset+nRead<=pCursor->nPageSize );

1388 + pCursor->iRowid = (i64)iRowid;

1389 +

1390 + pCursor->iRecordOffset = iCellOffset + nRead;

1391 +

1392 + /* Start overflow setup here because nLocalRecordBytes is needed to

1393 + * check cell overlap.

1394 + */

1395 + rc = overflowMaybeCreate(pCursor->pPage, pCursor->nPageSize,

1396 + pCursor->iRecordOffset, pCursor->nRecordBytes,

1397 + &pCursor->nLocalRecordBytes,

1398 + &pCursor->pOverflow);

1399 + if( rc!=SQLITE_OK ){

1400 + return ValidateError();

1401 + }

1402 +

1403 + /* Check that no other cell starts within this cell. */

1404 + iEndOffset = pCursor->iRecordOffset + pCursor->nLocalRecordBytes;

1405 + for( i=0; i<pCursor->nCells; ++i ){

1406 + const unsigned iOtherOffset = decodeUnsigned16(pCellOffsets + i*2);

1407 + if( iOtherOffset>iCellOffset && iOtherOffset<iEndOffset ){

1408 + return ValidateError();

1409 + }

1410 + }

1411 +

1412 + nRecordHeaderRead = getVarint(pCell + nRead, &nRecordHeaderBytes);

1413 + assert( nRecordHeaderBytes<=nRecordBytes );

1414 + pCursor->nRecordHeaderBytes = nRecordHeaderBytes;

1415 +

1416 + /* Large headers could overflow if pages are small. */

1417 + rc = overflowGetSegment(pCursor->pPage,

1418 + pCursor->iRecordOffset, pCursor->nLocalRecordBytes,

1419 + pCursor->pOverflow, 0, nRecordHeaderBytes,

1420 + &pCursor->pRecordHeader, &pCursor->bFreeRecordHeader) ;

1421 + if( rc!=SQLITE_OK ){

1422 + return ValidateError();

1423 + }

1424 +

1425 + /* Tally up the column count and size of data. */

1426 + nRecordCols = 0;

1427 + nRecordColBytes = 0;

1428 + while( nRecordHeaderRead<nRecordHeaderBytes ){

1429 + u64 iSerialType; /* Type descriptor for current column. */

1430 + if( !checkVarint(pCursor->pRecordHeader + nRecordHeaderRead,

1431 + nRecordHeaderBytes - nRecordHeaderRead) ){

1432 + return ValidateError();

1433 + }

1434 + nRecordHeaderRead += getVarint(pCursor->pRecordHeader + nRecordHeaderRead,

1435 + &iSerialType);

1436 + if( iSerialType==10 || iSerialType==11 ){

1437 + return ValidateError();

1438 + }

1439 + nRecordColBytes += SerialTypeLength(iSerialType);

1440 + nRecordCols++;

1441 + }

1442 + pCursor->nRecordCols = nRecordCols;

1443 +

1444 + /* Parsing the header used as many bytes as expected. */

1445 + if( nRecordHeaderRead!=nRecordHeaderBytes ){

1446 + return ValidateError();

1447 + }

1448 +

1449 + /* Calculated record is size of expected record. */

1450 + if( nRecordHeaderBytes+nRecordColBytes!=nRecordBytes ){

1451 + return ValidateError();

1452 + }

1453 +

1454 + return SQLITE_OK;

1455 +}

1456 +

1457 +static i64 leafCursorCellRowid(RecoverLeafCursor *pCursor){

1458 + return pCursor->iRowid;

1459 +}

1460 +

1461 +static unsigned leafCursorCellColumns(RecoverLeafCursor *pCursor){

1462 + return pCursor->nRecordCols;

1463 +}

1464 +

1465 +/* Get the column info for the cell. Pass NULL for ppBase to prevent

1466 + * retrieving the data segment. If pbFree is true, ppBase must be

1467 + * freed by the caller using sqlite3_free().

1468 + */

1469 +static int leafCursorCellColInfo(RecoverLeafCursor *pCursor,

1470 + unsigned iCol, u64 *piColType,

1471 + unsigned char **ppBase, int *pbFree){

1472 + const unsigned char *pRecordHeader; /* Current cell's header. */

1473 + u64 nRecordHeaderBytes; /* Bytes in pRecordHeader. */

1474 + unsigned nRead; /* Bytes read from header. */

1475 + u64 iColEndOffset; /* Offset to end of column in cell. */

1476 + unsigned nColsSkipped; /* Count columns as procesed. */

1477 + u64 iSerialType; /* Type descriptor for current column. */

1478 +

1479 + /* Implicit NULL for columns past the end. This case happens when

1480 + * rows have not been updated since an ALTER TABLE added columns.

1481 + * It is more convenient to address here than in callers.

1482 + */

1483 + if( iCol>=pCursor->nRecordCols ){

1484 + *piColType = 0;

1485 + if( ppBase ){

1486 + *ppBase = 0;

1487 + *pbFree = 0;

1488 + }

1489 + return SQLITE_OK;

1490 + }

1491 +

1492 + /* Must be able to decode header size. */

1493 + pRecordHeader = pCursor->pRecordHeader;

1494 + if( !checkVarint(pRecordHeader, pCursor->nRecordHeaderBytes) ){

1495 + return SQLITE_CORRUPT;

1496 + }

1497 +

1498 + /* Rather than caching the header size and how many bytes it took,

1499 + * decode it every time.

1500 + */

1501 + nRead = getVarint(pRecordHeader, &nRecordHeaderBytes);

1502 + assert( nRecordHeaderBytes==pCursor->nRecordHeaderBytes );

1503 +

1504 + /* Scan forward to the indicated column. Scans to _after_ column

1505 + * for later range checking.

1506 + */

1507 + /* TODO(shess): This could get expensive for very wide tables. An

1508 + * array of iSerialType could be built in leafCursorCellDecode(), but

1509 + * the number of columns is dynamic per row, so it would add memory

1510 + * management complexity. Enough info to efficiently forward

1511 + * iterate could be kept, if all clients forward iterate

1512 + * (recoverColumn() may not).

1513 + */

1514 + iColEndOffset = 0;

1515 + nColsSkipped = 0;

1516 + while( nColsSkipped<=iCol && nRead<nRecordHeaderBytes ){

1517 + if( !checkVarint(pRecordHeader + nRead, nRecordHeaderBytes - nRead) ){

1518 + return SQLITE_CORRUPT;

1519 + }

1520 + nRead += getVarint(pRecordHeader + nRead, &iSerialType);

1521 + iColEndOffset += SerialTypeLength(iSerialType);

1522 + nColsSkipped++;

1523 + }

1524 +

1525 + /* Column's data extends past record's end. */

1526 + if( nRecordHeaderBytes+iColEndOffset>pCursor->nRecordBytes ){

1527 + return SQLITE_CORRUPT;

1528 + }

1529 +

1530 + *piColType = iSerialType;

1531 + if( ppBase ){

1532 + const u32 nColBytes = SerialTypeLength(iSerialType);

1533 +

1534 + /* Offset from start of record to beginning of column. */

1535 + const unsigned iColOffset = nRecordHeaderBytes+iColEndOffset-nColBytes;

1536 +

1537 + return overflowGetSegment(pCursor->pPage, pCursor->iRecordOffset,

1538 + pCursor->nLocalRecordBytes, pCursor->pOverflow,

1539 + iColOffset, nColBytes, ppBase, pbFree);

1540 + }

1541 + return SQLITE_OK;

1542 +}

1543 +

1544 +static int leafCursorNextValidCell(RecoverLeafCursor *pCursor){

1545 + while( 1 ){

1546 + int rc;

1547 +

1548 + /* Move to the next cell. */

1549 + pCursor->iCell++;

1550 +

1551 + /* No more cells, get the next leaf. */

1552 + if( pCursor->iCell>=pCursor->nCells ){

1553 + rc = leafCursorNextPage(pCursor);

1554 + if( rc!=SQLITE_ROW ){

1555 + return rc;

1556 + }

1557 + assert( pCursor->iCell==0 );

1558 + }

1559 +

1560 + /* If the cell is valid, indicate that a row is available. */

1561 + rc = leafCursorCellDecode(pCursor);

1562 + if( rc==SQLITE_OK ){

1563 + return SQLITE_ROW;

1564 + }

1565 +

1566 + /* Iterate until done or a valid row is found. */

1567 + /* TODO(shess): Remove debugging output. */

1568 + fprintf(stderr, "Skipping invalid cell\n");

1569 + }

1570 + return SQLITE_ERROR;

1571 +}

1572 +

1573 +typedef struct Recover Recover;

1574 +struct Recover {

1575 + sqlite3_vtab base;

1576 + sqlite3 *db; /* Host database connection */

1577 + char *zDb; /* Database containing target table */

1578 + char *zTable; /* Target table */

1579 + unsigned nCols; /* Number of columns in target table */

1580 + unsigned char pTypes; / Types of columns in target table */

1581 +};

1582 +

1583 +/* Internal helper for deleting the module. */

1584 +static void recoverRelease(Recover *pRecover){

1585 + sqlite3_free(pRecover->zDb);

1586 + sqlite3_free(pRecover->zTable);

1587 + sqlite3_free(pRecover->pTypes);

1588 + memset(pRecover, 0xA5, sizeof(*pRecover));

1589 + sqlite3_free(pRecover);

1590 +}

1591 +

1592 +/* Helper function for initializing the module. Forward-declared so

1593 + * recoverCreate() and recoverConnect() can see it.

1594 + */

1595 +static int recoverInit(

1596 + sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **

1597 +);

1598 +

1599 +static int recoverCreate(

1600 + sqlite3 *db,

1601 + void *pAux,

1602 + int argc, const char *const*argv,

1603 + sqlite3_vtab **ppVtab,

1604 + char **pzErr

1605 +){

1606 + FNENTRY();

1607 + return recoverInit(db, pAux, argc, argv, ppVtab, pzErr);

1608 +}

1609 +

1610 +/* This should never be called. */

1611 +static int recoverConnect(

1612 + sqlite3 *db,

1613 + void *pAux,

1614 + int argc, const char *const*argv,

1615 + sqlite3_vtab **ppVtab,

1616 + char **pzErr

1617 +){

1618 + FNENTRY();

1619 + return recoverInit(db, pAux, argc, argv, ppVtab, pzErr);

1620 +}

1621 +

1622 +/* No indices supported. */

1623 +static int recoverBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){

1624 + FNENTRY();

1625 + return SQLITE_OK;

1626 +}

1627 +

1628 +/* Logically, this should never be called. */

1629 +static int recoverDisconnect(sqlite3_vtab *pVtab){

1630 + FNENTRY();

1631 + recoverRelease((Recover*)pVtab);

1632 + return SQLITE_OK;

1633 +}

1634 +

1635 +static int recoverDestroy(sqlite3_vtab *pVtab){

1636 + FNENTRY();

1637 + recoverRelease((Recover*)pVtab);

1638 + return SQLITE_OK;

1639 +}

1640 +

1641 +typedef struct RecoverCursor RecoverCursor;

1642 +struct RecoverCursor {

1643 + sqlite3_vtab_cursor base;

1644 + RecoverLeafCursor *pLeafCursor;

1645 + int iEncoding;

1646 + int bEOF;

1647 +};

1648 +

1649 +static int recoverOpen(sqlite3_vtab pVTab, sqlite3_vtab_cursor *ppCursor){

1650 + Recover pRecover = (Recover)pVTab;

1651 + u32 iRootPage; /* Root page of the backing table. */

1652 + int iEncoding; /* UTF encoding for backing database. */

1653 + unsigned nPageSize; /* Size of pages in backing database. */

1654 + Pager pPager; / Backing database pager. */

1655 + RecoverLeafCursor pLeafCursor; / Cursor to read table's leaf pages. */

1656 + RecoverCursor pCursor; / Cursor to read rows from leaves. */

1657 + int rc;

1658 +

1659 + FNENTRY();

1660 +

1661 + iRootPage = 0;

1662 + rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable,

1663 + &iRootPage);

1664 + if( rc!=SQLITE_OK ){

1665 + return rc;

1666 + }

1667 +

1668 + iEncoding = 0;

1669 + rc = getEncoding(pRecover->db, pRecover->zDb, &iEncoding);

1670 + if( rc!=SQLITE_OK ){

1671 + return rc;

1672 + }

1673 +

1674 + rc = GetPager(pRecover->db, pRecover->zDb, &pPager, &nPageSize);

1675 + if( rc!=SQLITE_OK ){

1676 + return rc;

1677 + }

1678 +

1679 + rc = leafCursorCreate(pPager, nPageSize, iRootPage, &pLeafCursor);

1680 + if( rc!=SQLITE_OK ){

1681 + return rc;

1682 + }

1683 +

1684 + pCursor = sqlite3_malloc(sizeof(RecoverCursor));

1685 + if( !pCursor ){

1686 + leafCursorDestroy(pLeafCursor);

1687 + return SQLITE_NOMEM;

1688 + }

1689 + memset(pCursor, 0, sizeof(*pCursor));

1690 + pCursor->base.pVtab = pVTab;

1691 + pCursor->pLeafCursor = pLeafCursor;

1692 + pCursor->iEncoding = iEncoding;

1693 +

1694 + ppCursor = (sqlite3_vtab_cursor)pCursor;

1695 + return SQLITE_OK;

1696 +}

1697 +

1698 +static int recoverClose(sqlite3_vtab_cursor *cur){

1699 + RecoverCursor pCursor = (RecoverCursor)cur;

1700 + FNENTRY();

1701 + if( pCursor->pLeafCursor ){

1702 + leafCursorDestroy(pCursor->pLeafCursor);

1703 + pCursor->pLeafCursor = NULL;

1704 + }

1705 + memset(pCursor, 0xA5, sizeof(*pCursor));

1706 + sqlite3_free(cur);

1707 + return SQLITE_OK;

1708 +}

1709 +

1710 +/* Helpful place to set a breakpoint. */

1711 +static int RecoverInvalidCell(){

1712 + return SQLITE_ERROR;

1713 +}

1714 +

1715 +/* Returns SQLITE_OK if the cell has an appropriate number of columns

1716 + * with the appropriate types of data.

1717 + */

1718 +static int recoverValidateLeafCell(Recover pRecover, RecoverCursor pCursor){

1719 + unsigned i;

1720 +

1721 + /* If the row's storage has too many columns, skip it. */

1722 + if( leafCursorCellColumns(pCursor->pLeafCursor)>pRecover->nCols ){

1723 + return RecoverInvalidCell();

1724 + }

1725 +

1726 + /* Skip rows with unexpected types. */

1727 + for( i=0; i<pRecover->nCols; ++i ){

1728 + u64 iType; /* Storage type of column i. */

1729 + int rc;

1730 +

1731 + /* ROWID alias. */

1732 + if( (pRecover->pTypes[i]&MASK_ROWID) ){

1733 + continue;

1734 + }

1735 +

1736 + rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iType, NULL, NULL);

1737 + assert( rc==SQLITE_OK );

1738 + if( rc!=SQLITE_OK \|\| !SerialTypeIsCompatible(iType, pRecover->pTypes[i]) ){

1739 + return RecoverInvalidCell();

1740 + }

1741 + }

1742 +

1743 + return SQLITE_OK;

1744 +}

1745 +

1746 +static int recoverNext(sqlite3_vtab_cursor *pVtabCursor){

1747 + RecoverCursor pCursor = (RecoverCursor)pVtabCursor;

1748 + Recover pRecover = (Recover)pCursor->base.pVtab;

1749 + int rc;

1750 +

1751 + FNENTRY();

1752 +

1753 + /* Scan forward to the next cell with valid storage, then check that

1754 + * the stored data matches the schema.

1755 + */

1756 + while( (rc = leafCursorNextValidCell(pCursor->pLeafCursor))==SQLITE_ROW ){

1757 + if( recoverValidateLeafCell(pRecover, pCursor)==SQLITE_OK ){

1758 + return SQLITE_OK;

1759 + }

1760 + }

1761 +

1762 + if( rc==SQLITE_DONE ){

1763 + pCursor->bEOF = 1;

1764 + return SQLITE_OK;

1765 + }

1766 +

1767 + assert( rc!=SQLITE_OK );

1768 + return rc;

1769 +}

1770 +

1771 +static int recoverFilter(

1772 + sqlite3_vtab_cursor *pVtabCursor,

1773 + int idxNum, const char *idxStr,

1774 + int argc, sqlite3_value **argv

1775 +){

1776 + RecoverCursor pCursor = (RecoverCursor)pVtabCursor;

1777 + Recover pRecover = (Recover)pCursor->base.pVtab;

1778 + int rc;

1779 +

1780 + FNENTRY();

1781 +

1782 + /* Load the first cell, and iterate forward if it's not valid. */

1783 + /* TODO(shess): What happens if no cells at all are valid? */

1784 + rc = leafCursorCellDecode(pCursor->pLeafCursor);

1785 + if( rc!=SQLITE_OK \|\| recoverValidateLeafCell(pRecover, pCursor)!=SQLITE_OK ){

1786 + return recoverNext(pVtabCursor);

1787 + }

1788 +

1789 + return SQLITE_OK;

1790 +}

1791 +

1792 +static int recoverEof(sqlite3_vtab_cursor *pVtabCursor){

1793 + RecoverCursor pCursor = (RecoverCursor)pVtabCursor;

1794 + FNENTRY();

1795 + return pCursor->bEOF;

1796 +}

1797 +

1798 +static int recoverColumn(sqlite3_vtab_cursor cur, sqlite3_context ctx, int i) {

1799 + RecoverCursor pCursor = (RecoverCursor)cur;

1800 + Recover pRecover = (Recover)pCursor->base.pVtab;

1801 + u64 iColType; /* Storage type of column i. */

1802 + unsigned char pColData; / Column i's data. */

1803 + int shouldFree; /* Non-zero if pColData should be freed. */

1804 + int rc;

1805 +

1806 + FNENTRY();

1807 +

1808 + if( i>=pRecover->nCols ){

1809 + return SQLITE_ERROR;

1810 + }

1811 +

1812 + /* ROWID alias. */

1813 + if( (pRecover->pTypes[i]&MASK_ROWID) ){

1814 + sqlite3_result_int64(ctx, leafCursorCellRowid(pCursor->pLeafCursor));

1815 + return SQLITE_OK;

1816 + }

1817 +

1818 + pColData = NULL;

1819 + shouldFree = 0;

1820 + rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iColType,

1821 + &pColData, &shouldFree);

1822 + if( rc!=SQLITE_OK ){

1823 + return rc;

1824 + }

1825 + /* recoverValidateLeafCell() should guarantee that this will never

1826 + * occur.

1827 + */

1828 + if( !SerialTypeIsCompatible(iColType, pRecover->pTypes[i]) ){

1829 + if( shouldFree ){

1830 + sqlite3_free(pColData);

1831 + }

1832 + return SQLITE_ERROR;

1833 + }

1834 +

1835 + switch( iColType ){

1836 + case 0 : sqlite3_result_null(ctx); break;

1837 + case 1 : sqlite3_result_int64(ctx, decodeSigned(pColData, 1)); break;

1838 + case 2 : sqlite3_result_int64(ctx, decodeSigned(pColData, 2)); break;

1839 + case 3 : sqlite3_result_int64(ctx, decodeSigned(pColData, 3)); break;

1840 + case 4 : sqlite3_result_int64(ctx, decodeSigned(pColData, 4)); break;

1841 + case 5 : sqlite3_result_int64(ctx, decodeSigned(pColData, 6)); break;

1842 + case 6 : sqlite3_result_int64(ctx, decodeSigned(pColData, 8)); break;

1843 + case 7 : sqlite3_result_double(ctx, decodeFloat64(pColData)); break;

1844 + case 8 : sqlite3_result_int(ctx, 0); break;

1845 + case 9 : sqlite3_result_int(ctx, 1); break;

1846 + case 10 : assert( iColType!=10 ); break;

1847 + case 11 : assert( iColType!=11 ); break;

1848 +

1849 + default : {

1850 + u32 l = SerialTypeLength(iColType);

1851 +

1852 + /* If pColData was already allocated, arrange to pass ownership. */

1853 + sqlite3_destructor_type pFn = SQLITE_TRANSIENT;

1854 + if( shouldFree ){

1855 + pFn = sqlite3_free;

1856 + shouldFree = 0;

1857 + }

1858 +

1859 + if( SerialTypeIsBlob(iColType) ){

1860 + sqlite3_result_blob(ctx, pColData, l, pFn);

1861 + }else{

1862 + if( pCursor->iEncoding==SQLITE_UTF16LE ){

1863 + sqlite3_result_text16le(ctx, (const void*)pColData, l, pFn);

1864 + }else if( pCursor->iEncoding==SQLITE_UTF16BE ){

1865 + sqlite3_result_text16be(ctx, (const void*)pColData, l, pFn);

1866 + }else{

1867 + sqlite3_result_text(ctx, (const char*)pColData, l, pFn);

1868 + }

1869 + }

1870 + } break;

1871 + }

1872 + if( shouldFree ){

1873 + sqlite3_free(pColData);

1874 + }

1875 + return SQLITE_OK;

1876 +}

1877 +

1878 +static int recoverRowid(sqlite3_vtab_cursor pVtabCursor, sqlite_int64 pRowid) {

1879 + RecoverCursor pCursor = (RecoverCursor)pVtabCursor;

1880 + FNENTRY();

1881 + *pRowid = leafCursorCellRowid(pCursor->pLeafCursor);

1882 + return SQLITE_OK;

1883 +}

1884 +

1885 +static sqlite3_module recoverModule = {

1886 + 0, /* iVersion */

1887 + recoverCreate, /* xCreate - create a table */

1888 + recoverConnect, /* xConnect - connect to an existing table */

1889 + recoverBestIndex, /* xBestIndex - Determine search strategy */

1890 + recoverDisconnect, /* xDisconnect - Disconnect from a table */

1891 + recoverDestroy, /* xDestroy - Drop a table */

1892 + recoverOpen, /* xOpen - open a cursor */

1893 + recoverClose, /* xClose - close a cursor */

1894 + recoverFilter, /* xFilter - configure scan constraints */

1895 + recoverNext, /* xNext - advance a cursor */

1896 + recoverEof, /* xEof */

1897 + recoverColumn, /* xColumn - read data */

1898 + recoverRowid, /* xRowid - read data */

1899 + 0, /* xUpdate - write data */

1900 + 0, /* xBegin - begin transaction */

1901 + 0, /* xSync - sync transaction */

1902 + 0, /* xCommit - commit transaction */

1903 + 0, /* xRollback - rollback transaction */

1904 + 0, /* xFindFunction - function overloading */

1905 + 0, /* xRename - rename the table */

1906 +};

1907 +

1908 +int recoverVtableInit(sqlite3 *db){

1909 + return sqlite3_create_module_v2(db, "recover", &recoverModule, NULL, 0);

1910 +}

1911 +

1912 +/* This section of code is for parsing the create input and

1913 + * initializing the module.

1914 + */

1915 +

/* Find the next word in zText and place the endpoints in pzWord*.
 * Returns true if the word is non-empty.  "Word" is defined as
 * ASCII alphanumeric plus '_' at this time.
 *
 * Leading ASCII whitespace is skipped.  *pzWordStart receives the first
 * character of the word and *pzWordEnd the character just past it; for
 * an empty word both point at the same position.
 */
static int findWord(const char *zText,
                    const char **pzWordStart, const char **pzWordEnd){
  int r;
  while( ascii_isspace(*zText) ){
    zText++;
  }
  *pzWordStart = zText;
  while( ascii_isalnum(*zText) || *zText=='_' ){
    zText++;
  }
  /* Compute the result before storing the end pointer: callers may
   * pass the same location for both outputs.
   */
  r = zText>*pzWordStart;  /* In case pzWordStart==pzWordEnd */
  *pzWordEnd = zText;
  return r;
}

1934 +

/* Return true if the next word in zText is zWord, also setting
 * *pzContinue to the character after the word.  The comparison is
 * ASCII case-insensitive.  *pzContinue is left untouched on failure.
 *
 * The found word must match zWord in full.  Comparing only the found
 * word's length would let any prefix of zWord through (for instance
 * "N" for "NOT"), so the character of zWord following the matched
 * prefix is required to be the terminator.
 */
static int expectWord(const char *zText, const char *zWord,
                      const char **pzContinue){
  const char *zWordStart, *zWordEnd;
  int nWord;

  if( !findWord(zText, &zWordStart, &zWordEnd) ){
    return 0;
  }
  nWord = (int)(zWordEnd - zWordStart);

  /* zWord[nWord] is only read after the first nWord characters have
   * matched a run of non-NUL word characters, so (assuming
   * ascii_strncasecmp() has strncasecmp semantics) it lies within zWord.
   */
  if( ascii_strncasecmp(zWord, zWordStart, nWord)!=0 || zWord[nWord]!='\0' ){
    return 0;
  }

  *pzContinue = zWordEnd;
  return 1;
}

1948 +

1949 +/* Parse the name and type information out of parameter. In case of

1950 + * success, *pzNameStart/End contain the name of the column,

1951 + * pzTypeStart/End contain the top-level type, and pTypeMask has the

1952 + * type mask to use for the column.

1953 + */

1954 +static int findNameAndType(const char *parameter,

1955 + const char pzNameStart, const char pzNameEnd,

1956 + const char pzTypeStart, const char pzTypeEnd,

1957 + unsigned char *pTypeMask){

1958 + unsigned nNameLen; /* Length of found name. */

1959 + const char zEnd; / Current end of parsed column information. */

1960 + int bNotNull; /* Non-zero if NULL is not allowed for name. */

1961 + int bStrict; /* Non-zero if column requires exact type match. */

1962 + const char zDummy; / Dummy parameter, result unused. */

1963 + unsigned i;

1964 +

1965 + /* strictMask is used for STRICT, strictMask\|otherMask if STRICT is

1966 + * not supplied. zReplace provides an alternate type to expose to

1967 + * the caller.

1968 + */

1969 + static struct {

1970 + const char *zName;

1971 + unsigned char strictMask;

1972 + unsigned char otherMask;

1973 + const char *zReplace;

1974 + } kTypeInfo[] = {

1975 + { "ANY",

1976 + MASK_INTEGER \| MASK_FLOAT \| MASK_BLOB \| MASK_TEXT \| MASK_NULL,

1977 + 0, "",

1978 + },

1979 + { "ROWID", MASK_INTEGER \| MASK_ROWID, 0, "INTEGER", },

1980 + { "INTEGER", MASK_INTEGER \| MASK_NULL, 0, NULL, },

1981 + { "FLOAT", MASK_FLOAT \| MASK_NULL, MASK_INTEGER, NULL, },

1982 + { "NUMERIC", MASK_INTEGER \| MASK_FLOAT \| MASK_NULL, MASK_TEXT, NULL, },

1983 + { "TEXT", MASK_TEXT \| MASK_NULL, MASK_BLOB, NULL, },

1984 + { "BLOB", MASK_BLOB \| MASK_NULL, 0, NULL, },

1985 + };

1986 +

1987 + if( !findWord(parameter, pzNameStart, pzNameEnd) ){

1988 + return SQLITE_MISUSE;

1989 + }

1990 +

1991 + /* Manifest typing, accept any storage type. */

1992 + if( !findWord(*pzNameEnd, pzTypeStart, pzTypeEnd) ){

1993 + pzTypeEnd = pzTypeStart = "";

1994 + *pTypeMask = MASK_INTEGER \| MASK_FLOAT \| MASK_BLOB \| MASK_TEXT \| MASK_NULL;

1995 + return SQLITE_OK;

1996 + }

1997 +

1998 + nNameLen = pzTypeEnd - pzTypeStart;

1999 + for( i=0; i<ArraySize(kTypeInfo); ++i ){

2000 + if( ascii_strncasecmp(kTypeInfo[i].zName, *pzTypeStart, nNameLen)==0 ){

2001 + break;

2002 + }

2003 + }

2004 + if( i==ArraySize(kTypeInfo) ){

2005 + return SQLITE_MISUSE;

2006 + }

2007 +

2008 + zEnd = *pzTypeEnd;

2009 + bStrict = 0;

2010 + if( expectWord(zEnd, "STRICT", &zEnd) ){

2011 + /* TODO(shess): Ick. But I don't want another single-purpose

2012 + * flag, either.

2013 + */

2014 + if( kTypeInfo[i].zReplace && !kTypeInfo[i].zReplace[0] ){

2015 + return SQLITE_MISUSE;

2016 + }

2017 + bStrict = 1;

2018 + }

2019 +

2020 + bNotNull = 0;

2021 + if( expectWord(zEnd, "NOT", &zEnd) ){

2022 + if( expectWord(zEnd, "NULL", &zEnd) ){

2023 + bNotNull = 1;

2024 + }else{

2025 + /* Anything other than NULL after NOT is an error. */

2026 + return SQLITE_MISUSE;

2027 + }

2028 + }

2029 +

2030 + /* Anything else is an error. */

2031 + if( findWord(zEnd, &zDummy, &zDummy) ){

2032 + return SQLITE_MISUSE;

2033 + }

2034 +

2035 + *pTypeMask = kTypeInfo[i].strictMask;

2036 + if( !bStrict ){

2037 + *pTypeMask \|= kTypeInfo[i].otherMask;

2038 + }

2039 + if( bNotNull ){

2040 + *pTypeMask &= ~MASK_NULL;

2041 + }

2042 + if( kTypeInfo[i].zReplace ){

2043 + *pzTypeStart = kTypeInfo[i].zReplace;

2044 + pzTypeEnd = pzTypeStart + strlen(*pzTypeStart);

2045 + }

2046 + return SQLITE_OK;

2047 +}

2048 +

2049 +/* Parse the arguments, placing type masks in *pTypes and the exposed

2050 + * schema in *pzCreateSql (for sqlite3_declare_vtab).

2051 + */

2052 +static int ParseColumnsAndGenerateCreate(unsigned nCols,

2053 + const char const pCols,

2054 + char **pzCreateSql,

2055 + unsigned char *pTypes,

2056 + char **pzErr){

2057 + unsigned i;

2058 + char *zCreateSql = sqlite3_mprintf("CREATE TABLE x(");

2059 + if( !zCreateSql ){

2060 + return SQLITE_NOMEM;

2061 + }

2062 +

2063 + for( i=0; i<nCols; i++ ){

2064 + const char *zSep = (i < nCols - 1 ? ", " : ")");

2065 + const char *zNotNull = "";

2066 + const char zNameStart, zNameEnd;

2067 + const char zTypeStart, zTypeEnd;

2068 + int rc = findNameAndType(pCols[i],

2069 + &zNameStart, &zNameEnd,

2070 + &zTypeStart, &zTypeEnd,

2071 + &pTypes[i]);

2072 + if( rc!=SQLITE_OK ){

2073 + *pzErr = sqlite3_mprintf("unable to parse column %d", i);

2074 + sqlite3_free(zCreateSql);

2075 + return rc;

2076 + }

2077 +

2078 + if( !(pTypes[i]&MASK_NULL) ){

2079 + zNotNull = " NOT NULL";

2080 + }

2081 +

2082 + /* Add name and type to the create statement. */

2083 + zCreateSql = sqlite3_mprintf("%z%.s %.s%s%s",

2084 + zCreateSql,

2085 + zNameEnd - zNameStart, zNameStart,

2086 + zTypeEnd - zTypeStart, zTypeStart,

2087 + zNotNull, zSep);

2088 + if( !zCreateSql ){

2089 + return SQLITE_NOMEM;

2090 + }

2091 + }

2092 +

2093 + *pzCreateSql = zCreateSql;

2094 + return SQLITE_OK;

2095 +}

2096 +

2097 +/* Helper function for initializing the module. */

2098 +/* argv[0] module name

2099 + * argv[1] db name for virtual table

2100 + * argv[2] virtual table name

2101 + * argv[3] backing table name

2102 + * argv[4] columns

2103 + */

2104 +/* TODO(shess): Since connect isn't supported, could inline into

2105 + * recoverCreate().

2106 + */

2107 +/* TODO(shess): Explore cases where it would make sense to set pzErr. /

2108 +static int recoverInit(

2109 + sqlite3 db, / Database connection */

2110 + void pAux, / unused */

2111 + int argc, const char constargv, /* Parameters to CREATE TABLE statement * /

2112 + sqlite3_vtab *ppVtab, / OUT: New virtual table */

2113 + char *pzErr / OUT: Error message, if any */

2114 +){

2115 + const unsigned kTypeCol = 4; /* First argument with column type info. */

2116 + Recover pRecover; / Virtual table structure being created. */

2117 + char zDot; / Any dot found in "db.table" backing. */

2118 + u32 iRootPage; /* Root page of backing table. */

2119 + char zCreateSql; / Schema of created virtual table. */

2120 + int rc;

2121 +

2122 + /* Require to be in the temp database. */

2123 + if( ascii_strcasecmp(argv[1], "temp")!=0 ){

2124 + *pzErr = sqlite3_mprintf("recover table must be in temp database");

2125 + return SQLITE_MISUSE;

2126 + }

2127 +

2128 + /* Need the backing table and at least one column. */

2129 + if( argc<=kTypeCol ){

2130 + *pzErr = sqlite3_mprintf("no columns specified");

2131 + return SQLITE_MISUSE;

2132 + }

2133 +

2134 + pRecover = sqlite3_malloc(sizeof(Recover));

2135 + if( !pRecover ){

2136 + return SQLITE_NOMEM;

2137 + }

2138 + memset(pRecover, 0, sizeof(*pRecover));

2139 + pRecover->base.pModule = &recoverModule;

2140 + pRecover->db = db;

2141 +

2142 + /* Parse out db.table, assuming main if no dot. */

2143 + zDot = strchr(argv[3], '.');

2144 + if( !zDot ){

2145 + pRecover->zDb = sqlite3_strdup(db->aDb[0].zName);

2146 + pRecover->zTable = sqlite3_strdup(argv[3]);

2147 + }else if( zDot>argv[3] && zDot[1]!='\0' ){

2148 + pRecover->zDb = sqlite3_strndup(argv[3], zDot - argv[3]);

2149 + pRecover->zTable = sqlite3_strdup(zDot + 1);

2150 + }else{

2151 + /* ".table" or "db." not allowed. */

2152 + *pzErr = sqlite3_mprintf("ill-formed table specifier");

2153 + recoverRelease(pRecover);

2154 + return SQLITE_ERROR;

2155 + }

2156 +

2157 + pRecover->nCols = argc - kTypeCol;

2158 + pRecover->pTypes = sqlite3_malloc(pRecover->nCols);

2159 + if( !pRecover->zDb \|\| !pRecover->zTable \|\| !pRecover->pTypes ){

2160 + recoverRelease(pRecover);

2161 + return SQLITE_NOMEM;

2162 + }

2163 +

2164 + /* Require the backing table to exist. */

2165 + /* TODO(shess): Be more pedantic about the form of the descriptor

2166 + * string. This already fails for poorly-formed strings, simply

2167 + * because there won't be a root page, but it would make more sense

2168 + * to be explicit.

2169 + */

2170 + rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable, &iRootPage);

2171 + if( rc!=SQLITE_OK ){

2172 + *pzErr = sqlite3_mprintf("unable to find backing table");

2173 + recoverRelease(pRecover);

2174 + return rc;

2175 + }

2176 +

2177 + /* Parse the column definitions. */

2178 + rc = ParseColumnsAndGenerateCreate(pRecover->nCols, argv + kTypeCol,

2179 + &zCreateSql, pRecover->pTypes, pzErr);

2180 + if( rc!=SQLITE_OK ){

2181 + recoverRelease(pRecover);

2182 + return rc;

2183 + }

2184 +

2185 + rc = sqlite3_declare_vtab(db, zCreateSql);

2186 + sqlite3_free(zCreateSql);

2187 + if( rc!=SQLITE_OK ){

2188 + recoverRelease(pRecover);

2189 + return rc;

2190 + }

2191 +

2192 + ppVtab = (sqlite3_vtab )pRecover;

2193 + return SQLITE_OK;

2194 +}

OLD	NEW