| OLD | NEW |
| (Empty) |
| 1 Add new virtual table 'recover' to src/ and the amalgamation. | |
| 2 | |
| 3 Since recover.c is in somewhat active development, it is possible that | |
| 4 the patch below will not reliably re-create the file. | |
| 5 | |
| 6 shess@chromium.org | |
| 7 | |
| 8 Generated with: | |
| 9 git diff --cached --relative=third_party/sqlite/src --src-prefix='' --dst-prefix='' > third_party/sqlite/recover.patch | |
| 10 [--cached because otherwise the diff adding recover.c wasn't generated.] | |
| 11 | |
| 12 diff --git Makefile.in Makefile.in | |
| 13 index f3239f3..216742c 100644 | |
| 14 --- Makefile.in | |
| 15 +++ Makefile.in | |
| 16 @@ -251,6 +251,7 @@ SRC = \ | |
| 17 $(TOP)/src/prepare.c \ | |
| 18 $(TOP)/src/printf.c \ | |
| 19 $(TOP)/src/random.c \ | |
| 20 + $(TOP)/src/recover.c \ | |
| 21 $(TOP)/src/resolve.c \ | |
| 22 $(TOP)/src/rowset.c \ | |
| 23 $(TOP)/src/select.c \ | |
| 24 diff --git src/sqlite.h.in src/sqlite.h.in | |
| 25 index 62b9326..fb76659 100644 | |
| 26 --- src/sqlite.h.in | |
| 27 +++ src/sqlite.h.in | |
| 28 @@ -6403,6 +6403,17 @@ int sqlite3_wal_checkpoint_v2( | |
| 29 #define SQLITE_CHECKPOINT_RESTART 2 | |
| 30 | |
| 31 | |
| 32 +/* Begin recover.patch for Chromium */ | |
| 33 +/* | |
| 34 +** Call to initialize the recover virtual-table modules (see recover.c). | |
| 35 +** | |
| 36 +** This could be loaded by default in main.c, but that would make the | |
| 37 +** virtual table available to Web SQL. Breaking it out allows only | |
| 38 +** selected users to enable it (currently sql/recovery.cc). | |
| 39 +*/ | |
| 40 +int recoverVtableInit(sqlite3 *db); | |
| 41 +/* End recover.patch for Chromium */ | |
| 42 + | |
| 43 /* | |
| 44 ** Undo the hack that converts floating point types to integer for | |
| 45 ** builds on processors without floating point support. | |
| 46 diff --git tool/mksqlite3c.tcl tool/mksqlite3c.tcl | |
| 47 index fa99f2d..df2df07 100644 | |
| 48 --- tool/mksqlite3c.tcl | |
| 49 +++ tool/mksqlite3c.tcl | |
| 50 @@ -293,6 +293,8 @@ foreach file { | |
| 51 main.c | |
| 52 notify.c | |
| 53 | |
| 54 + recover.c | |
| 55 + | |
| 56 fts3.c | |
| 57 fts3_aux.c | |
| 58 fts3_expr.c | |
| 59 diff --git src/recover.c src/recover.c | |
| 60 new file mode 100644 | |
| 61 index 0000000..6430c8b | |
| 62 --- /dev/null | |
| 63 +++ src/recover.c | |
| 64 @@ -0,0 +1,2130 @@ | |
| 65 +/* | |
| 66 +** 2012 Jan 11 | |
| 67 +** | |
| 68 +** The author disclaims copyright to this source code. In place of | |
| 69 +** a legal notice, here is a blessing: | |
| 70 +** | |
| 71 +** May you do good and not evil. | |
| 72 +** May you find forgiveness for yourself and forgive others. | |
| 73 +** May you share freely, never taking more than you give. | |
| 74 +*/ | |
| 75 +/* TODO(shess): THIS MODULE IS STILL EXPERIMENTAL. DO NOT USE IT. */ | |
| 76 +/* Implements a virtual table "recover" which can be used to recover | |
| 77 + * data from a corrupt table. The table is walked manually, with | |
| 78 + * corrupt items skipped. Additionally, any errors while reading will | |
| 79 + * be skipped. | |
| 80 + * | |
| 81 + * Given a table with this definition: | |
| 82 + * | |
| 83 + * CREATE TABLE Stuff ( | |
| 84 + * name TEXT PRIMARY KEY, | |
| 85 + * value TEXT NOT NULL | |
| 86 + * ); | |
| 87 + * | |
| 88 + * to recover the data from the table, you could do something like: | |
| 89 + * | |
| 90 + * -- Attach another database, the original is not trustworthy. | |
| 91 + * ATTACH DATABASE '/tmp/db.db' AS rdb; | |
| 92 + * -- Create a new version of the table. | |
| 93 + * CREATE TABLE rdb.Stuff ( | |
| 94 + * name TEXT PRIMARY KEY, | |
| 95 + * value TEXT NOT NULL | |
| 96 + * ); | |
| 97 + * -- This will read the original table's data. | |
| 98 + * CREATE VIRTUAL TABLE temp.recover_Stuff using recover( | |
| 99 + * main.Stuff, | |
| 100 + * name TEXT STRICT NOT NULL, -- only real TEXT data allowed | |
| 101 + * value TEXT STRICT NOT NULL | |
| 102 + * ); | |
| 103 + * -- Corruption means the UNIQUE constraint may no longer hold for | |
| 104 + * -- Stuff, so either OR REPLACE or OR IGNORE must be used. | |
| 105 + * INSERT OR REPLACE INTO rdb.Stuff (rowid, name, value ) | |
| 106 + * SELECT rowid, name, value FROM temp.recover_Stuff; | |
| 107 + * DROP TABLE temp.recover_Stuff; | |
| 108 + * DETACH DATABASE rdb; | |
| 109 + * -- Move db.db to replace original db in filesystem. | |
| 110 + * | |
| 111 + * | |
| 112 + * Usage | |
| 113 + * | |
| 114 + * Given the goal of dealing with corruption, it would not be safe to | |
| 115 + * create a recovery table in the database being recovered. So | |
| 116 + * recovery tables must be created in the temp database. They are not | |
| 117 + * appropriate to persist, in any case. [As a bonus, sqlite_master | |
| 118 + * tables can be recovered. Perhaps more cute than useful, though.] | |
| 119 + * | |
| 120 + * The parameters are a specifier for the table to read, and a column | |
| 121 + * definition for each bit of data stored in that table. The named | |
| 122 + * table must be convertible to a root page number by reading the | |
| 123 + * sqlite_master table. Bare table names are assumed to be in | |
| 124 + * database 0 ("main"), other databases can be specified in db.table | |
| 125 + * fashion. | |
| 126 + * | |
| 127 + * Column definitions are similar to BUT NOT THE SAME AS those | |
| 128 + * provided to CREATE statements: | |
| 129 + * column-def: column-name [type-name [STRICT] [NOT NULL]] | |
| 130 + * type-name: (ANY|ROWID|INTEGER|FLOAT|NUMERIC|TEXT|BLOB) | |
| 131 + * | |
| 132 + * Only those exact type names are accepted, there is no type | |
| 133 + * intuition. The only constraints accepted are STRICT (see below) | |
| 134 + * and NOT NULL. Anything unexpected will cause the create to fail. | |
| 135 + * | |
| 136 + * ANY is a convenience to indicate that manifest typing is desired. | |
| 137 + * It is equivalent to not specifying a type at all. The results for | |
| 138 + * such columns will have the type of the data's storage. The exposed | |
| 139 + * schema will contain no type for that column. | |
| 140 + * | |
| 141 + * ROWID is used for columns representing aliases to the rowid | |
| 142 + * (INTEGER PRIMARY KEY, with or without AUTOINCREMENT), to make the | |
| 143 + * concept explicit. Such columns are actually stored as NULL, so | |
| 144 + * they cannot be simply ignored. The exposed schema will be INTEGER | |
| 145 + * for that column. | |
| 146 + * | |
| 147 + * NOT NULL causes rows with a NULL in that column to be skipped. It | |
| 148 + * also adds NOT NULL to the column in the exposed schema. If the | |
| 149 + * table has ever had columns added using ALTER TABLE, then those | |
| 150 + * columns implicitly contain NULL for rows which have not been | |
| 151 + * updated. [Workaround using COALESCE() in your SELECT statement.] | |
| 152 + * | |
| 153 + * The created table is read-only, with no indices. Any SELECT will | |
| 154 + * be a full-table scan, returning each valid row read from the | |
| 155 + * storage of the backing table. The rowid will be the rowid of the | |
| 156 + * row from the backing table. "Valid" means: | |
| 157 + * - The cell metadata for the row is well-formed. Mainly this means that | |
| 158 + * the cell header info describes a payload of the size indicated by | |
| 159 + * the cell's payload size. | |
| 160 + * - The cell does not run off the page. | |
| 161 + * - The cell does not overlap any other cell on the page. | |
| 162 + * - The cell doesn't contain too many columns. | |
| 163 + * - The types of the serialized data match the indicated types (see below). | |
| 164 + * | |
| 165 + * | |
| 166 + * Type affinity versus type storage. | |
| 167 + * | |
| 168 + * http://www.sqlite.org/datatype3.html describes SQLite's type | |
| 169 + * affinity system. The system provides for automated coercion of | |
| 170 + * types in certain cases, transparently enough that many developers | |
| 171 + * do not realize that it is happening. Importantly, it implies that | |
| 172 + * the raw data stored in the database may not have the obvious type. | |
| 173 + * | |
| 174 + * Differences between the stored data types and the expected data | |
| 175 + * types may be a signal of corruption. This module makes some | |
| 176 + * allowances for automatic coercion. It is important to be conscious | |
| 177 + * of the difference between the schema exposed by the module, and the | |
| 178 + * data types read from storage. The following table describes how | |
| 179 + * the module interprets things: | |
| 180 + * | |
| 181 + * type schema data STRICT | |
| 182 + * ---- ------ ---- ------ | |
| 183 + * ANY <none> any any | |
| 184 + * ROWID INTEGER n/a n/a | |
| 185 + * INTEGER INTEGER integer integer | |
| 186 + * FLOAT FLOAT integer or float float | |
| 187 + * NUMERIC NUMERIC integer, float, or text integer or float | |
| 188 + * TEXT TEXT text or blob text | |
| 189 + * BLOB BLOB blob blob | |
| 190 + * | |
| 191 + * type is the type provided to the recover module, schema is the | |
| 192 + * schema exposed by the module, data is the acceptable types of data | |
| 193 + * decoded from storage, and STRICT is a modification of that. | |
| 194 + * | |
| 195 + * A very loose recovery system might use ANY for all columns, then | |
| 196 + * use the appropriate sqlite3_column_*() calls to coerce to expected | |
| 197 + * types. This doesn't provide much protection if a page from a | |
| 198 + * different table with the same column count is linked into an | |
| 199 + * inappropriate btree. | |
| 200 + * | |
| 201 + * A very tight recovery system might use STRICT to enforce typing on | |
| 202 + * all columns, preferring to skip rows which are valid at the storage | |
| 203 + * level but don't contain the right types. Note that FLOAT STRICT is | |
| 204 + * almost certainly not appropriate, since integral values are | |
| 205 + * transparently stored as integers, when that is more efficient. | |
| 206 + * | |
| 207 + * Another option is to use ANY for all columns and inspect each | |
| 208 + * result manually (using sqlite3_column_*). This should only be | |
| 209 + * necessary in cases where developers have used manifest typing (test | |
| 210 + * to make sure before you decide that you aren't using manifest | |
| 211 + * typing!). | |
| 212 + * | |
| 213 + * | |
| 214 + * Caveats | |
| 215 + * | |
| 216 + * Leaf pages not referenced by interior nodes will not be found. | |
| 217 + * | |
| 218 + * Leaf pages referenced from interior nodes of other tables will not | |
| 219 + * be resolved. | |
| 220 + * | |
| 221 + * Rows referencing invalid overflow pages will be skipped. | |
| 222 + * | |
| 223 + * SQLite rows have a header which describes how to interpret the rest | |
| 224 + * of the payload. The header can be valid in cases where the rest of | |
| 225 + * the record is actually corrupt (in the sense that the data is not | |
| 226 + * the intended data). This can especially happen WRT overflow pages, | |
| 227 + * as lack of atomic updates between pages is the primary form of | |
| 228 + * corruption I have seen in the wild. | |
| 229 + */ | |
| 230 +/* The implementation is via a series of cursors. The cursor | |
| 231 + * implementations follow the pattern: | |
| 232 + * | |
| 233 + * // Creates the cursor using various initialization info. | |
| 234 + * int cursorCreate(...); | |
| 235 + * | |
| 236 + * // Returns 1 if there is no more data, 0 otherwise. | |
| 237 + * int cursorEOF(Cursor *pCursor); | |
| 238 + * | |
| 239 + * // Various accessors can be used if not at EOF. | |
| 240 + * | |
| 241 + * // Move to the next item. | |
| 242 + * int cursorNext(Cursor *pCursor); | |
| 243 + * | |
| 244 + * // Destroy the memory associated with the cursor. | |
| 245 + * void cursorDestroy(Cursor *pCursor); | |
| 246 + * | |
| 247 + * References in the following are to sections at | |
| 248 + * http://www.sqlite.org/fileformat2.html . | |
| 249 + * | |
| 250 + * RecoverLeafCursor iterates the records in a leaf table node | |
| 251 + * described in section 1.5 "B-tree Pages". When the node is | |
| 252 + * exhausted, an interior cursor is used to get the next leaf node, | |
| 253 + * and iteration continues there. | |
| 254 + * | |
| 255 + * RecoverInteriorCursor iterates the child pages in an interior table | |
| 256 + * node described in section 1.5 "B-tree Pages". When the node is | |
| 257 + * exhausted, a parent interior cursor is used to get the next | |
| 258 + * interior node at the same level, and iteration continues there. | |
| 259 + * | |
| 260 + * Together these record the path from the leaf level to the root of | |
| 261 + * the tree. Iteration happens from the leaves rather than the root | |
| 262 + * both for efficiency and because putting the special case at the | |
| 263 + * front of the list is easier to implement. | |
| 264 + * | |
| 265 + * RecoverCursor uses a RecoverLeafCursor to iterate the rows of a | |
| 266 + * table, returning results via the SQLite virtual table interface. | |
| 267 + */ | |
| 268 +/* TODO(shess): It might be useful to allow DEFAULT in types to | |
| 269 + * specify what to do for NULL when an ALTER TABLE case comes up. | |
| 270 + * Unfortunately, simply adding it to the exposed schema and using | |
| 271 + * sqlite3_result_null() does not cause the default to be generated. | |
| 272 + * Handling it ourselves seems hard, unfortunately. | |
| 273 + */ | |
| 274 + | |
| 275 +#include <assert.h> | |
| 276 +#include <ctype.h> | |
| 277 +#include <stdio.h> | |
| 278 +#include <string.h> | |
| 279 + | |
| 280 +/* Internal SQLite things that are used: | |
| 281 + * u32, u64, i64 types. | |
| 282 + * Btree, Pager, and DbPage structs. | |
| 283 + * DbPage.pData, .pPager, and .pgno | |
| 284 + * sqlite3 struct. | |
| 285 + * sqlite3BtreePager() and sqlite3BtreeGetPageSize() | |
| 286 + * sqlite3PagerAcquire() and sqlite3PagerUnref() | |
| 287 + * getVarint(). | |
| 288 + */ | |
| 289 +#include "sqliteInt.h" | |
| 290 + | |
| 291 +/* For debugging. */ | |
| 292 +#if 0 | |
| 293 +#define FNENTRY() fprintf(stderr, "In %s\n", __FUNCTION__) | |
| 294 +#else | |
| 295 +#define FNENTRY() | |
| 296 +#endif | |
| 297 + | |
| 298 +/* Generic constants and helper functions. */ | |
| 299 + | |
| 300 +static const unsigned char kTableLeafPage = 0x0D; | |
| 301 +static const unsigned char kTableInteriorPage = 0x05; | |
| 302 + | |
| 303 +/* From section 1.5. */ | |
| 304 +static const unsigned kiPageTypeOffset = 0; | |
| 305 +static const unsigned kiPageFreeBlockOffset = 1; | |
| 306 +static const unsigned kiPageCellCountOffset = 3; | |
| 307 +static const unsigned kiPageCellContentOffset = 5; | |
| 308 +static const unsigned kiPageFragmentedBytesOffset = 7; | |
| 309 +static const unsigned knPageLeafHeaderBytes = 8; | |
| 310 +/* Interior pages contain an additional field. */ | |
| 311 +static const unsigned kiPageRightChildOffset = 8; | |
| 312 +static const unsigned kiPageInteriorHeaderBytes = 12; | |
| 313 + | |
| 314 +/* Accepted types are specified by a mask. */ | |
| 315 +#define MASK_ROWID (1<<0) | |
| 316 +#define MASK_INTEGER (1<<1) | |
| 317 +#define MASK_FLOAT (1<<2) | |
| 318 +#define MASK_TEXT (1<<3) | |
| 319 +#define MASK_BLOB (1<<4) | |
| 320 +#define MASK_NULL (1<<5) | |
| 321 + | |
| 322 +/* Helpers to decode fixed-size fields. */ | |
| 323 +static u32 decodeUnsigned16(const unsigned char *pData){ | |
| 324 + return (pData[0]<<8) + pData[1]; | |
| 325 +} | |
| 326 +static u32 decodeUnsigned32(const unsigned char *pData){ | |
| 327 + return (decodeUnsigned16(pData)<<16) + decodeUnsigned16(pData+2); | |
| 328 +} | |
| 329 +static i64 decodeSigned(const unsigned char *pData, unsigned nBytes){ /* Big-endian two's-complement, nBytes>=1. */ | |
| 330 + i64 r = (signed char)(*pData); /* Sign-extend the top byte; plain char may be unsigned. */ | |
| 331 + while( --nBytes ){ | |
| 332 + r *= 256; /* Not <<=: left-shifting a negative value is undefined. */ | |
| 333 + r += *(++pData); | |
| 334 + } | |
| 335 + return r; | |
| 336 +} | |
| 337 +/* Derived from vdbeaux.c, sqlite3VdbeSerialGet(), case 7. */ | |
| 338 +/* TODO(shess): Determine if swapMixedEndianFloat() applies. */ | |
| 339 +static double decodeFloat64(const unsigned char *pData){ | |
| 340 +#if !defined(NDEBUG) | |
| 341 + static const u64 t1 = ((u64)0x3ff00000)<<32; | |
| 342 + static const double r1 = 1.0; | |
| 343 + u64 t2 = t1; | |
| 344 + assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 ); | |
| 345 +#endif | |
| 346 + i64 x = decodeSigned(pData, 8); | |
| 347 + double d; | |
| 348 + memcpy(&d, &x, sizeof(x)); | |
| 349 + return d; | |
| 350 +} | |
| 351 + | |
| 352 +/* Return true if a varint can safely be read from pData/nData. */ | |
| 353 +/* TODO(shess): DbPage points into the middle of a buffer which | |
| 354 + * contains the page data before DbPage. So code should always be | |
| 355 + * able to read a small number of varints safely. Consider whether to | |
| 356 + * trust that or not. | |
| 357 + */ | |
| 358 +static int checkVarint(const unsigned char *pData, unsigned nData){ | |
| 359 + unsigned i; | |
| 360 + | |
| 361 + /* In the worst case the decoder takes all 8 bits of the 9th byte. */ | |
| 362 + if( nData>=9 ){ | |
| 363 + return 1; | |
| 364 + } | |
| 365 + | |
| 366 + /* Look for a high-bit-clear byte in what's left. */ | |
| 367 + for( i=0; i<nData; ++i ){ | |
| 368 + if( !(pData[i]&0x80) ){ | |
| 369 + return 1; | |
| 370 + } | |
| 371 + } | |
| 372 + | |
| 373 + /* Cannot decode in the space given. */ | |
| 374 + return 0; | |
| 375 +} | |
| 376 + | |
| 377 +/* Return 1 if n varints can be read from pData/nData. */ | |
| 378 +static int checkVarints(const unsigned char *pData, unsigned nData, | |
| 379 + unsigned n){ | |
| 380 + unsigned nCur = 0; /* Byte offset within current varint. */ | |
| 381 + unsigned nFound = 0; /* Number of varints found. */ | |
| 382 + unsigned i; | |
| 383 + | |
| 384 + /* In the worst case the decoder takes all 8 bits of the 9th byte. */ | |
| 385 + if( nData>=9*n ){ | |
| 386 + return 1; | |
| 387 + } | |
| 388 + | |
| 389 + for( i=0; nFound<n && i<nData; ++i ){ | |
| 390 + nCur++; | |
| 391 + if( nCur==9 || !(pData[i]&0x80) ){ | |
| 392 + nFound++; | |
| 393 + nCur = 0; | |
| 394 + } | |
| 395 + } | |
| 396 + | |
| 397 + return nFound==n; | |
| 398 +} | |
| 399 + | |
| 400 +/* ctype and str[n]casecmp() can be affected by locale (eg, tr_TR). | |
| 401 + * These versions consider only the ASCII space. | |
| 402 + */ | |
| 403 +/* TODO(shess): It may be reasonable to just remove the need for these | |
| 404 + * entirely. The module could require "TEXT STRICT NOT NULL", not | |
| 405 + * "Text Strict Not Null" or whatever the developer felt like typing | |
| 406 + * that day. Handling corrupt data is a PERFECT place to be pedantic. | |
| 407 + */ | |
| 408 +static int ascii_isspace(char c){ | |
| 409 + /* From fts3_expr.c */ | |
| 410 + return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; | |
| 411 +} | |
| 412 +static int ascii_isalnum(int x){ | |
| 413 + /* From fts3_tokenizer1.c */ | |
| 414 + return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); | |
| 415 +} | |
| 416 +static int ascii_tolower(int x){ | |
| 417 + /* From fts3_tokenizer1.c */ | |
| 418 + return (x>='A' && x<='Z') ? x-'A'+'a' : x; | |
| 419 +} | |
| 420 +/* TODO(shess): Consider sqlite3_strnicmp() */ | |
| 421 +static int ascii_strncasecmp(const char *s1, const char *s2, size_t n){ | |
| 422 + const unsigned char *us1 = (const unsigned char *)s1; | |
| 423 + const unsigned char *us2 = (const unsigned char *)s2; | |
| 424 + while( *us1 && *us2 && n && ascii_tolower(*us1)==ascii_tolower(*us2) ){ | |
| 425 + us1++, us2++, n--; | |
| 426 + } | |
| 427 + return n ? ascii_tolower(*us1)-ascii_tolower(*us2) : 0; | |
| 428 +} | |
| 429 +static int ascii_strcasecmp(const char *s1, const char *s2){ | |
| 430 + /* If s2 is equal through strlen(s1), will exit while() due to s1's | |
| 431 + * trailing NUL, and return NUL-s2[strlen(s1)]. | |
| 432 + */ | |
| 433 + return ascii_strncasecmp(s1, s2, strlen(s1)+1); | |
| 434 +} | |
| 435 + | |
| 436 +/* For some reason I kept making mistakes with offset calculations. */ | |
| 437 +static const unsigned char *PageData(DbPage *pPage, unsigned iOffset){ | |
| 438 + assert( iOffset<=pPage->nPageSize ); | |
| 439 + return (unsigned char *)pPage->pData + iOffset; | |
| 440 +} | |
| 441 + | |
| 442 +/* The first page in the file contains a file header in the first 100 | |
| 443 + * bytes. The page's header information comes after that. Note that | |
| 444 + * the offsets in the page's header information are relative to the | |
| 445 + * beginning of the page, NOT the end of the page header. | |
| 446 + */ | |
| 447 +static const unsigned char *PageHeader(DbPage *pPage){ | |
| 448 + if( pPage->pgno==1 ){ | |
| 449 + const unsigned nDatabaseHeader = 100; | |
| 450 + return PageData(pPage, nDatabaseHeader); | |
| 451 + }else{ | |
| 452 + return PageData(pPage, 0); | |
| 453 + } | |
| 454 +} | |
| 455 + | |
| 456 +/* Helper to fetch the pager and page size for the named database. */ | |
| 457 +static int GetPager(sqlite3 *db, const char *zName, | |
| 458 + Pager **pPager, unsigned *pnPageSize){ | |
| 459 + Btree *pBt = NULL; | |
| 460 + int i; | |
| 461 + for( i=0; i<db->nDb; ++i ){ | |
| 462 + if( ascii_strcasecmp(db->aDb[i].zName, zName)==0 ){ | |
| 463 + pBt = db->aDb[i].pBt; | |
| 464 + break; | |
| 465 + } | |
| 466 + } | |
| 467 + if( !pBt ){ | |
| 468 + return SQLITE_ERROR; | |
| 469 + } | |
| 470 + | |
| 471 + *pPager = sqlite3BtreePager(pBt); | |
| 472 + *pnPageSize = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt); | |
| 473 + return SQLITE_OK; | |
| 474 +} | |
| 475 + | |
| 476 +/* iSerialType is a type read from a record header. See "2.1 Record Format". | |
| 477 + */ | |
| 478 + | |
| 479 +/* Storage size of iSerialType in bytes. My interpretation of SQLite | |
| 480 + * documentation is that text and blob fields can have 32-bit length. | |
| 481 + * Values past 2^31-12 will need more than 32 bits to encode, which is | |
| 482 + * why iSerialType is u64. | |
| 483 + */ | |
| 484 +static u32 SerialTypeLength(u64 iSerialType){ | |
| 485 + switch( iSerialType ){ | |
| 486 + case 0 : return 0; /* NULL */ | |
| 487 + case 1 : return 1; /* Various integers. */ | |
| 488 + case 2 : return 2; | |
| 489 + case 3 : return 3; | |
| 490 + case 4 : return 4; | |
| 491 + case 5 : return 6; | |
| 492 + case 6 : return 8; | |
| 493 + case 7 : return 8; /* 64-bit float. */ | |
| 494 + case 8 : return 0; /* Constant 0. */ | |
| 495 + case 9 : return 0; /* Constant 1. */ | |
| 496 + case 10 : case 11 : assert( !"RESERVED TYPE"); return 0; | |
| 497 + } | |
| 498 + return (u32)((iSerialType>>1) - 6); | |
| 499 +} | |
| 500 + | |
| 501 +/* True if iSerialType refers to a blob. */ | |
| 502 +static int SerialTypeIsBlob(u64 iSerialType){ | |
| 503 + assert( iSerialType>=12 ); | |
| 504 + return (iSerialType%2)==0; | |
| 505 +} | |
| 506 + | |
| 507 +/* Returns true if the serialized type represented by iSerialType is | |
| 508 + * compatible with the given type mask. | |
| 509 + */ | |
| 510 +static int SerialTypeIsCompatible(u64 iSerialType, unsigned char mask){ | |
| 511 + switch( iSerialType ){ | |
| 512 + case 0 : return (mask&MASK_NULL)!=0; | |
| 513 + case 1 : return (mask&MASK_INTEGER)!=0; | |
| 514 + case 2 : return (mask&MASK_INTEGER)!=0; | |
| 515 + case 3 : return (mask&MASK_INTEGER)!=0; | |
| 516 + case 4 : return (mask&MASK_INTEGER)!=0; | |
| 517 + case 5 : return (mask&MASK_INTEGER)!=0; | |
| 518 + case 6 : return (mask&MASK_INTEGER)!=0; | |
| 519 + case 7 : return (mask&MASK_FLOAT)!=0; | |
| 520 + case 8 : return (mask&MASK_INTEGER)!=0; | |
| 521 + case 9 : return (mask&MASK_INTEGER)!=0; | |
| 522 + case 10 : assert( !"RESERVED TYPE"); return 0; | |
| 523 + case 11 : assert( !"RESERVED TYPE"); return 0; | |
| 524 + } | |
| 525 + return (mask&(SerialTypeIsBlob(iSerialType) ? MASK_BLOB : MASK_TEXT)); | |
| 526 +} | |
| 527 + | |
| 528 +/* Versions of strdup() with return values appropriate for | |
| 529 + * sqlite3_free(). malloc.c has sqlite3DbStrDup()/NDup(), but those | |
| 530 + * need sqlite3DbFree(), which seems intrusive. | |
| 531 + */ | |
| 532 +static char *sqlite3_strndup(const char *z, unsigned n){ | |
| 533 + char *zNew; | |
| 534 + | |
| 535 + if( z==NULL ){ | |
| 536 + return NULL; | |
| 537 + } | |
| 538 + | |
| 539 + zNew = sqlite3_malloc(n+1); | |
| 540 + if( zNew!=NULL ){ | |
| 541 + memcpy(zNew, z, n); | |
| 542 + zNew[n] = '\0'; | |
| 543 + } | |
| 544 + return zNew; | |
| 545 +} | |
| 546 +static char *sqlite3_strdup(const char *z){ | |
| 547 + if( z==NULL ){ | |
| 548 + return NULL; | |
| 549 + } | |
| 550 + return sqlite3_strndup(z, strlen(z)); | |
| 551 +} | |
| 552 + | |
| 553 +/* Fetch the page number of zTable in zDb from sqlite_master in zDb, | |
| 554 + * and put it in *piRootPage. | |
| 555 + */ | |
| 556 +static int getRootPage(sqlite3 *db, const char *zDb, const char *zTable, | |
| 557 + u32 *piRootPage){ | |
| 558 + char *zSql; /* SQL selecting root page of named element. */ | |
| 559 + sqlite3_stmt *pStmt; | |
| 560 + int rc; | |
| 561 + | |
| 562 + if( strcmp(zTable, "sqlite_master")==0 ){ | |
| 563 + *piRootPage = 1; | |
| 564 + return SQLITE_OK; | |
| 565 + } | |
| 566 + | |
| 567 + zSql = sqlite3_mprintf("SELECT rootpage FROM %s.sqlite_master " | |
| 568 + "WHERE type = 'table' AND tbl_name = %Q", | |
| 569 + zDb, zTable); | |
| 570 + if( !zSql ){ | |
| 571 + return SQLITE_NOMEM; | |
| 572 + } | |
| 573 + | |
| 574 + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); | |
| 575 + sqlite3_free(zSql); | |
| 576 + if( rc!=SQLITE_OK ){ | |
| 577 + return rc; | |
| 578 + } | |
| 579 + | |
| 580 + /* Require a result. */ | |
| 581 + rc = sqlite3_step(pStmt); | |
| 582 + if( rc==SQLITE_DONE ){ | |
| 583 + rc = SQLITE_CORRUPT; | |
| 584 + }else if( rc==SQLITE_ROW ){ | |
| 585 + *piRootPage = sqlite3_column_int(pStmt, 0); | |
| 586 + | |
| 587 + /* Require only one result. */ | |
| 588 + rc = sqlite3_step(pStmt); | |
| 589 + if( rc==SQLITE_DONE ){ | |
| 590 + rc = SQLITE_OK; | |
| 591 + }else if( rc==SQLITE_ROW ){ | |
| 592 + rc = SQLITE_CORRUPT; | |
| 593 + } | |
| 594 + } | |
| 595 + sqlite3_finalize(pStmt); | |
| 596 + return rc; | |
| 597 +} | |
| 598 + | |
| 599 +static int getEncoding(sqlite3 *db, const char *zDb, int* piEncoding){ | |
| 600 + sqlite3_stmt *pStmt; | |
| 601 + int rc; | |
| 602 + char *zSql = sqlite3_mprintf("PRAGMA %s.encoding", zDb); | |
| 603 + if( !zSql ){ | |
| 604 + return SQLITE_NOMEM; | |
| 605 + } | |
| 606 + | |
| 607 + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); | |
| 608 + sqlite3_free(zSql); | |
| 609 + if( rc!=SQLITE_OK ){ | |
| 610 + return rc; | |
| 611 + } | |
| 612 + | |
| 613 + /* Require a result. */ | |
| 614 + rc = sqlite3_step(pStmt); | |
| 615 + if( rc==SQLITE_DONE ){ | |
| 616 + /* This case should not be possible. */ | |
| 617 + rc = SQLITE_CORRUPT; | |
| 618 + }else if( rc==SQLITE_ROW ){ | |
| 619 + if( sqlite3_column_type(pStmt, 0)==SQLITE_TEXT ){ | |
| 620 + const char* z = (const char *)sqlite3_column_text(pStmt, 0); | |
| 621 + /* These strings match the literals in pragma.c. */ | |
| 622 + if( !strcmp(z, "UTF-16le") ){ | |
| 623 + *piEncoding = SQLITE_UTF16LE; | |
| 624 + }else if( !strcmp(z, "UTF-16be") ){ | |
| 625 + *piEncoding = SQLITE_UTF16BE; | |
| 626 + }else if( !strcmp(z, "UTF-8") ){ | |
| 627 + *piEncoding = SQLITE_UTF8; | |
| 628 + }else{ | |
| 629 + /* This case should not be possible. */ | |
| 630 + *piEncoding = SQLITE_UTF8; | |
| 631 + } | |
| 632 + }else{ | |
| 633 + /* This case should not be possible. */ | |
| 634 + *piEncoding = SQLITE_UTF8; | |
| 635 + } | |
| 636 + | |
| 637 + /* Require only one result. */ | |
| 638 + rc = sqlite3_step(pStmt); | |
| 639 + if( rc==SQLITE_DONE ){ | |
| 640 + rc = SQLITE_OK; | |
| 641 + }else if( rc==SQLITE_ROW ){ | |
| 642 + /* This case should not be possible. */ | |
| 643 + rc = SQLITE_CORRUPT; | |
| 644 + } | |
| 645 + } | |
| 646 + sqlite3_finalize(pStmt); | |
| 647 + return rc; | |
| 648 +} | |
| 649 + | |
| 650 +/* Cursor for iterating interior nodes. Interior page cells contain a | |
| 651 + * child page number and a rowid. The child page contains items left | |
| 652 + * of the rowid (less than). The rightmost page of the subtree is | |
| 653 + * stored in the page header. | |
| 654 + * | |
| 655 + * interiorCursorDestroy - release all resources associated with the | |
| 656 + * cursor and any parent cursors. | |
| 657 + * interiorCursorCreate - create a cursor with the given parent and page. | |
| 658 + * interiorCursorEOF - returns true if neither the cursor nor the | |
| 659 + * parent cursors can return any more data. | |
| 660 + * interiorCursorNextPage - fetch the next child page from the cursor. | |
| 661 + * | |
| 662 + * Logically, interiorCursorNextPage() returns the next child page | |
| 663 + * number from the page the cursor is currently reading, calling the | |
| 664 + * parent cursor as necessary to get new pages to read, until done. | |
| 665 + * Returns SQLITE_ROW if a page is returned, SQLITE_DONE if out of | |
| 666 + * pages, error otherwise. Unfortunately, if the table is corrupted | |
| 667 + * unexpected pages can be returned. If any unexpected page is found, | |
| 668 + * leaf or otherwise, it is returned to the caller for processing, | |
| 669 + * with the interior cursor left empty. The next call to | |
| 670 + * interiorCursorNextPage() will recurse to the parent cursor until an | |
| 671 + * interior page to iterate is returned. | |
| 672 + * | |
| 673 + * Note that while interiorCursorNextPage() will refuse to follow | |
| 674 + * loops, it does not keep track of pages returned for purposes of | |
| 675 + * preventing duplication. | |
| 676 + * | |
| 677 + * Note that interiorCursorEOF() could return false (not at EOF), and | |
| 678 + * interiorCursorNextPage() could still return SQLITE_DONE. This | |
| 679 + * could happen if there are more cells to iterate in an interior | |
| 680 + * page, but those cells refer to invalid pages. | |
| 681 + */ | |
| 682 +typedef struct RecoverInteriorCursor RecoverInteriorCursor; | |
| 683 +struct RecoverInteriorCursor { | |
| 684 + RecoverInteriorCursor *pParent; /* Parent node to this node. */ | |
| 685 + DbPage *pPage; /* Reference to interior page. */ | |
| 686 + unsigned nPageSize; /* Size of page. */ | |
| 687 + unsigned nChildren; /* Number of children on the page. */ | |
| 688 + unsigned iChild; /* Index of next child to return. */ | |
| 689 +}; | |
| 690 + | |
| 691 +static void interiorCursorDestroy(RecoverInteriorCursor *pCursor){ | |
| 692 + /* Destroy all the cursors to the root. */ | |
| 693 + while( pCursor ){ | |
| 694 + RecoverInteriorCursor *p = pCursor; | |
| 695 + pCursor = pCursor->pParent; | |
| 696 + | |
| 697 + if( p->pPage ){ | |
| 698 + sqlite3PagerUnref(p->pPage); | |
| 699 + p->pPage = NULL; | |
| 700 + } | |
| 701 + | |
| 702 + memset(p, 0xA5, sizeof(*p)); | |
| 703 + sqlite3_free(p); | |
| 704 + } | |
| 705 +} | |
| 706 + | |
| 707 +/* Internal helper. Reset storage in preparation for iterating pPage. */ | |
| 708 +static void interiorCursorSetPage(RecoverInteriorCursor *pCursor, | |
| 709 + DbPage *pPage){ | |
| 710 + assert( PageHeader(pPage)[kiPageTypeOffset]==kTableInteriorPage ); | |
| 711 + | |
| 712 + if( pCursor->pPage ){ | |
| 713 + sqlite3PagerUnref(pCursor->pPage); | |
| 714 + pCursor->pPage = NULL; | |
| 715 + } | |
| 716 + pCursor->pPage = pPage; | |
| 717 + pCursor->iChild = 0; | |
| 718 + | |
| 719 + /* A child for each cell, plus one in the header. */ | |
| 720 + /* TODO(shess): Sanity-check the count? Page header plus per-cell | |
| 721 + * cost of 16-bit offset, 32-bit page number, and one varint | |
| 722 + * (minimum 1 byte). | |
| 723 + */ | |
| 724 + pCursor->nChildren = decodeUnsigned16(PageHeader(pPage) + | |
| 725 + kiPageCellCountOffset) + 1; | |
| 726 +} | |
| 727 + | |
| 728 +static int interiorCursorCreate(RecoverInteriorCursor *pParent, | |
| 729 + DbPage *pPage, int nPageSize, | |
| 730 + RecoverInteriorCursor **ppCursor){ /* Allocates a cursor over interior page pPage, linked to pParent. */ | |
| 731 + RecoverInteriorCursor *pCursor = | |
| 732 + sqlite3_malloc(sizeof(RecoverInteriorCursor)); | |
| 733 + if( !pCursor ){ | |
| 734 + return SQLITE_NOMEM; /* *ppCursor is left untouched and pPage is not released here. */ | |
| 735 + } | |
| 736 + | |
| 737 + memset(pCursor, 0, sizeof(*pCursor)); | |
| 738 + pCursor->pParent = pParent; | |
| 739 + pCursor->nPageSize = nPageSize; | |
| 740 + interiorCursorSetPage(pCursor, pPage); /* The cursor now holds the reference to pPage. */ | |
| 741 + *ppCursor = pCursor; | |
| 742 + return SQLITE_OK; | |
| 743 +} | |
| 744 + | |
| 745 +/* Internal helper. Return the child page number at iChild, or 0 if the cell offset is unusable. */ | |
| 746 +static unsigned interiorCursorChildPage(RecoverInteriorCursor *pCursor){ | |
| 747 + const unsigned char *pPageHeader; /* Header of the current page. */ | |
| 748 + const unsigned char *pCellOffsets; /* Offset to page's cell offsets. */ | |
| 749 + unsigned iCellOffset; /* Offset of target cell. */ | |
| 750 + | |
| 751 + assert( pCursor->iChild<pCursor->nChildren ); | |
| 752 + | |
| 753 + /* Rightmost child is in the header. */ | |
| 754 + pPageHeader = PageHeader(pCursor->pPage); | |
| 755 + if( pCursor->iChild==pCursor->nChildren-1 ){ | |
| 756 + return decodeUnsigned32(pPageHeader + kiPageRightChildOffset); | |
| 757 + } | |
| 758 + | |
| 759 + /* Each cell is a 4-byte integer page number and a varint rowid | |
| 760 + * which is greater than the rowid of items in that sub-tree (this | |
| 761 + * module ignores ordering). The offset is from the beginning of the | |
| 762 + * page, not from the page header. | |
| 763 + */ | |
| 764 + pCellOffsets = pPageHeader + kiPageInteriorHeaderBytes; | |
| 765 + iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iChild*2); /* NOTE(review): this slot read is not bounded by nPageSize here - confirm nChildren is validated. */ | |
| 766 + if( iCellOffset<=pCursor->nPageSize-4 ){ | |
| 767 + return decodeUnsigned32(PageData(pCursor->pPage, iCellOffset)); | |
| 768 + } | |
| 769 + | |
| 770 + /* TODO(shess): Check for cell overlaps? Cells require 4 bytes plus | |
| 771 + * a varint. Check could be identical to leaf check (or even a | |
| 772 + * shared helper testing for "Cells starting in this range"?). | |
| 773 + */ | |
| 774 + | |
| 775 + /* If the offset is broken, return an invalid page number. */ | |
| 776 + return 0; | |
| 777 +} | |
| 778 + | |
| 779 +static int interiorCursorEOF(RecoverInteriorCursor *pCursor){ /* True when neither pCursor nor any ancestor has an unvisited child. */ | |
| 780 + /* Find a parent with remaining children. EOF if none found. */ | |
| 781 + while( pCursor && pCursor->iChild>=pCursor->nChildren ){ | |
| 782 + pCursor = pCursor->pParent; | |
| 783 + } | |
| 784 + return pCursor==NULL; | |
| 785 +} | |
| 786 + | |
| 787 +/* Internal helper. Used to detect if iPage would cause a loop: true if pCursor or an ancestor holds page iPage. */ | |
| 788 +static int interiorCursorPageInUse(RecoverInteriorCursor *pCursor, | |
| 789 + unsigned iPage){ | |
| 790 + /* Find any parent using the indicated page (each cursor's pPage is dereferenced, so it must be set). */ | |
| 791 + while( pCursor && pCursor->pPage->pgno!=iPage ){ | |
| 792 + pCursor = pCursor->pParent; | |
| 793 + } | |
| 794 + return pCursor!=NULL; | |
| 795 +} | |
| 796 + | |
| 797 +/* Get the next page from the interior cursor at *ppCursor. Returns | |
| 798 + * SQLITE_ROW with the page in *ppPage, or SQLITE_DONE if out of | |
| 799 + * pages, or the error SQLite returned. | |
| 800 + * | |
| 801 + * If the tree is uneven, then when the cursor attempts to get a new | |
| 802 + * interior page from the parent cursor, it may get a non-interior | |
| 803 + * page. In that case, the new page is returned, and *ppCursor is | |
| 804 + * updated to point to the parent cursor (this cursor is freed). | |
| 805 + */ | |
| 806 +/* TODO(shess): I've tried to avoid recursion in most of this code, | |
| 807 + * but this case is more challenging because the recursive call is in | |
| 808 + * the middle of operation. One option for converting it without | |
| 809 + * adding memory management would be to retain the head pointer and | |
| 810 + * use a helper to "back up" as needed. Another option would be to | |
| 811 + * reverse the list during traversal. | |
| 812 + */ | |
| 813 +static int interiorCursorNextPage(RecoverInteriorCursor **ppCursor, | |
| 814 + DbPage **ppPage){ | |
| 815 + RecoverInteriorCursor *pCursor = *ppCursor; | |
| 816 + while( 1 ){ | |
| 817 + int rc; | |
| 818 + const unsigned char *pPageHeader; /* Header of found page. */ | |
| 819 + | |
| 820 + /* Find a valid child page which isn't on the stack. */ | |
| 821 + while( pCursor->iChild<pCursor->nChildren ){ | |
| 822 + const unsigned iPage = interiorCursorChildPage(pCursor); | |
| 823 + pCursor->iChild++; | |
| 824 + if( interiorCursorPageInUse(pCursor, iPage) ){ | |
| 825 + fprintf(stderr, "Loop detected at %d\n", iPage); /* NOTE(review): iPage is unsigned but printed with %d. */ | |
| 826 + }else{ | |
| 827 + int rc = sqlite3PagerAcquire(pCursor->pPage->pPager, iPage, ppPage, 0); /* NOTE(review): shadows the outer rc; a failed acquire is skipped, not reported. */ | |
| 828 + if( rc==SQLITE_OK ){ | |
| 829 + return SQLITE_ROW; | |
| 830 + } | |
| 831 + } | |
| 832 + } | |
| 833 + | |
| 834 + /* This page has no more children. Get next page from parent. */ | |
| 835 + if( !pCursor->pParent ){ | |
| 836 + return SQLITE_DONE; | |
| 837 + } | |
| 838 + rc = interiorCursorNextPage(&pCursor->pParent, ppPage); /* Recursive call; it may replace pCursor->pParent. */ | |
| 839 + if( rc!=SQLITE_ROW ){ | |
| 840 + return rc; | |
| 841 + } | |
| 842 + | |
| 843 + /* If a non-interior page is received, that either means that the | |
| 844 + * tree is uneven, or that a child was re-used (say as an overflow | |
| 845 + * page). Remove this cursor and let the caller handle the page. | |
| 846 + */ | |
| 847 + pPageHeader = PageHeader(*ppPage); | |
| 848 + if( pPageHeader[kiPageTypeOffset]!=kTableInteriorPage ){ | |
| 849 + *ppCursor = pCursor->pParent; | |
| 850 + pCursor->pParent = NULL; /* Detach first so only this cursor is destroyed, not its ancestors. */ | |
| 851 + interiorCursorDestroy(pCursor); | |
| 852 + return SQLITE_ROW; | |
| 853 + } | |
| 854 + | |
| 855 + /* Iterate the new page; the cursor takes over the reference, so clear *ppPage. */ | |
| 856 + interiorCursorSetPage(pCursor, *ppPage); | |
| 857 + *ppPage = NULL; | |
| 858 + } | |
| 859 + | |
| 860 + assert(NULL); /* NOTREACHED() */ | |
| 861 + return SQLITE_CORRUPT; | |
| 862 +} | |
| 863 + | |
| 864 +/* Large rows are spilled to overflow pages. The row's main page | |
| 865 + * stores the overflow page number after the local payload, with a | |
| 866 + * linked list forward from there as necessary. overflowMaybeCreate() | |
| 867 + * and overflowGetSegment() provide an abstraction for accessing such | |
| 868 + * data while centralizing the code. | |
| 869 + * | |
| 870 + * overflowDestroy - releases all resources associated with the structure. | |
| 871 + * overflowMaybeCreate - create the overflow structure if it is needed | |
| 872 + * to represent the given record. See function comment. | |
| 873 + * overflowGetSegment - fetch a segment from the record, accounting | |
| 874 + * for overflow pages. Segments which are not | |
| 875 + * entirely contained within a page are constructed | |
| 876 + * into a buffer which is returned. See function comment. | |
| 877 + */ | |
| 878 +typedef struct RecoverOverflow RecoverOverflow; | |
| 879 +struct RecoverOverflow { | |
| 880 + RecoverOverflow *pNextOverflow; /* Next overflow page in the list, or NULL at the end. */ | |
| 881 + DbPage *pPage; /* Reference to this overflow page. */ | |
| 882 + unsigned nPageSize; /* Size of pPage. */ | |
| 883 +}; | |
| 884 + | |
| 885 +static void overflowDestroy(RecoverOverflow *pOverflow){ /* Frees the whole list starting at pOverflow, releasing each page; NULL is a no-op. */ | |
| 886 + while( pOverflow ){ | |
| 887 + RecoverOverflow *p = pOverflow; | |
| 888 + pOverflow = p->pNextOverflow; | |
| 889 + | |
| 890 + if( p->pPage ){ | |
| 891 + sqlite3PagerUnref(p->pPage); | |
| 892 + p->pPage = NULL; | |
| 893 + } | |
| 894 + | |
| 895 + memset(p, 0xA5, sizeof(*p)); /* Overwrite before freeing; presumably to make stale use obvious. */ | |
| 896 + sqlite3_free(p); | |
| 897 + } | |
| 898 +} | |
| 899 + | |
| 900 +/* Internal helper. Used to detect if iPage would cause a loop: true if a page in the list has number iPage. */ | |
| 901 +static int overflowPageInUse(RecoverOverflow *pOverflow, unsigned iPage){ | |
| 902 + while( pOverflow && pOverflow->pPage->pgno!=iPage ){ | |
| 903 + pOverflow = pOverflow->pNextOverflow; | |
| 904 + } | |
| 905 + return pOverflow!=NULL; | |
| 906 +} | |
| 907 + | |
| 908 +/* Setup to access an nRecordBytes record beginning at iRecordOffset | |
| 909 + * in pPage. If nRecordBytes can be satisfied entirely from pPage, | |
| 910 + * then no overflow pages are needed and *pnLocalRecordBytes is set to | |
| 911 + * nRecordBytes. Otherwise, *ppOverflow is set to the head of a list | |
| 912 + * of overflow pages, and *pnLocalRecordBytes is set to the number of | |
| 913 + * bytes local to pPage. On failure neither output is written. | |
| 914 + * | |
| 915 + * overflowGetSegment() will do the right thing regardless of whether | |
| 916 + * those values are set to be in-page or not. | |
| 917 + */ | |
| 918 +static int overflowMaybeCreate(DbPage *pPage, unsigned nPageSize, | |
| 919 + unsigned iRecordOffset, unsigned nRecordBytes, | |
| 920 + unsigned *pnLocalRecordBytes, | |
| 921 + RecoverOverflow **ppOverflow){ | |
| 922 + unsigned nLocalRecordBytes; /* Record bytes in the leaf page. */ | |
| 923 + unsigned iNextPage; /* Next page number for record data. */ | |
| 924 + unsigned nBytes; /* Maximum record bytes as of current page. */ | |
| 925 + int rc; | |
| 926 + RecoverOverflow *pFirstOverflow; /* First in linked list of pages. */ | |
| 927 + RecoverOverflow *pLastOverflow; /* End of linked list. */ | |
| 928 + | |
| 929 + /* Calculations from the "Table B-Tree Leaf Cell" part of section | |
| 930 + * 1.5 of http://www.sqlite.org/fileformat2.html . maxLocal and | |
| 931 + * minLocal to match naming in btree.c. | |
| 932 + */ | |
| 933 + const unsigned maxLocal = nPageSize - 35; | |
| 934 + const unsigned minLocal = ((nPageSize-12)*32/255)-23; /* m */ | |
| 935 + | |
| 936 + /* Always fit anything smaller than maxLocal. */ | |
| 937 + if( nRecordBytes<=maxLocal ){ | |
| 938 + *pnLocalRecordBytes = nRecordBytes; | |
| 939 + *ppOverflow = NULL; | |
| 940 + return SQLITE_OK; | |
| 941 + } | |
| 942 + | |
| 943 + /* Calculate the remainder after accounting for minLocal on the leaf | |
| 944 + * page and what packs evenly into overflow pages. If the remainder | |
| 945 + * does not fit into maxLocal, then a partially-full overflow page | |
| 946 + * will be required in any case, so store as little as possible locally. | |
| 947 + */ | |
| 948 + nLocalRecordBytes = minLocal+((nRecordBytes-minLocal)%(nPageSize-4)); | |
| 949 + if( maxLocal<nLocalRecordBytes ){ | |
| 950 + nLocalRecordBytes = minLocal; | |
| 951 + } | |
| 952 + | |
| 953 + /* Don't read off the end of the page (local bytes plus the 4-byte page number). */ | |
| 954 + if( iRecordOffset+nLocalRecordBytes+4>nPageSize ){ | |
| 955 + return SQLITE_CORRUPT; | |
| 956 + } | |
| 957 + | |
| 958 + /* First overflow page number is after the local bytes. */ | |
| 959 + iNextPage = | |
| 960 + decodeUnsigned32(PageData(pPage, iRecordOffset + nLocalRecordBytes)); | |
| 961 + nBytes = nLocalRecordBytes; | |
| 962 + | |
| 963 + /* While there are more pages to read, and more bytes are needed, | |
| 964 + * get another page. | |
| 965 + */ | |
| 966 + pFirstOverflow = pLastOverflow = NULL; | |
| 967 + rc = SQLITE_OK; | |
| 968 + while( iNextPage && nBytes<nRecordBytes ){ | |
| 969 + RecoverOverflow *pOverflow; /* New overflow page for the list. */ | |
| 970 + | |
| 971 + rc = sqlite3PagerAcquire(pPage->pPager, iNextPage, &pPage, 0); /* The pPage parameter is reused to hold each acquired overflow page. */ | |
| 972 + if( rc!=SQLITE_OK ){ | |
| 973 + break; | |
| 974 + } | |
| 975 + | |
| 976 + pOverflow = sqlite3_malloc(sizeof(RecoverOverflow)); | |
| 977 + if( !pOverflow ){ | |
| 978 + sqlite3PagerUnref(pPage); | |
| 979 + rc = SQLITE_NOMEM; | |
| 980 + break; | |
| 981 + } | |
| 982 + memset(pOverflow, 0, sizeof(*pOverflow)); | |
| 983 + pOverflow->pPage = pPage; | |
| 984 + pOverflow->nPageSize = nPageSize; | |
| 985 + | |
| 986 + if( !pFirstOverflow ){ | |
| 987 + pFirstOverflow = pOverflow; | |
| 988 + }else{ | |
| 989 + pLastOverflow->pNextOverflow = pOverflow; | |
| 990 + } | |
| 991 + pLastOverflow = pOverflow; | |
| 992 + | |
| 993 + iNextPage = decodeUnsigned32(pPage->pData); /* The next-page number is read from the start of the overflow page. */ | |
| 994 + nBytes += nPageSize-4; | |
| 995 + | |
| 996 + /* Avoid loops. NOTE(review): only pages already in the overflow list are checked, not the originating page. */ | |
| 997 + if( overflowPageInUse(pFirstOverflow, iNextPage) ){ | |
| 998 + fprintf(stderr, "Overflow loop detected at %d\n", iNextPage); /* NOTE(review): iNextPage is unsigned but printed with %d. */ | |
| 999 + rc = SQLITE_CORRUPT; | |
| 1000 + break; | |
| 1001 + } | |
| 1002 + } | |
| 1003 + | |
| 1004 + /* If there were not enough pages, or too many, things are corrupt. | |
| 1005 + * Not having enough pages is an obvious problem, all the data | |
| 1006 + * cannot be read. Too many pages means that the contents of the | |
| 1007 + * row between the main page and the overflow page(s) is | |
| 1008 + * inconsistent (most likely one or more of the overflow pages does | |
| 1009 + * not really belong to this row). | |
| 1010 + */ | |
| 1011 + if( rc==SQLITE_OK && (nBytes<nRecordBytes || iNextPage) ){ | |
| 1012 + rc = SQLITE_CORRUPT; | |
| 1013 + } | |
| 1014 + | |
| 1015 + if( rc==SQLITE_OK ){ | |
| 1016 + *ppOverflow = pFirstOverflow; | |
| 1017 + *pnLocalRecordBytes = nLocalRecordBytes; | |
| 1018 + }else if( pFirstOverflow ){ | |
| 1019 + overflowDestroy(pFirstOverflow); /* Releases every page collected so far. */ | |
| 1020 + } | |
| 1021 + return rc; | |
| 1022 +} | |
| 1023 + | |
| 1024 +/* Use in concert with overflowMaybeCreate() to efficiently read parts | |
| 1025 + * of a potentially-overflowing record. pPage and iRecordOffset are | |
| 1026 + * the values passed into overflowMaybeCreate(), nLocalRecordBytes and | |
| 1027 + * pOverflow are the values returned by that call. | |
| 1028 + * | |
| 1029 + * On SQLITE_OK, *ppBase points to nRequestBytes of data at | |
| 1030 + * iRequestOffset within the record. If the data exists contiguously | |
| 1031 + * in a page, a direct pointer is returned, otherwise a buffer from | |
| 1032 + * sqlite3_malloc() is returned with the data. *pbFree is set true if | |
| 1033 + * sqlite3_free() should be called on *ppBase. | |
| 1034 + */ | |
| 1035 +/* Operation of this function is subtle. At any time, pPage is the | |
| 1036 + * current page, with iRecordOffset and nLocalRecordBytes being record | |
| 1037 + * data within pPage, and pOverflow being the overflow page after | |
| 1038 + * pPage. This allows the code to handle both the initial leaf page | |
| 1039 + * and overflow pages consistently by adjusting the values | |
| 1040 + * appropriately. | |
| 1041 + */ | |
| 1042 +static int overflowGetSegment(DbPage *pPage, unsigned iRecordOffset, | |
| 1043 + unsigned nLocalRecordBytes, | |
| 1044 + RecoverOverflow *pOverflow, | |
| 1045 + unsigned iRequestOffset, unsigned nRequestBytes, | |
| 1046 + unsigned char **ppBase, int *pbFree){ | |
| 1047 + unsigned nBase; /* Amount of data currently collected. */ | |
| 1048 + unsigned char *pBase; /* Buffer to collect record data into. */ | |
| 1049 + | |
| 1050 + /* Skip to the page containing the start of the data. */ | |
| 1051 + while( iRequestOffset>=nLocalRecordBytes && pOverflow ){ | |
| 1052 + /* Factor out current page's contribution. */ | |
| 1053 + iRequestOffset -= nLocalRecordBytes; | |
| 1054 + | |
| 1055 + /* Move forward to the next page in the list. */ | |
| 1056 + pPage = pOverflow->pPage; | |
| 1057 + iRecordOffset = 4; /* Record data on an overflow page starts after its 4-byte next-page number. */ | |
| 1058 + nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset; | |
| 1059 + pOverflow = pOverflow->pNextOverflow; | |
| 1060 + } | |
| 1061 + | |
| 1062 + /* If the requested data is entirely within this page, return a | |
| 1063 + * pointer into the page. | |
| 1064 + */ | |
| 1065 + if( iRequestOffset+nRequestBytes<=nLocalRecordBytes ){ | |
| 1066 + /* TODO(shess): "assignment discards qualifiers from pointer target type" | |
| 1067 + * Having ppBase be const makes sense, but sqlite3_free() takes non-const. | |
| 1068 + */ | |
| 1069 + *ppBase = (unsigned char *)PageData(pPage, iRecordOffset + iRequestOffset); | |
| 1070 + *pbFree = 0; | |
| 1071 + return SQLITE_OK; | |
| 1072 + } | |
| 1073 + | |
| 1074 + /* The data range would require additional pages. */ | |
| 1075 + if( !pOverflow ){ | |
| 1076 + /* Should never happen, the range is outside the nRecordBytes | |
| 1077 + * passed to overflowMaybeCreate(). | |
| 1078 + */ | |
| 1079 + assert(NULL); /* NOTREACHED */ | |
| 1080 + return SQLITE_ERROR; | |
| 1081 + } | |
| 1082 + | |
| 1083 + /* Get a buffer to construct into; the caller frees it when *pbFree is set. */ | |
| 1084 + nBase = 0; | |
| 1085 + pBase = sqlite3_malloc(nRequestBytes); | |
| 1086 + if( !pBase ){ | |
| 1087 + return SQLITE_NOMEM; | |
| 1088 + } | |
| 1089 + while( nBase<nRequestBytes ){ | |
| 1090 + /* Copy over data present on this page, capped at what is still needed. */ | |
| 1091 + unsigned nCopyBytes = nRequestBytes - nBase; | |
| 1092 + if( nLocalRecordBytes-iRequestOffset<nCopyBytes ){ | |
| 1093 + nCopyBytes = nLocalRecordBytes - iRequestOffset; | |
| 1094 + } | |
| 1095 + memcpy(pBase + nBase, PageData(pPage, iRecordOffset + iRequestOffset), | |
| 1096 + nCopyBytes); | |
| 1097 + nBase += nCopyBytes; | |
| 1098 + | |
| 1099 + if( pOverflow ){ | |
| 1100 + /* Copy from start of record data in future pages. */ | |
| 1101 + iRequestOffset = 0; | |
| 1102 + | |
| 1103 + /* Move forward to the next page in the list. Should match | |
| 1104 + * first while() loop. | |
| 1105 + */ | |
| 1106 + pPage = pOverflow->pPage; | |
| 1107 + iRecordOffset = 4; | |
| 1108 + nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset; | |
| 1109 + pOverflow = pOverflow->pNextOverflow; | |
| 1110 + }else if( nBase<nRequestBytes ){ | |
| 1111 + /* Ran out of overflow pages with data left to deliver. Not | |
| 1112 + * possible if the requested range fits within nRecordBytes | |
| 1113 + * passed to overflowMaybeCreate() when creating pOverflow. | |
| 1114 + */ | |
| 1115 + assert(NULL); /* NOTREACHED */ | |
| 1116 + sqlite3_free(pBase); | |
| 1117 + return SQLITE_ERROR; | |
| 1118 + } | |
| 1119 + } | |
| 1120 + assert( nBase==nRequestBytes ); | |
| 1121 + *ppBase = pBase; | |
| 1122 + *pbFree = 1; | |
| 1123 + return SQLITE_OK; | |
| 1124 +} | |
| 1125 + | |
| 1126 +/* Primary structure for iterating the contents of a table. | |
| 1127 + * | |
| 1128 + * leafCursorDestroy - release all resources associated with the cursor. | |
| 1129 + * leafCursorCreate - create a cursor to iterate items from tree at | |
| 1130 + * the provided root page. | |
| 1131 + * leafCursorNextValidCell - get the cursor ready to access data from | |
| 1132 + * the next valid cell in the table. | |
| 1133 + * leafCursorCellRowid - get the current cell's rowid. | |
| 1134 + * leafCursorCellColumns - get current cell's column count. | |
| 1135 + * leafCursorCellColInfo - get type and data for a column in current cell. | |
| 1136 + * | |
| 1137 + * leafCursorNextValidCell skips cells which fail simple integrity | |
| 1138 + * checks, such as overlapping other cells, or being located at | |
| 1139 + * impossible offsets, or where header data doesn't correctly describe | |
| 1140 + * payload data. Returns SQLITE_ROW if a valid cell is found, | |
| 1141 + * SQLITE_DONE if all pages in the tree were exhausted. | |
| 1142 + * | |
| 1143 + * leafCursorCellColInfo() accounts for overflow pages in the style of | |
| 1144 + * overflowGetSegment(). | |
| 1145 + */ | |
| 1146 +typedef struct RecoverLeafCursor RecoverLeafCursor; | |
| 1147 +struct RecoverLeafCursor { | |
| 1148 + RecoverInteriorCursor *pParent; /* Interior cursors above the leaf; NULL if none. */ | |
| 1149 + DbPage *pPage; /* Reference to current leaf page; NULL if none loaded. */ | |
| 1150 + unsigned nPageSize; /* Size of pPage. */ | |
| 1151 + unsigned nCells; /* Number of cells in pPage. */ | |
| 1152 + unsigned iCell; /* Current cell. */ | |
| 1153 + | |
| 1154 + /* Info parsed from data in iCell. */ | |
| 1155 + i64 iRowid; /* rowid parsed. */ | |
| 1156 + unsigned nRecordCols; /* how many items in the record. */ | |
| 1157 + u64 iRecordOffset; /* offset to record data. */ | |
| 1158 + /* TODO(shess): nRecordBytes and nRecordHeaderBytes are used in | |
| 1159 + * leafCursorCellColInfo() to prevent buffer overruns. | |
| 1160 + * leafCursorCellDecode() already verified that the cell is valid, so | |
| 1161 + * those checks should be redundant. | |
| 1162 + */ | |
| 1163 + u64 nRecordBytes; /* Size of record data. */ | |
| 1164 + unsigned nLocalRecordBytes; /* Amount of record data in-page. */ | |
| 1165 + unsigned nRecordHeaderBytes; /* Size of record header data. */ | |
| 1166 + unsigned char *pRecordHeader; /* Pointer to record header data. */ | |
| 1167 + int bFreeRecordHeader; /* True if record header requires free. */ | |
| 1168 + RecoverOverflow *pOverflow; /* Cell overflow info, if needed. */ | |
| 1169 +}; | |
| 1170 + | |
| 1171 +/* Internal helper shared between next-page and create-cursor. If | |
| 1172 + * pPage is a leaf page, it will be stored in the cursor and state | |
| 1173 + * initialized for reading cells. | |
| 1174 + * | |
| 1175 + * If pPage is an interior page, a new parent cursor is created and | |
| 1176 + * injected on the stack. This is necessary to handle trees with | |
| 1177 + * uneven depth, but also is used during initial setup. | |
| 1178 + * | |
| 1179 + * If pPage is not a table page at all, it is discarded. | |
| 1180 + * | |
| 1181 + * If SQLITE_OK is returned, the caller no longer owns pPage, | |
| 1182 + * otherwise the caller is responsible for discarding it. | |
| 1183 + */ | |
| 1184 +static int leafCursorLoadPage(RecoverLeafCursor *pCursor, DbPage *pPage){ | |
| 1185 + const unsigned char *pPageHeader; /* Header of *pPage */ | |
| 1186 + | |
| 1187 + /* Release the current page; this happens even if a later step fails. */ | |
| 1188 + if( pCursor->pPage ){ | |
| 1189 + sqlite3PagerUnref(pCursor->pPage); | |
| 1190 + pCursor->pPage = NULL; | |
| 1191 + pCursor->iCell = pCursor->nCells = 0; | |
| 1192 + } | |
| 1193 + | |
| 1194 + /* If the page is an unexpected interior node, inject a new stack | |
| 1195 + * layer and try again from there. | |
| 1196 + */ | |
| 1197 + pPageHeader = PageHeader(pPage); | |
| 1198 + if( pPageHeader[kiPageTypeOffset]==kTableInteriorPage ){ | |
| 1199 + RecoverInteriorCursor *pParent; | |
| 1200 + int rc = interiorCursorCreate(pCursor->pParent, pPage, pCursor->nPageSize, | |
| 1201 + &pParent); | |
| 1202 + if( rc!=SQLITE_OK ){ | |
| 1203 + return rc; | |
| 1204 + } | |
| 1205 + pCursor->pParent = pParent; | |
| 1206 + return SQLITE_OK; /* pCursor->pPage stays NULL, so callers keep looking for a leaf. */ | |
| 1207 + } | |
| 1208 + | |
| 1209 + /* Not a leaf page, skip it. */ | |
| 1210 + if( pPageHeader[kiPageTypeOffset]!=kTableLeafPage ){ | |
| 1211 + sqlite3PagerUnref(pPage); | |
| 1212 + return SQLITE_OK; | |
| 1213 + } | |
| 1214 + | |
| 1215 + /* Take ownership of the page and start decoding. */ | |
| 1216 + pCursor->pPage = pPage; | |
| 1217 + pCursor->iCell = 0; | |
| 1218 + pCursor->nCells = decodeUnsigned16(pPageHeader + kiPageCellCountOffset); /* Taken from the page header without validation here. */ | |
| 1219 + return SQLITE_OK; | |
| 1220 +} | |
| 1221 + | |
| 1222 +/* Get the next leaf-level page in the tree. Returns SQLITE_ROW when | |
| 1223 + * a leaf page is found, SQLITE_DONE when no more leaves exist, or any | |
| 1224 + * error which occurred. | |
| 1225 + */ | |
| 1226 +static int leafCursorNextPage(RecoverLeafCursor *pCursor){ | |
| 1227 + if( !pCursor->pParent ){ | |
| 1228 + return SQLITE_DONE; /* No interior cursor means there is nothing further to walk. */ | |
| 1229 + } | |
| 1230 + | |
| 1231 + /* Repeatedly load the parent's next child page until a leaf is found. */ | |
| 1232 + do { | |
| 1233 + DbPage *pNextPage; | |
| 1234 + int rc = interiorCursorNextPage(&pCursor->pParent, &pNextPage); | |
| 1235 + if( rc!=SQLITE_ROW ){ | |
| 1236 + assert( rc==SQLITE_DONE ); | |
| 1237 + return rc; | |
| 1238 + } | |
| 1239 + | |
| 1240 + rc = leafCursorLoadPage(pCursor, pNextPage); | |
| 1241 + if( rc!=SQLITE_OK ){ | |
| 1242 + sqlite3PagerUnref(pNextPage); /* On failure the page is still ours to release. */ | |
| 1243 + return rc; | |
| 1244 + } | |
| 1245 + } while( !pCursor->pPage ); | |
| 1246 + | |
| 1247 + return SQLITE_ROW; | |
| 1248 +} | |
| 1249 + | |
| 1250 +static void leafCursorDestroyCellData(RecoverLeafCursor *pCursor){ /* Drops per-cell state: the record header buffer (if owned) and the overflow list. */ | |
| 1251 + if( pCursor->bFreeRecordHeader ){ | |
| 1252 + sqlite3_free(pCursor->pRecordHeader); | |
| 1253 + } | |
| 1254 + pCursor->bFreeRecordHeader = 0; | |
| 1255 + pCursor->pRecordHeader = NULL; | |
| 1256 + | |
| 1257 + if( pCursor->pOverflow ){ | |
| 1258 + overflowDestroy(pCursor->pOverflow); | |
| 1259 + pCursor->pOverflow = NULL; | |
| 1260 + } | |
| 1261 +} | |
| 1262 + | |
| 1263 +static void leafCursorDestroy(RecoverLeafCursor *pCursor){ /* Releases cell data, the interior cursors, the current page, then frees pCursor itself. */ | |
| 1264 + leafCursorDestroyCellData(pCursor); | |
| 1265 + | |
| 1266 + if( pCursor->pParent ){ | |
| 1267 + interiorCursorDestroy(pCursor->pParent); | |
| 1268 + pCursor->pParent = NULL; | |
| 1269 + } | |
| 1270 + | |
| 1271 + if( pCursor->pPage ){ | |
| 1272 + sqlite3PagerUnref(pCursor->pPage); | |
| 1273 + pCursor->pPage = NULL; | |
| 1274 + } | |
| 1275 + | |
| 1276 + memset(pCursor, 0xA5, sizeof(*pCursor)); /* Overwrite before freeing; presumably to make stale use obvious. */ | |
| 1277 + sqlite3_free(pCursor); | |
| 1278 +} | |
| 1279 + | |
| 1280 +/* Create a cursor to iterate the rows from the leaf pages of a table | |
| 1281 + * rooted at iRootPage. | |
| 1282 + */ | |
| 1283 +/* TODO(shess): recoverOpen() calls this to setup the cursor, and I | |
| 1284 + * think that recoverFilter() may make a hard assumption that the | |
| 1285 + * cursor returned will turn up at least one valid cell. | |
| 1286 + * | |
| 1287 + * The cases I can think of which break this assumption are: | |
| 1288 + * - pPage is a valid leaf page with no valid cells. | |
| 1289 + * - pPage is a valid interior page with no valid leaves. | |
| 1290 + * - pPage is a valid interior page whose leaves contain no valid cells. | |
| 1291 + * - pPage is not a valid leaf or interior page. | |
| 1292 + */ | |
| 1293 +static int leafCursorCreate(Pager *pPager, unsigned nPageSize, | |
| 1294 + u32 iRootPage, RecoverLeafCursor **ppCursor){ | |
| 1295 + DbPage *pPage; /* Reference to page at iRootPage. */ | |
| 1296 + RecoverLeafCursor *pCursor; /* Leaf cursor being constructed. */ | |
| 1297 + int rc; | |
| 1298 + | |
| 1299 + /* Start out with the root page. */ | |
| 1300 + rc = sqlite3PagerAcquire(pPager, iRootPage, &pPage, 0); | |
| 1301 + if( rc!=SQLITE_OK ){ | |
| 1302 + return rc; | |
| 1303 + } | |
| 1304 + | |
| 1305 + pCursor = sqlite3_malloc(sizeof(RecoverLeafCursor)); | |
| 1306 + if( !pCursor ){ | |
| 1307 + sqlite3PagerUnref(pPage); | |
| 1308 + return SQLITE_NOMEM; | |
| 1309 + } | |
| 1310 + memset(pCursor, 0, sizeof(*pCursor)); | |
| 1311 + | |
| 1312 + pCursor->nPageSize = nPageSize; | |
| 1313 + | |
| 1314 + rc = leafCursorLoadPage(pCursor, pPage); | |
| 1315 + if( rc!=SQLITE_OK ){ | |
| 1316 + sqlite3PagerUnref(pPage); /* Load failed, so the page reference is still ours. */ | |
| 1317 + leafCursorDestroy(pCursor); | |
| 1318 + return rc; | |
| 1319 + } | |
| 1320 + | |
| 1321 + /* pPage wasn't a leaf page, find the next leaf page. */ | |
| 1322 + if( !pCursor->pPage ){ | |
| 1323 + rc = leafCursorNextPage(pCursor); | |
| 1324 + if( rc!=SQLITE_DONE && rc!=SQLITE_ROW ){ /* SQLITE_DONE is not an error: a cursor with no leaf page is still returned. */ | |
| 1325 + leafCursorDestroy(pCursor); | |
| 1326 + return rc; | |
| 1327 + } | |
| 1328 + } | |
| 1329 + | |
| 1330 + *ppCursor = pCursor; | |
| 1331 + return SQLITE_OK; | |
| 1332 +} | |
| 1333 + | |
| 1334 +/* Useful for setting breakpoints: validation failures return through here. Always yields SQLITE_ERROR. */ | |
| 1335 +static int ValidateError(){ /* NOTE(review): empty parameter list; (void) would be the strict-C spelling. */ | |
| 1336 + return SQLITE_ERROR; | |
| 1337 +} | |
| 1338 + | |
| 1339 +/* Setup the cursor for reading the information from cell iCell. Returns SQLITE_OK, or SQLITE_ERROR via ValidateError(). */ | |
| 1340 +static int leafCursorCellDecode(RecoverLeafCursor *pCursor){ | |
| 1341 + const unsigned char *pPageHeader; /* Header of current page. */ | |
| 1342 + const unsigned char *pCellOffsets; /* Pointer to page's cell offsets. */ | |
| 1343 + unsigned iCellOffset; /* Offset of current cell (iCell). */ | |
| 1344 + const unsigned char *pCell; /* Pointer to data at iCellOffset. */ | |
| 1345 + unsigned nCellMaxBytes; /* Maximum local size of iCell. */ | |
| 1346 + unsigned iEndOffset; /* End of iCell's in-page data. */ | |
| 1347 + u64 nRecordBytes; /* Expected size of cell, w/overflow. */ | |
| 1348 + u64 iRowid; /* iCell's rowid (in table). */ | |
| 1349 + unsigned nRead; /* Amount of cell read. */ | |
| 1350 + unsigned nRecordHeaderRead; /* Header data read. */ | |
| 1351 + u64 nRecordHeaderBytes; /* Header size expected. */ | |
| 1352 + unsigned nRecordCols; /* Columns read from header. */ | |
| 1353 + u64 nRecordColBytes; /* Bytes in payload for those columns. */ | |
| 1354 + unsigned i; | |
| 1355 + int rc; | |
| 1356 + | |
| 1357 + assert( pCursor->iCell<pCursor->nCells ); | |
| 1358 + | |
| 1359 + leafCursorDestroyCellData(pCursor); /* Discard the previous cell's header buffer and overflow list. */ | |
| 1360 + | |
| 1361 + /* Find the offset to the row. */ | |
| 1362 + pPageHeader = PageHeader(pCursor->pPage); | |
| 1363 + pCellOffsets = pPageHeader + knPageLeafHeaderBytes; | |
| 1364 + iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iCell*2); /* NOTE(review): this slot read is not bounded by nPageSize here - confirm nCells is validated. */ | |
| 1365 + if( iCellOffset>=pCursor->nPageSize ){ | |
| 1366 + return ValidateError(); | |
| 1367 + } | |
| 1368 + | |
| 1369 + pCell = PageData(pCursor->pPage, iCellOffset); | |
| 1370 + nCellMaxBytes = pCursor->nPageSize - iCellOffset; | |
| 1371 + | |
| 1372 + /* B-tree leaf cells lead with varint record size, varint rowid and | |
| 1373 + * varint header size. | |
| 1374 + */ | |
| 1375 + /* TODO(shess): The smallest page size is 512 bytes, which has an m | |
| 1376 + * of 39. Three varints need at most 27 bytes to encode. I think. | |
| 1377 + */ | |
| 1378 + if( !checkVarints(pCell, nCellMaxBytes, 3) ){ | |
| 1379 + return ValidateError(); | |
| 1380 + } | |
| 1381 + | |
| 1382 + nRead = getVarint(pCell, &nRecordBytes); | |
| 1383 + assert( iCellOffset+nRead<=pCursor->nPageSize ); | |
| 1384 + pCursor->nRecordBytes = nRecordBytes; | |
| 1385 + | |
| 1386 + nRead += getVarint(pCell + nRead, &iRowid); | |
| 1387 + assert( iCellOffset+nRead<=pCursor->nPageSize ); | |
| 1388 + pCursor->iRowid = (i64)iRowid; | |
| 1389 + | |
| 1390 + pCursor->iRecordOffset = iCellOffset + nRead; | |
| 1391 + | |
| 1392 + /* Start overflow setup here because nLocalRecordBytes is needed to | |
| 1393 + * check cell overlap. NOTE(review): the u64 fields narrow to unsigned parameters. | |
| 1394 + */ | |
| 1395 + rc = overflowMaybeCreate(pCursor->pPage, pCursor->nPageSize, | |
| 1396 + pCursor->iRecordOffset, pCursor->nRecordBytes, | |
| 1397 + &pCursor->nLocalRecordBytes, | |
| 1398 + &pCursor->pOverflow); | |
| 1399 + if( rc!=SQLITE_OK ){ | |
| 1400 + return ValidateError(); | |
| 1401 + } | |
| 1402 + | |
| 1403 + /* Check that no other cell starts within this cell. */ | |
| 1404 + iEndOffset = pCursor->iRecordOffset + pCursor->nLocalRecordBytes; | |
| 1405 + for( i=0; i<pCursor->nCells; ++i ){ | |
| 1406 + const unsigned iOtherOffset = decodeUnsigned16(pCellOffsets + i*2); | |
| 1407 + if( iOtherOffset>iCellOffset && iOtherOffset<iEndOffset ){ | |
| 1408 + return ValidateError(); | |
| 1409 + } | |
| 1410 + } | |
| 1411 + | |
| 1412 + nRecordHeaderRead = getVarint(pCell + nRead, &nRecordHeaderBytes); | |
| 1413 + assert( nRecordHeaderBytes<=nRecordBytes ); /* NOTE(review): asserts on a value read from the page; no runtime check here. */ | |
| 1414 + pCursor->nRecordHeaderBytes = nRecordHeaderBytes; | |
| 1415 + | |
| 1416 + /* Large headers could overflow if pages are small. */ | |
| 1417 + rc = overflowGetSegment(pCursor->pPage, | |
| 1418 + pCursor->iRecordOffset, pCursor->nLocalRecordBytes, | |
| 1419 + pCursor->pOverflow, 0, nRecordHeaderBytes, | |
| 1420 + &pCursor->pRecordHeader, &pCursor->bFreeRecordHeader)
; | |
| 1421 + if( rc!=SQLITE_OK ){ | |
| 1422 + return ValidateError(); | |
| 1423 + } | |
| 1424 + | |
| 1425 + /* Tally up the column count and size of data; parsing resumes after the header-size varint. */ | |
| 1426 + nRecordCols = 0; | |
| 1427 + nRecordColBytes = 0; | |
| 1428 + while( nRecordHeaderRead<nRecordHeaderBytes ){ | |
| 1429 + u64 iSerialType; /* Type descriptor for current column. */ | |
| 1430 + if( !checkVarint(pCursor->pRecordHeader + nRecordHeaderRead, | |
| 1431 + nRecordHeaderBytes - nRecordHeaderRead) ){ | |
| 1432 + return ValidateError(); | |
| 1433 + } | |
| 1434 + nRecordHeaderRead += getVarint(pCursor->pRecordHeader + nRecordHeaderRead, | |
| 1435 + &iSerialType); | |
| 1436 + if( iSerialType==10 || iSerialType==11 ){ /* Serial types 10 and 11 are rejected as invalid. */ | |
| 1437 + return ValidateError(); | |
| 1438 + } | |
| 1439 + nRecordColBytes += SerialTypeLength(iSerialType); | |
| 1440 + nRecordCols++; | |
| 1441 + } | |
| 1442 + pCursor->nRecordCols = nRecordCols; | |
| 1443 + | |
| 1444 + /* Parsing the header used as many bytes as expected. */ | |
| 1445 + if( nRecordHeaderRead!=nRecordHeaderBytes ){ | |
| 1446 + return ValidateError(); | |
| 1447 + } | |
| 1448 + | |
| 1449 + /* Calculated record is size of expected record. */ | |
| 1450 + if( nRecordHeaderBytes+nRecordColBytes!=nRecordBytes ){ | |
| 1451 + return ValidateError(); | |
| 1452 + } | |
| 1453 + | |
| 1454 + return SQLITE_OK; | |
| 1455 +} | |
| 1456 + | |
| 1457 +static i64 leafCursorCellRowid(RecoverLeafCursor *pCursor){ | |
| 1458 + return pCursor->iRowid; | |
| 1459 +} | |
| 1460 + | |
| 1461 +static unsigned leafCursorCellColumns(RecoverLeafCursor *pCursor){ | |
| 1462 + return pCursor->nRecordCols; | |
| 1463 +} | |
| 1464 + | |
| 1465 +/* Get the column info for the cell. Pass NULL for ppBase to prevent | |
| 1466 + * retrieving the data segment. If *pbFree is true, *ppBase must be | |
| 1467 + * freed by the caller using sqlite3_free(). | |
| 1468 + */ | |
| 1469 +static int leafCursorCellColInfo(RecoverLeafCursor *pCursor, | |
| 1470 + unsigned iCol, u64 *piColType, | |
| 1471 + unsigned char **ppBase, int *pbFree){ | |
| 1472 + const unsigned char *pRecordHeader; /* Current cell's header. */ | |
| 1473 + u64 nRecordHeaderBytes; /* Bytes in pRecordHeader. */ | |
| 1474 + unsigned nRead; /* Bytes read from header. */ | |
| 1475 + u64 iColEndOffset; /* Offset to end of column in cell. */ | |
| 1476 + unsigned nColsSkipped; /* Count columns as procesed. */ | |
| 1477 + u64 iSerialType; /* Type descriptor for current column. *
/ | |
| 1478 + | |
| 1479 + /* Implicit NULL for columns past the end. This case happens when | |
| 1480 + * rows have not been updated since an ALTER TABLE added columns. | |
| 1481 + * It is more convenient to address here than in callers. | |
| 1482 + */ | |
| 1483 + if( iCol>=pCursor->nRecordCols ){ | |
| 1484 + *piColType = 0; | |
| 1485 + if( ppBase ){ | |
| 1486 + *ppBase = 0; | |
| 1487 + *pbFree = 0; | |
| 1488 + } | |
| 1489 + return SQLITE_OK; | |
| 1490 + } | |
| 1491 + | |
| 1492 + /* Must be able to decode header size. */ | |
| 1493 + pRecordHeader = pCursor->pRecordHeader; | |
| 1494 + if( !checkVarint(pRecordHeader, pCursor->nRecordHeaderBytes) ){ | |
| 1495 + return SQLITE_CORRUPT; | |
| 1496 + } | |
| 1497 + | |
| 1498 + /* Rather than caching the header size and how many bytes it took, | |
| 1499 + * decode it every time. | |
| 1500 + */ | |
| 1501 + nRead = getVarint(pRecordHeader, &nRecordHeaderBytes); | |
| 1502 + assert( nRecordHeaderBytes==pCursor->nRecordHeaderBytes ); | |
| 1503 + | |
| 1504 + /* Scan forward to the indicated column. Scans to _after_ column | |
| 1505 + * for later range checking. | |
| 1506 + */ | |
| 1507 + /* TODO(shess): This could get expensive for very wide tables. An | |
| 1508 + * array of iSerialType could be built in leafCursorCellDecode(), but | |
| 1509 + * the number of columns is dynamic per row, so it would add memory | |
| 1510 + * management complexity. Enough info to efficiently forward | |
| 1511 + * iterate could be kept, if all clients forward iterate | |
| 1512 + * (recoverColumn() may not). | |
| 1513 + */ | |
| 1514 + iColEndOffset = 0; | |
| 1515 + nColsSkipped = 0; | |
| 1516 + while( nColsSkipped<=iCol && nRead<nRecordHeaderBytes ){ | |
| 1517 + if( !checkVarint(pRecordHeader + nRead, nRecordHeaderBytes - nRead) ){ | |
| 1518 + return SQLITE_CORRUPT; | |
| 1519 + } | |
| 1520 + nRead += getVarint(pRecordHeader + nRead, &iSerialType); | |
| 1521 + iColEndOffset += SerialTypeLength(iSerialType); | |
| 1522 + nColsSkipped++; | |
| 1523 + } | |
| 1524 + | |
| 1525 + /* Column's data extends past record's end. */ | |
| 1526 + if( nRecordHeaderBytes+iColEndOffset>pCursor->nRecordBytes ){ | |
| 1527 + return SQLITE_CORRUPT; | |
| 1528 + } | |
| 1529 + | |
| 1530 + *piColType = iSerialType; | |
| 1531 + if( ppBase ){ | |
| 1532 + const u32 nColBytes = SerialTypeLength(iSerialType); | |
| 1533 + | |
| 1534 + /* Offset from start of record to beginning of column. */ | |
| 1535 + const unsigned iColOffset = nRecordHeaderBytes+iColEndOffset-nColBytes; | |
| 1536 + | |
| 1537 + return overflowGetSegment(pCursor->pPage, pCursor->iRecordOffset, | |
| 1538 + pCursor->nLocalRecordBytes, pCursor->pOverflow, | |
| 1539 + iColOffset, nColBytes, ppBase, pbFree); | |
| 1540 + } | |
| 1541 + return SQLITE_OK; | |
| 1542 +} | |
| 1543 + | |
| 1544 +static int leafCursorNextValidCell(RecoverLeafCursor *pCursor){ | |
| 1545 + while( 1 ){ | |
| 1546 + int rc; | |
| 1547 + | |
| 1548 + /* Move to the next cell. */ | |
| 1549 + pCursor->iCell++; | |
| 1550 + | |
| 1551 + /* No more cells, get the next leaf. */ | |
| 1552 + if( pCursor->iCell>=pCursor->nCells ){ | |
| 1553 + rc = leafCursorNextPage(pCursor); | |
| 1554 + if( rc!=SQLITE_ROW ){ | |
| 1555 + return rc; | |
| 1556 + } | |
| 1557 + assert( pCursor->iCell==0 ); | |
| 1558 + } | |
| 1559 + | |
| 1560 + /* If the cell is valid, indicate that a row is available. */ | |
| 1561 + rc = leafCursorCellDecode(pCursor); | |
| 1562 + if( rc==SQLITE_OK ){ | |
| 1563 + return SQLITE_ROW; | |
| 1564 + } | |
| 1565 + | |
| 1566 + /* Iterate until done or a valid row is found. */ | |
| 1567 + /* TODO(shess): Remove debugging output. */ | |
| 1568 + fprintf(stderr, "Skipping invalid cell\n"); | |
| 1569 + } | |
| 1570 + return SQLITE_ERROR; | |
| 1571 +} | |
| 1572 + | |
| 1573 +typedef struct Recover Recover; | |
| 1574 +struct Recover { | |
| 1575 + sqlite3_vtab base; | |
| 1576 + sqlite3 *db; /* Host database connection */ | |
| 1577 + char *zDb; /* Database containing target table */ | |
| 1578 + char *zTable; /* Target table */ | |
| 1579 + unsigned nCols; /* Number of columns in target table */ | |
| 1580 + unsigned char *pTypes; /* Types of columns in target table */ | |
| 1581 +}; | |
| 1582 + | |
| 1583 +/* Internal helper for deleting the module. */ | |
| 1584 +static void recoverRelease(Recover *pRecover){ | |
| 1585 + sqlite3_free(pRecover->zDb); | |
| 1586 + sqlite3_free(pRecover->zTable); | |
| 1587 + sqlite3_free(pRecover->pTypes); | |
| 1588 + memset(pRecover, 0xA5, sizeof(*pRecover)); | |
| 1589 + sqlite3_free(pRecover); | |
| 1590 +} | |
| 1591 + | |
| 1592 +/* Helper function for initializing the module. Forward-declared so | |
| 1593 + * recoverCreate() and recoverConnect() can see it. | |
| 1594 + */ | |
| 1595 +static int recoverInit( | |
| 1596 + sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char ** | |
| 1597 +); | |
| 1598 + | |
| 1599 +static int recoverCreate( | |
| 1600 + sqlite3 *db, | |
| 1601 + void *pAux, | |
| 1602 + int argc, const char *const*argv, | |
| 1603 + sqlite3_vtab **ppVtab, | |
| 1604 + char **pzErr | |
| 1605 +){ | |
| 1606 + FNENTRY(); | |
| 1607 + return recoverInit(db, pAux, argc, argv, ppVtab, pzErr); | |
| 1608 +} | |
| 1609 + | |
| 1610 +/* This should never be called. */ | |
| 1611 +static int recoverConnect( | |
| 1612 + sqlite3 *db, | |
| 1613 + void *pAux, | |
| 1614 + int argc, const char *const*argv, | |
| 1615 + sqlite3_vtab **ppVtab, | |
| 1616 + char **pzErr | |
| 1617 +){ | |
| 1618 + FNENTRY(); | |
| 1619 + return recoverInit(db, pAux, argc, argv, ppVtab, pzErr); | |
| 1620 +} | |
| 1621 + | |
| 1622 +/* No indices supported. */ | |
| 1623 +static int recoverBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ | |
| 1624 + FNENTRY(); | |
| 1625 + return SQLITE_OK; | |
| 1626 +} | |
| 1627 + | |
| 1628 +/* Logically, this should never be called. */ | |
| 1629 +static int recoverDisconnect(sqlite3_vtab *pVtab){ | |
| 1630 + FNENTRY(); | |
| 1631 + recoverRelease((Recover*)pVtab); | |
| 1632 + return SQLITE_OK; | |
| 1633 +} | |
| 1634 + | |
| 1635 +static int recoverDestroy(sqlite3_vtab *pVtab){ | |
| 1636 + FNENTRY(); | |
| 1637 + recoverRelease((Recover*)pVtab); | |
| 1638 + return SQLITE_OK; | |
| 1639 +} | |
| 1640 + | |
| 1641 +typedef struct RecoverCursor RecoverCursor; | |
| 1642 +struct RecoverCursor { | |
| 1643 + sqlite3_vtab_cursor base; | |
| 1644 + RecoverLeafCursor *pLeafCursor; | |
| 1645 + int iEncoding; | |
| 1646 + int bEOF; | |
| 1647 +}; | |
| 1648 + | |
| 1649 +static int recoverOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ | |
| 1650 + Recover *pRecover = (Recover*)pVTab; | |
| 1651 + u32 iRootPage; /* Root page of the backing table. */ | |
| 1652 + int iEncoding; /* UTF encoding for backing database. */ | |
| 1653 + unsigned nPageSize; /* Size of pages in backing database. */ | |
| 1654 + Pager *pPager; /* Backing database pager. */ | |
| 1655 + RecoverLeafCursor *pLeafCursor; /* Cursor to read table's leaf pages. */ | |
| 1656 + RecoverCursor *pCursor; /* Cursor to read rows from leaves. */ | |
| 1657 + int rc; | |
| 1658 + | |
| 1659 + FNENTRY(); | |
| 1660 + | |
| 1661 + iRootPage = 0; | |
| 1662 + rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable, | |
| 1663 + &iRootPage); | |
| 1664 + if( rc!=SQLITE_OK ){ | |
| 1665 + return rc; | |
| 1666 + } | |
| 1667 + | |
| 1668 + iEncoding = 0; | |
| 1669 + rc = getEncoding(pRecover->db, pRecover->zDb, &iEncoding); | |
| 1670 + if( rc!=SQLITE_OK ){ | |
| 1671 + return rc; | |
| 1672 + } | |
| 1673 + | |
| 1674 + rc = GetPager(pRecover->db, pRecover->zDb, &pPager, &nPageSize); | |
| 1675 + if( rc!=SQLITE_OK ){ | |
| 1676 + return rc; | |
| 1677 + } | |
| 1678 + | |
| 1679 + rc = leafCursorCreate(pPager, nPageSize, iRootPage, &pLeafCursor); | |
| 1680 + if( rc!=SQLITE_OK ){ | |
| 1681 + return rc; | |
| 1682 + } | |
| 1683 + | |
| 1684 + pCursor = sqlite3_malloc(sizeof(RecoverCursor)); | |
| 1685 + if( !pCursor ){ | |
| 1686 + leafCursorDestroy(pLeafCursor); | |
| 1687 + return SQLITE_NOMEM; | |
| 1688 + } | |
| 1689 + memset(pCursor, 0, sizeof(*pCursor)); | |
| 1690 + pCursor->base.pVtab = pVTab; | |
| 1691 + pCursor->pLeafCursor = pLeafCursor; | |
| 1692 + pCursor->iEncoding = iEncoding; | |
| 1693 + | |
| 1694 + *ppCursor = (sqlite3_vtab_cursor*)pCursor; | |
| 1695 + return SQLITE_OK; | |
| 1696 +} | |
| 1697 + | |
| 1698 +static int recoverClose(sqlite3_vtab_cursor *cur){ | |
| 1699 + RecoverCursor *pCursor = (RecoverCursor*)cur; | |
| 1700 + FNENTRY(); | |
| 1701 + if( pCursor->pLeafCursor ){ | |
| 1702 + leafCursorDestroy(pCursor->pLeafCursor); | |
| 1703 + pCursor->pLeafCursor = NULL; | |
| 1704 + } | |
| 1705 + memset(pCursor, 0xA5, sizeof(*pCursor)); | |
| 1706 + sqlite3_free(cur); | |
| 1707 + return SQLITE_OK; | |
| 1708 +} | |
| 1709 + | |
| 1710 +/* Helpful place to set a breakpoint. */ | |
| 1711 +static int RecoverInvalidCell(){ | |
| 1712 + return SQLITE_ERROR; | |
| 1713 +} | |
| 1714 + | |
| 1715 +/* Returns SQLITE_OK if the cell has an appropriate number of columns | |
| 1716 + * with the appropriate types of data. | |
| 1717 + */ | |
| 1718 +static int recoverValidateLeafCell(Recover *pRecover, RecoverCursor *pCursor){ | |
| 1719 + unsigned i; | |
| 1720 + | |
| 1721 + /* If the row's storage has too many columns, skip it. */ | |
| 1722 + if( leafCursorCellColumns(pCursor->pLeafCursor)>pRecover->nCols ){ | |
| 1723 + return RecoverInvalidCell(); | |
| 1724 + } | |
| 1725 + | |
| 1726 + /* Skip rows with unexpected types. */ | |
| 1727 + for( i=0; i<pRecover->nCols; ++i ){ | |
| 1728 + u64 iType; /* Storage type of column i. */ | |
| 1729 + int rc; | |
| 1730 + | |
| 1731 + /* ROWID alias. */ | |
| 1732 + if( (pRecover->pTypes[i]&MASK_ROWID) ){ | |
| 1733 + continue; | |
| 1734 + } | |
| 1735 + | |
| 1736 + rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iType, NULL, NULL); | |
| 1737 + assert( rc==SQLITE_OK ); | |
| 1738 + if( rc!=SQLITE_OK || !SerialTypeIsCompatible(iType, pRecover->pTypes[i]) ){ | |
| 1739 + return RecoverInvalidCell(); | |
| 1740 + } | |
| 1741 + } | |
| 1742 + | |
| 1743 + return SQLITE_OK; | |
| 1744 +} | |
| 1745 + | |
| 1746 +static int recoverNext(sqlite3_vtab_cursor *pVtabCursor){ | |
| 1747 + RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; | |
| 1748 + Recover *pRecover = (Recover*)pCursor->base.pVtab; | |
| 1749 + int rc; | |
| 1750 + | |
| 1751 + FNENTRY(); | |
| 1752 + | |
| 1753 + /* Scan forward to the next cell with valid storage, then check that | |
| 1754 + * the stored data matches the schema. | |
| 1755 + */ | |
| 1756 + while( (rc = leafCursorNextValidCell(pCursor->pLeafCursor))==SQLITE_ROW ){ | |
| 1757 + if( recoverValidateLeafCell(pRecover, pCursor)==SQLITE_OK ){ | |
| 1758 + return SQLITE_OK; | |
| 1759 + } | |
| 1760 + } | |
| 1761 + | |
| 1762 + if( rc==SQLITE_DONE ){ | |
| 1763 + pCursor->bEOF = 1; | |
| 1764 + return SQLITE_OK; | |
| 1765 + } | |
| 1766 + | |
| 1767 + assert( rc!=SQLITE_OK ); | |
| 1768 + return rc; | |
| 1769 +} | |
| 1770 + | |
| 1771 +static int recoverFilter( | |
| 1772 + sqlite3_vtab_cursor *pVtabCursor, | |
| 1773 + int idxNum, const char *idxStr, | |
| 1774 + int argc, sqlite3_value **argv | |
| 1775 +){ | |
| 1776 + RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; | |
| 1777 + Recover *pRecover = (Recover*)pCursor->base.pVtab; | |
| 1778 + int rc; | |
| 1779 + | |
| 1780 + FNENTRY(); | |
| 1781 + | |
| 1782 + /* Load the first cell, and iterate forward if it's not valid. */ | |
| 1783 + /* TODO(shess): What happens if no cells at all are valid? */ | |
| 1784 + rc = leafCursorCellDecode(pCursor->pLeafCursor); | |
| 1785 + if( rc!=SQLITE_OK || recoverValidateLeafCell(pRecover, pCursor)!=SQLITE_OK ){ | |
| 1786 + return recoverNext(pVtabCursor); | |
| 1787 + } | |
| 1788 + | |
| 1789 + return SQLITE_OK; | |
| 1790 +} | |
| 1791 + | |
| 1792 +static int recoverEof(sqlite3_vtab_cursor *pVtabCursor){ | |
| 1793 + RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; | |
| 1794 + FNENTRY(); | |
| 1795 + return pCursor->bEOF; | |
| 1796 +} | |
| 1797 + | |
| 1798 +static int recoverColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i)
{ | |
| 1799 + RecoverCursor *pCursor = (RecoverCursor*)cur; | |
| 1800 + Recover *pRecover = (Recover*)pCursor->base.pVtab; | |
| 1801 + u64 iColType; /* Storage type of column i. */ | |
| 1802 + unsigned char *pColData; /* Column i's data. */ | |
| 1803 + int shouldFree; /* Non-zero if pColData should be freed. */ | |
| 1804 + int rc; | |
| 1805 + | |
| 1806 + FNENTRY(); | |
| 1807 + | |
| 1808 + if( i>=pRecover->nCols ){ | |
| 1809 + return SQLITE_ERROR; | |
| 1810 + } | |
| 1811 + | |
| 1812 + /* ROWID alias. */ | |
| 1813 + if( (pRecover->pTypes[i]&MASK_ROWID) ){ | |
| 1814 + sqlite3_result_int64(ctx, leafCursorCellRowid(pCursor->pLeafCursor)); | |
| 1815 + return SQLITE_OK; | |
| 1816 + } | |
| 1817 + | |
| 1818 + pColData = NULL; | |
| 1819 + shouldFree = 0; | |
| 1820 + rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iColType, | |
| 1821 + &pColData, &shouldFree); | |
| 1822 + if( rc!=SQLITE_OK ){ | |
| 1823 + return rc; | |
| 1824 + } | |
| 1825 + /* recoverValidateLeafCell() should guarantee that this will never | |
| 1826 + * occur. | |
| 1827 + */ | |
| 1828 + if( !SerialTypeIsCompatible(iColType, pRecover->pTypes[i]) ){ | |
| 1829 + if( shouldFree ){ | |
| 1830 + sqlite3_free(pColData); | |
| 1831 + } | |
| 1832 + return SQLITE_ERROR; | |
| 1833 + } | |
| 1834 + | |
| 1835 + switch( iColType ){ | |
| 1836 + case 0 : sqlite3_result_null(ctx); break; | |
| 1837 + case 1 : sqlite3_result_int64(ctx, decodeSigned(pColData, 1)); break; | |
| 1838 + case 2 : sqlite3_result_int64(ctx, decodeSigned(pColData, 2)); break; | |
| 1839 + case 3 : sqlite3_result_int64(ctx, decodeSigned(pColData, 3)); break; | |
| 1840 + case 4 : sqlite3_result_int64(ctx, decodeSigned(pColData, 4)); break; | |
| 1841 + case 5 : sqlite3_result_int64(ctx, decodeSigned(pColData, 6)); break; | |
| 1842 + case 6 : sqlite3_result_int64(ctx, decodeSigned(pColData, 8)); break; | |
| 1843 + case 7 : sqlite3_result_double(ctx, decodeFloat64(pColData)); break; | |
| 1844 + case 8 : sqlite3_result_int(ctx, 0); break; | |
| 1845 + case 9 : sqlite3_result_int(ctx, 1); break; | |
| 1846 + case 10 : assert( iColType!=10 ); break; | |
| 1847 + case 11 : assert( iColType!=11 ); break; | |
| 1848 + | |
| 1849 + default : { | |
| 1850 + u32 l = SerialTypeLength(iColType); | |
| 1851 + | |
| 1852 + /* If pColData was already allocated, arrange to pass ownership. */ | |
| 1853 + sqlite3_destructor_type pFn = SQLITE_TRANSIENT; | |
| 1854 + if( shouldFree ){ | |
| 1855 + pFn = sqlite3_free; | |
| 1856 + shouldFree = 0; | |
| 1857 + } | |
| 1858 + | |
| 1859 + if( SerialTypeIsBlob(iColType) ){ | |
| 1860 + sqlite3_result_blob(ctx, pColData, l, pFn); | |
| 1861 + }else{ | |
| 1862 + if( pCursor->iEncoding==SQLITE_UTF16LE ){ | |
| 1863 + sqlite3_result_text16le(ctx, (const void*)pColData, l, pFn); | |
| 1864 + }else if( pCursor->iEncoding==SQLITE_UTF16BE ){ | |
| 1865 + sqlite3_result_text16be(ctx, (const void*)pColData, l, pFn); | |
| 1866 + }else{ | |
| 1867 + sqlite3_result_text(ctx, (const char*)pColData, l, pFn); | |
| 1868 + } | |
| 1869 + } | |
| 1870 + } break; | |
| 1871 + } | |
| 1872 + if( shouldFree ){ | |
| 1873 + sqlite3_free(pColData); | |
| 1874 + } | |
| 1875 + return SQLITE_OK; | |
| 1876 +} | |
| 1877 + | |
| 1878 +static int recoverRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid)
{ | |
| 1879 + RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; | |
| 1880 + FNENTRY(); | |
| 1881 + *pRowid = leafCursorCellRowid(pCursor->pLeafCursor); | |
| 1882 + return SQLITE_OK; | |
| 1883 +} | |
| 1884 + | |
| 1885 +static sqlite3_module recoverModule = { | |
| 1886 + 0, /* iVersion */ | |
| 1887 + recoverCreate, /* xCreate - create a table */ | |
| 1888 + recoverConnect, /* xConnect - connect to an existing table */ | |
| 1889 + recoverBestIndex, /* xBestIndex - Determine search strategy */ | |
| 1890 + recoverDisconnect, /* xDisconnect - Disconnect from a table */ | |
| 1891 + recoverDestroy, /* xDestroy - Drop a table */ | |
| 1892 + recoverOpen, /* xOpen - open a cursor */ | |
| 1893 + recoverClose, /* xClose - close a cursor */ | |
| 1894 + recoverFilter, /* xFilter - configure scan constraints */ | |
| 1895 + recoverNext, /* xNext - advance a cursor */ | |
| 1896 + recoverEof, /* xEof */ | |
| 1897 + recoverColumn, /* xColumn - read data */ | |
| 1898 + recoverRowid, /* xRowid - read data */ | |
| 1899 + 0, /* xUpdate - write data */ | |
| 1900 + 0, /* xBegin - begin transaction */ | |
| 1901 + 0, /* xSync - sync transaction */ | |
| 1902 + 0, /* xCommit - commit transaction */ | |
| 1903 + 0, /* xRollback - rollback transaction */ | |
| 1904 + 0, /* xFindFunction - function overloading */ | |
| 1905 + 0, /* xRename - rename the table */ | |
| 1906 +}; | |
| 1907 + | |
| 1908 +int recoverVtableInit(sqlite3 *db){ | |
| 1909 + return sqlite3_create_module_v2(db, "recover", &recoverModule, NULL, 0); | |
| 1910 +} | |
| 1911 + | |
| 1912 +/* This section of code is for parsing the create input and | |
| 1913 + * initializing the module. | |
| 1914 + */ | |
| 1915 + | |
/* Locate the next word in zText.  Leading characters accepted by
 * ascii_isspace() are skipped; the word is the following run of
 * characters accepted by ascii_isalnum() or equal to '_'.
 *
 * *pzWordStart receives the first character of the word and
 * *pzWordEnd the character just past it.  Returns true if the word is
 * non-empty.  The result is computed from local copies, so callers may
 * pass the same pointer for both outputs.
 */
static int findWord(const char *zText,
                    const char **pzWordStart, const char **pzWordEnd){
  const char *zStart;
  const char *zEnd;

  for( zStart=zText; ascii_isspace(*zStart); zStart++ ){}
  for( zEnd=zStart; ascii_isalnum(*zEnd) || *zEnd=='_'; zEnd++ ){}

  *pzWordStart = zStart;
  *pzWordEnd = zEnd;
  return zEnd>zStart;
}
| 1934 + | |
/* Return true if the next word in zText is zWord (compared with
 * ascii_strncasecmp()), also setting *pzContinue to the character
 * after the word.  *pzContinue is left untouched when false is
 * returned.
 *
 * The found word must have the same length as zWord.  Comparing only
 * the found word's length would let any prefix match, e.g. "S" would
 * be accepted as "STRICT".
 */
static int expectWord(const char *zText, const char *zWord,
                      const char **pzContinue){
  const char *zWordStart, *zWordEnd;
  size_t nWord;

  if( !findWord(zText, &zWordStart, &zWordEnd) ){
    return 0;
  }

  nWord = (size_t)(zWordEnd - zWordStart);
  if( strlen(zWord)!=nWord
   || ascii_strncasecmp(zWord, zWordStart, nWord)!=0 ){
    return 0;
  }

  *pzContinue = zWordEnd;
  return 1;
}
| 1948 + | |
| 1949 +/* Parse the name and type information out of parameter. In case of | |
| 1950 + * success, *pzNameStart/End contain the name of the column, | |
| 1951 + * *pzTypeStart/End contain the top-level type, and *pTypeMask has the | |
| 1952 + * type mask to use for the column. | |
| 1953 + */ | |
| 1954 +static int findNameAndType(const char *parameter, | |
| 1955 + const char **pzNameStart, const char **pzNameEnd, | |
| 1956 + const char **pzTypeStart, const char **pzTypeEnd, | |
| 1957 + unsigned char *pTypeMask){ | |
| 1958 + unsigned nNameLen; /* Length of found name. */ | |
| 1959 + const char *zEnd; /* Current end of parsed column information. */ | |
| 1960 + int bNotNull; /* Non-zero if NULL is not allowed for name. */ | |
| 1961 + int bStrict; /* Non-zero if column requires exact type match. */ | |
| 1962 + const char *zDummy; /* Dummy parameter, result unused. */ | |
| 1963 + unsigned i; | |
| 1964 + | |
| 1965 + /* strictMask is used for STRICT, strictMask|otherMask if STRICT is | |
| 1966 + * not supplied. zReplace provides an alternate type to expose to | |
| 1967 + * the caller. | |
| 1968 + */ | |
| 1969 + static struct { | |
| 1970 + const char *zName; | |
| 1971 + unsigned char strictMask; | |
| 1972 + unsigned char otherMask; | |
| 1973 + const char *zReplace; | |
| 1974 + } kTypeInfo[] = { | |
| 1975 + { "ANY", | |
| 1976 + MASK_INTEGER | MASK_FLOAT | MASK_BLOB | MASK_TEXT | MASK_NULL, | |
| 1977 + 0, "", | |
| 1978 + }, | |
| 1979 + { "ROWID", MASK_INTEGER | MASK_ROWID, 0, "INTEGER", }, | |
| 1980 + { "INTEGER", MASK_INTEGER | MASK_NULL, 0, NULL, }, | |
| 1981 + { "FLOAT", MASK_FLOAT | MASK_NULL, MASK_INTEGER, NULL, }, | |
| 1982 + { "NUMERIC", MASK_INTEGER | MASK_FLOAT | MASK_NULL, MASK_TEXT, NULL, }, | |
| 1983 + { "TEXT", MASK_TEXT | MASK_NULL, MASK_BLOB, NULL, }, | |
| 1984 + { "BLOB", MASK_BLOB | MASK_NULL, 0, NULL, }, | |
| 1985 + }; | |
| 1986 + | |
| 1987 + if( !findWord(parameter, pzNameStart, pzNameEnd) ){ | |
| 1988 + return SQLITE_MISUSE; | |
| 1989 + } | |
| 1990 + | |
| 1991 + /* Manifest typing, accept any storage type. */ | |
| 1992 + if( !findWord(*pzNameEnd, pzTypeStart, pzTypeEnd) ){ | |
| 1993 + *pzTypeEnd = *pzTypeStart = ""; | |
| 1994 + *pTypeMask = MASK_INTEGER | MASK_FLOAT | MASK_BLOB | MASK_TEXT | MASK_NULL; | |
| 1995 + return SQLITE_OK; | |
| 1996 + } | |
| 1997 + | |
| 1998 + nNameLen = *pzTypeEnd - *pzTypeStart; | |
| 1999 + for( i=0; i<ArraySize(kTypeInfo); ++i ){ | |
| 2000 + if( ascii_strncasecmp(kTypeInfo[i].zName, *pzTypeStart, nNameLen)==0 ){ | |
| 2001 + break; | |
| 2002 + } | |
| 2003 + } | |
| 2004 + if( i==ArraySize(kTypeInfo) ){ | |
| 2005 + return SQLITE_MISUSE; | |
| 2006 + } | |
| 2007 + | |
| 2008 + zEnd = *pzTypeEnd; | |
| 2009 + bStrict = 0; | |
| 2010 + if( expectWord(zEnd, "STRICT", &zEnd) ){ | |
| 2011 + /* TODO(shess): Ick. But I don't want another single-purpose | |
| 2012 + * flag, either. | |
| 2013 + */ | |
| 2014 + if( kTypeInfo[i].zReplace && !kTypeInfo[i].zReplace[0] ){ | |
| 2015 + return SQLITE_MISUSE; | |
| 2016 + } | |
| 2017 + bStrict = 1; | |
| 2018 + } | |
| 2019 + | |
| 2020 + bNotNull = 0; | |
| 2021 + if( expectWord(zEnd, "NOT", &zEnd) ){ | |
| 2022 + if( expectWord(zEnd, "NULL", &zEnd) ){ | |
| 2023 + bNotNull = 1; | |
| 2024 + }else{ | |
| 2025 + /* Anything other than NULL after NOT is an error. */ | |
| 2026 + return SQLITE_MISUSE; | |
| 2027 + } | |
| 2028 + } | |
| 2029 + | |
| 2030 + /* Anything else is an error. */ | |
| 2031 + if( findWord(zEnd, &zDummy, &zDummy) ){ | |
| 2032 + return SQLITE_MISUSE; | |
| 2033 + } | |
| 2034 + | |
| 2035 + *pTypeMask = kTypeInfo[i].strictMask; | |
| 2036 + if( !bStrict ){ | |
| 2037 + *pTypeMask |= kTypeInfo[i].otherMask; | |
| 2038 + } | |
| 2039 + if( bNotNull ){ | |
| 2040 + *pTypeMask &= ~MASK_NULL; | |
| 2041 + } | |
| 2042 + if( kTypeInfo[i].zReplace ){ | |
| 2043 + *pzTypeStart = kTypeInfo[i].zReplace; | |
| 2044 + *pzTypeEnd = *pzTypeStart + strlen(*pzTypeStart); | |
| 2045 + } | |
| 2046 + return SQLITE_OK; | |
| 2047 +} | |
| 2048 + | |
| 2049 +/* Parse the arguments, placing type masks in *pTypes and the exposed | |
| 2050 + * schema in *pzCreateSql (for sqlite3_declare_vtab). | |
| 2051 + */ | |
| 2052 +static int ParseColumnsAndGenerateCreate(unsigned nCols, | |
| 2053 + const char *const *pCols, | |
| 2054 + char **pzCreateSql, | |
| 2055 + unsigned char *pTypes, | |
| 2056 + char **pzErr){ | |
| 2057 + unsigned i; | |
| 2058 + char *zCreateSql = sqlite3_mprintf("CREATE TABLE x("); | |
| 2059 + if( !zCreateSql ){ | |
| 2060 + return SQLITE_NOMEM; | |
| 2061 + } | |
| 2062 + | |
| 2063 + for( i=0; i<nCols; i++ ){ | |
| 2064 + const char *zSep = (i < nCols - 1 ? ", " : ")"); | |
| 2065 + const char *zNotNull = ""; | |
| 2066 + const char *zNameStart, *zNameEnd; | |
| 2067 + const char *zTypeStart, *zTypeEnd; | |
| 2068 + int rc = findNameAndType(pCols[i], | |
| 2069 + &zNameStart, &zNameEnd, | |
| 2070 + &zTypeStart, &zTypeEnd, | |
| 2071 + &pTypes[i]); | |
| 2072 + if( rc!=SQLITE_OK ){ | |
| 2073 + *pzErr = sqlite3_mprintf("unable to parse column %d", i); | |
| 2074 + sqlite3_free(zCreateSql); | |
| 2075 + return rc; | |
| 2076 + } | |
| 2077 + | |
| 2078 + if( !(pTypes[i]&MASK_NULL) ){ | |
| 2079 + zNotNull = " NOT NULL"; | |
| 2080 + } | |
| 2081 + | |
| 2082 + /* Add name and type to the create statement. */ | |
| 2083 + zCreateSql = sqlite3_mprintf("%z%.*s %.*s%s%s", | |
| 2084 + zCreateSql, | |
| 2085 + zNameEnd - zNameStart, zNameStart, | |
| 2086 + zTypeEnd - zTypeStart, zTypeStart, | |
| 2087 + zNotNull, zSep); | |
| 2088 + if( !zCreateSql ){ | |
| 2089 + return SQLITE_NOMEM; | |
| 2090 + } | |
| 2091 + } | |
| 2092 + | |
| 2093 + *pzCreateSql = zCreateSql; | |
| 2094 + return SQLITE_OK; | |
| 2095 +} | |
| 2096 + | |
| 2097 +/* Helper function for initializing the module. */ | |
| 2098 +/* argv[0] module name | |
| 2099 + * argv[1] db name for virtual table | |
| 2100 + * argv[2] virtual table name | |
| 2101 + * argv[3] backing table name | |
| 2102 + * argv[4] columns | |
| 2103 + */ | |
| 2104 +/* TODO(shess): Since connect isn't supported, could inline into | |
| 2105 + * recoverCreate(). | |
| 2106 + */ | |
| 2107 +/* TODO(shess): Explore cases where it would make sense to set *pzErr. */ | |
| 2108 +static int recoverInit( | |
| 2109 + sqlite3 *db, /* Database connection */ | |
| 2110 + void *pAux, /* unused */ | |
| 2111 + int argc, const char *const*argv, /* Parameters to CREATE TABLE statement *
/ | |
| 2112 + sqlite3_vtab **ppVtab, /* OUT: New virtual table */ | |
| 2113 + char **pzErr /* OUT: Error message, if any */ | |
| 2114 +){ | |
| 2115 + const unsigned kTypeCol = 4; /* First argument with column type info. */ | |
| 2116 + Recover *pRecover; /* Virtual table structure being created. */ | |
| 2117 + char *zDot; /* Any dot found in "db.table" backing. */ | |
| 2118 + u32 iRootPage; /* Root page of backing table. */ | |
| 2119 + char *zCreateSql; /* Schema of created virtual table. */ | |
| 2120 + int rc; | |
| 2121 + | |
| 2122 + /* Require to be in the temp database. */ | |
| 2123 + if( ascii_strcasecmp(argv[1], "temp")!=0 ){ | |
| 2124 + *pzErr = sqlite3_mprintf("recover table must be in temp database"); | |
| 2125 + return SQLITE_MISUSE; | |
| 2126 + } | |
| 2127 + | |
| 2128 + /* Need the backing table and at least one column. */ | |
| 2129 + if( argc<=kTypeCol ){ | |
| 2130 + *pzErr = sqlite3_mprintf("no columns specified"); | |
| 2131 + return SQLITE_MISUSE; | |
| 2132 + } | |
| 2133 + | |
| 2134 + pRecover = sqlite3_malloc(sizeof(Recover)); | |
| 2135 + if( !pRecover ){ | |
| 2136 + return SQLITE_NOMEM; | |
| 2137 + } | |
| 2138 + memset(pRecover, 0, sizeof(*pRecover)); | |
| 2139 + pRecover->base.pModule = &recoverModule; | |
| 2140 + pRecover->db = db; | |
| 2141 + | |
| 2142 + /* Parse out db.table, assuming main if no dot. */ | |
| 2143 + zDot = strchr(argv[3], '.'); | |
| 2144 + if( !zDot ){ | |
| 2145 + pRecover->zDb = sqlite3_strdup(db->aDb[0].zName); | |
| 2146 + pRecover->zTable = sqlite3_strdup(argv[3]); | |
| 2147 + }else if( zDot>argv[3] && zDot[1]!='\0' ){ | |
| 2148 + pRecover->zDb = sqlite3_strndup(argv[3], zDot - argv[3]); | |
| 2149 + pRecover->zTable = sqlite3_strdup(zDot + 1); | |
| 2150 + }else{ | |
| 2151 + /* ".table" or "db." not allowed. */ | |
| 2152 + *pzErr = sqlite3_mprintf("ill-formed table specifier"); | |
| 2153 + recoverRelease(pRecover); | |
| 2154 + return SQLITE_ERROR; | |
| 2155 + } | |
| 2156 + | |
| 2157 + pRecover->nCols = argc - kTypeCol; | |
| 2158 + pRecover->pTypes = sqlite3_malloc(pRecover->nCols); | |
| 2159 + if( !pRecover->zDb || !pRecover->zTable || !pRecover->pTypes ){ | |
| 2160 + recoverRelease(pRecover); | |
| 2161 + return SQLITE_NOMEM; | |
| 2162 + } | |
| 2163 + | |
| 2164 + /* Require the backing table to exist. */ | |
| 2165 + /* TODO(shess): Be more pedantic about the form of the descriptor | |
| 2166 + * string. This already fails for poorly-formed strings, simply | |
| 2167 + * because there won't be a root page, but it would make more sense | |
| 2168 + * to be explicit. | |
| 2169 + */ | |
| 2170 + rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable, &iRootPage); | |
| 2171 + if( rc!=SQLITE_OK ){ | |
| 2172 + *pzErr = sqlite3_mprintf("unable to find backing table"); | |
| 2173 + recoverRelease(pRecover); | |
| 2174 + return rc; | |
| 2175 + } | |
| 2176 + | |
| 2177 + /* Parse the column definitions. */ | |
| 2178 + rc = ParseColumnsAndGenerateCreate(pRecover->nCols, argv + kTypeCol, | |
| 2179 + &zCreateSql, pRecover->pTypes, pzErr); | |
| 2180 + if( rc!=SQLITE_OK ){ | |
| 2181 + recoverRelease(pRecover); | |
| 2182 + return rc; | |
| 2183 + } | |
| 2184 + | |
| 2185 + rc = sqlite3_declare_vtab(db, zCreateSql); | |
| 2186 + sqlite3_free(zCreateSql); | |
| 2187 + if( rc!=SQLITE_OK ){ | |
| 2188 + recoverRelease(pRecover); | |
| 2189 + return rc; | |
| 2190 + } | |
| 2191 + | |
| 2192 + *ppVtab = (sqlite3_vtab *)pRecover; | |
| 2193 + return SQLITE_OK; | |
| 2194 +} | |
| OLD | NEW |