OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ** 2016-05-28 |
| 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: |
| 6 ** |
| 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. |
| 10 ** |
| 11 ****************************************************************************** |
| 12 ** |
| 13 ** This file contains the implementation of an SQLite virtual table for |
| 14 ** reading CSV files. |
| 15 ** |
| 16 ** Usage: |
| 17 ** |
| 18 ** .load ./csv |
| 19 ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); |
| 20 ** SELECT * FROM csv; |
| 21 ** |
| 22 ** The columns are named "c0", "c1", "c2", ... by default. But the |
| 23 ** application can define its own CREATE TABLE statement as an additional |
| 24 ** parameter. For example: |
| 25 ** |
| 26 ** CREATE VIRTUAL TABLE temp.csv2 USING csv( |
| 27 ** filename = "../http.log", |
| 28 ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" |
| 29 ** ); |
| 30 ** |
| 31 ** Instead of specifying a file, the text of the CSV can be loaded using |
| 32 ** the data= parameter. |
| 33 ** |
| 34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have |
| 35 ** N columns. If the columns parameter is omitted, the CSV file is opened |
| 36 ** as soon as the virtual table is constructed and the first row of the CSV |
| 37 ** is read in order to count the columns. |
| 38 ** |
| 39 ** Some extra debugging features (used for testing virtual tables) are available |
| 40 ** if this module is compiled with -DSQLITE_TEST. |
| 41 */ |
| 42 #include <sqlite3ext.h> |
| 43 SQLITE_EXTENSION_INIT1 |
| 44 #include <string.h> |
| 45 #include <stdlib.h> |
| 46 #include <assert.h> |
| 47 #include <stdarg.h> |
| 48 #include <ctype.h> |
| 49 #include <stdio.h> |
| 50 |
| 51 #ifndef SQLITE_OMIT_VIRTUALTABLE |
| 52 |
/*
** CSV_NOINLINE asks the compiler to keep a function out of line.  It
** expands to the GCC attribute or the MSVC declspec when one of those
** compilers is detected, and to nothing on any other compiler.
*/
#ifdef __GNUC__
#  define CSV_NOINLINE  __attribute__((noinline))
#elif defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
#else
#  define CSV_NOINLINE
#endif


/* Capacity, in bytes, of the CsvReader.zErr[] error message buffer */
#define CSV_MXERR 200

/* Number of bytes requested from the file on each input buffer refill */
#define CSV_INBUFSZ 1024
| 71 |
| 72 /* A context object used when read a CSV file. */ |
| 73 typedef struct CsvReader CsvReader; |
| 74 struct CsvReader { |
| 75 FILE *in; /* Read the CSV text from this input stream */ |
| 76 char *z; /* Accumulated text for a field */ |
| 77 int n; /* Number of bytes in z */ |
| 78 int nAlloc; /* Space allocated for z[] */ |
| 79 int nLine; /* Current line number */ |
| 80 char cTerm; /* Character that terminated the most recent field */ |
| 81 size_t iIn; /* Next unread character in the input buffer */ |
| 82 size_t nIn; /* Number of characters in the input buffer */ |
| 83 char *zIn; /* The input buffer */ |
| 84 char zErr[CSV_MXERR]; /* Error message */ |
| 85 }; |
| 86 |
| 87 /* Initialize a CsvReader object */ |
| 88 static void csv_reader_init(CsvReader *p){ |
| 89 p->in = 0; |
| 90 p->z = 0; |
| 91 p->n = 0; |
| 92 p->nAlloc = 0; |
| 93 p->nLine = 0; |
| 94 p->nIn = 0; |
| 95 p->zIn = 0; |
| 96 p->zErr[0] = 0; |
| 97 } |
| 98 |
| 99 /* Close and reset a CsvReader object */ |
| 100 static void csv_reader_reset(CsvReader *p){ |
| 101 if( p->in ){ |
| 102 fclose(p->in); |
| 103 sqlite3_free(p->zIn); |
| 104 } |
| 105 sqlite3_free(p->z); |
| 106 csv_reader_init(p); |
| 107 } |
| 108 |
| 109 /* Report an error on a CsvReader */ |
| 110 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ |
| 111 va_list ap; |
| 112 va_start(ap, zFormat); |
| 113 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); |
| 114 va_end(ap); |
| 115 } |
| 116 |
| 117 /* Open the file associated with a CsvReader |
| 118 ** Return the number of errors. |
| 119 */ |
| 120 static int csv_reader_open( |
| 121 CsvReader *p, /* The reader to open */ |
| 122 const char *zFilename, /* Read from this filename */ |
| 123 const char *zData /* ... or use this data */ |
| 124 ){ |
| 125 if( zFilename ){ |
| 126 p->zIn = sqlite3_malloc( CSV_INBUFSZ ); |
| 127 if( p->zIn==0 ){ |
| 128 csv_errmsg(p, "out of memory"); |
| 129 return 1; |
| 130 } |
| 131 p->in = fopen(zFilename, "rb"); |
| 132 if( p->in==0 ){ |
| 133 csv_reader_reset(p); |
| 134 csv_errmsg(p, "cannot open '%s' for reading", zFilename); |
| 135 return 1; |
| 136 } |
| 137 }else{ |
| 138 assert( p->in==0 ); |
| 139 p->zIn = (char*)zData; |
| 140 p->nIn = strlen(zData); |
| 141 } |
| 142 return 0; |
| 143 } |
| 144 |
| 145 /* The input buffer has overflowed. Refill the input buffer, then |
| 146 ** return the next character |
| 147 */ |
| 148 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ |
| 149 size_t got; |
| 150 |
| 151 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ |
| 152 assert( p->in!=0 ); /* Only called if reading froma file */ |
| 153 |
| 154 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); |
| 155 if( got==0 ) return EOF; |
| 156 p->nIn = got; |
| 157 p->iIn = 1; |
| 158 return p->zIn[0]; |
| 159 } |
| 160 |
| 161 /* Return the next character of input. Return EOF at end of input. */ |
| 162 static int csv_getc(CsvReader *p){ |
| 163 if( p->iIn >= p->nIn ){ |
| 164 if( p->in!=0 ) return csv_getc_refill(p); |
| 165 return EOF; |
| 166 } |
| 167 return p->zIn[p->iIn++]; |
| 168 } |
| 169 |
| 170 /* Increase the size of p->z and append character c to the end. |
| 171 ** Return 0 on success and non-zero if there is an OOM error */ |
| 172 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ |
| 173 char *zNew; |
| 174 int nNew = p->nAlloc*2 + 100; |
| 175 zNew = sqlite3_realloc64(p->z, nNew); |
| 176 if( zNew ){ |
| 177 p->z = zNew; |
| 178 p->nAlloc = nNew; |
| 179 p->z[p->n++] = c; |
| 180 return 0; |
| 181 }else{ |
| 182 csv_errmsg(p, "out of memory"); |
| 183 return 1; |
| 184 } |
| 185 } |
| 186 |
| 187 /* Append a single character to the CsvReader.z[] array. |
| 188 ** Return 0 on success and non-zero if there is an OOM error */ |
| 189 static int csv_append(CsvReader *p, char c){ |
| 190 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); |
| 191 p->z[p->n++] = c; |
| 192 return 0; |
| 193 } |
| 194 |
| 195 /* Read a single field of CSV text. Compatible with rfc4180 and extended |
| 196 ** with the option of having a separator other than ",". |
| 197 ** |
| 198 ** + Input comes from p->in. |
| 199 ** + Store results in p->z of length p->n. Space to hold p->z comes |
| 200 ** from sqlite3_malloc64(). |
| 201 ** + Keep track of the line number in p->nLine. |
| 202 ** + Store the character that terminates the field in p->cTerm. Store |
| 203 ** EOF on end-of-file. |
| 204 ** |
| 205 ** Return "" at EOF. Return 0 on an OOM error. |
| 206 */ |
| 207 static char *csv_read_one_field(CsvReader *p){ |
| 208 int c; |
| 209 p->n = 0; |
| 210 c = csv_getc(p); |
| 211 if( c==EOF ){ |
| 212 p->cTerm = EOF; |
| 213 return ""; |
| 214 } |
| 215 if( c=='"' ){ |
| 216 int pc, ppc; |
| 217 int startLine = p->nLine; |
| 218 pc = ppc = 0; |
| 219 while( 1 ){ |
| 220 c = csv_getc(p); |
| 221 if( c<='"' || pc=='"' ){ |
| 222 if( c=='\n' ) p->nLine++; |
| 223 if( c=='"' ){ |
| 224 if( pc=='"' ){ |
| 225 pc = 0; |
| 226 continue; |
| 227 } |
| 228 } |
| 229 if( (c==',' && pc=='"') |
| 230 || (c=='\n' && pc=='"') |
| 231 || (c=='\n' && pc=='\r' && ppc=='"') |
| 232 || (c==EOF && pc=='"') |
| 233 ){ |
| 234 do{ p->n--; }while( p->z[p->n]!='"' ); |
| 235 p->cTerm = (char)c; |
| 236 break; |
| 237 } |
| 238 if( pc=='"' && c!='\r' ){ |
| 239 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"'); |
| 240 break; |
| 241 } |
| 242 if( c==EOF ){ |
| 243 csv_errmsg(p, "line %d: unterminated %c-quoted field\n", |
| 244 startLine, '"'); |
| 245 p->cTerm = (char)c; |
| 246 break; |
| 247 } |
| 248 } |
| 249 if( csv_append(p, (char)c) ) return 0; |
| 250 ppc = pc; |
| 251 pc = c; |
| 252 } |
| 253 }else{ |
| 254 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ |
| 255 if( csv_append(p, (char)c) ) return 0; |
| 256 c = csv_getc(p); |
| 257 } |
| 258 if( c=='\n' ){ |
| 259 p->nLine++; |
| 260 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; |
| 261 } |
| 262 p->cTerm = (char)c; |
| 263 } |
| 264 if( p->z ) p->z[p->n] = 0; |
| 265 return p->z; |
| 266 } |
| 267 |
| 268 |
| 269 /* Forward references to the various virtual table methods implemented |
| 270 ** in this file. */ |
| 271 static int csvtabCreate(sqlite3*, void*, int, const char*const*, |
| 272 sqlite3_vtab**,char**); |
| 273 static int csvtabConnect(sqlite3*, void*, int, const char*const*, |
| 274 sqlite3_vtab**,char**); |
| 275 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); |
| 276 static int csvtabDisconnect(sqlite3_vtab*); |
| 277 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); |
| 278 static int csvtabClose(sqlite3_vtab_cursor*); |
| 279 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, |
| 280 int argc, sqlite3_value **argv); |
| 281 static int csvtabNext(sqlite3_vtab_cursor*); |
| 282 static int csvtabEof(sqlite3_vtab_cursor*); |
| 283 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); |
| 284 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); |
| 285 |
| 286 /* An instance of the CSV virtual table */ |
| 287 typedef struct CsvTable { |
| 288 sqlite3_vtab base; /* Base class. Must be first */ |
| 289 char *zFilename; /* Name of the CSV file */ |
| 290 char *zData; /* Raw CSV data in lieu of zFilename */ |
| 291 long iStart; /* Offset to start of data in zFilename */ |
| 292 int nCol; /* Number of columns in the CSV file */ |
| 293 unsigned int tstFlags; /* Bit values used for testing */ |
| 294 } CsvTable; |
| 295 |
/* Bit values that may be set in CsvTable.tstFlags */
#define CSVTEST_FIDX  0x0001  /* Pretend that constrained searches cost less */
| 298 |
| 299 /* A cursor for the CSV virtual table */ |
| 300 typedef struct CsvCursor { |
| 301 sqlite3_vtab_cursor base; /* Base class. Must be first */ |
| 302 CsvReader rdr; /* The CsvReader object */ |
| 303 char **azVal; /* Value of the current row */ |
| 304 int *aLen; /* Length of each entry */ |
| 305 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ |
| 306 } CsvCursor; |
| 307 |
| 308 /* Transfer error message text from a reader into a CsvTable */ |
| 309 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ |
| 310 sqlite3_free(pTab->base.zErrMsg); |
| 311 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); |
| 312 } |
| 313 |
| 314 /* |
| 315 ** This method is the destructor fo a CsvTable object. |
| 316 */ |
| 317 static int csvtabDisconnect(sqlite3_vtab *pVtab){ |
| 318 CsvTable *p = (CsvTable*)pVtab; |
| 319 sqlite3_free(p->zFilename); |
| 320 sqlite3_free(p->zData); |
| 321 sqlite3_free(p); |
| 322 return SQLITE_OK; |
| 323 } |
| 324 |
/*
** Return a pointer to the first character of z that isspace() does not
** classify as whitespace.  If z is all whitespace, the result points at
** its zero terminator.
*/
static const char *csv_skip_whitespace(const char *z){
  const unsigned char *zCur = (const unsigned char*)z;
  while( isspace(*zCur) ){
    zCur++;
  }
  return (const char*)zCur;
}
| 331 |
/*
** Remove trailing whitespace from string z[] in place by writing a zero
** terminator after the last non-whitespace character.
**
** The character examined is z[n-1].  Examining z[n] would test the zero
** terminator, which is never whitespace, so nothing would be trimmed.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
| 338 |
/*
** Remove quoting from string z[] in place.
**
** Nothing happens unless z[] both begins and ends with the same quote
** character, either ' or ", and is at least two bytes long.  Otherwise
** the outer quotes are stripped and each doubled quote character inside
** is collapsed to a single one.
*/
static void csv_dequote(char *z){
  const char q = z[0];     /* The quote character, if any */
  size_t nByte;            /* Length of z[] including the quotes */
  size_t iRead;            /* Next byte of z[] to examine */
  int iWrite = 0;          /* Next byte of z[] to fill in */

  if( q!='\'' && q!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=q ) return;
  iRead = 1;
  while( iRead<nByte-1 ){
    if( z[iRead]==q && z[iRead+1]==q ){
      iRead++;             /* Doubled quote: keep only one of the pair */
    }
    z[iWrite++] = z[iRead++];
  }
  z[iWrite] = 0;
}
| 354 |
/*
** Test whether z has the form "TAG = VALUE", where TAG is the first nTag
** bytes of zTag and whitespace may appear before TAG and on either side
** of the "=".  On a match, return a pointer to the first character of
** VALUE.  Return NULL if z does not have that form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(zCur+1);
    }
  }
  return 0;
}
| 366 |
| 367 /* Decode a parameter that requires a dequoted string. |
| 368 ** |
| 369 ** Return 1 if the parameter is seen, or 0 if not. 1 is returned |
| 370 ** even if there is an error. If an error occurs, then an error message |
| 371 ** is left in p->zErr. If there are no errors, p->zErr[0]==0. |
| 372 */ |
| 373 static int csv_string_parameter( |
| 374 CsvReader *p, /* Leave the error message here, if there is one */ |
| 375 const char *zParam, /* Parameter we are checking for */ |
| 376 const char *zArg, /* Raw text of the virtual table argment */ |
| 377 char **pzVal /* Write the dequoted string value here */ |
| 378 ){ |
| 379 const char *zValue; |
| 380 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg); |
| 381 if( zValue==0 ) return 0; |
| 382 p->zErr[0] = 0; |
| 383 if( *pzVal ){ |
| 384 csv_errmsg(p, "more than one '%s' parameter", zParam); |
| 385 return 1; |
| 386 } |
| 387 *pzVal = sqlite3_mprintf("%s", zValue); |
| 388 if( *pzVal==0 ){ |
| 389 csv_errmsg(p, "out of memory"); |
| 390 return 1; |
| 391 } |
| 392 csv_trim_whitespace(*pzVal); |
| 393 csv_dequote(*pzVal); |
| 394 return 1; |
| 395 } |
| 396 |
| 397 |
/*
** Interpret z as a boolean.  Return 1 for "yes", "on", "true" (compared
** with sqlite3_stricmp()) or exactly "1".  Return 0 for "no", "off",
** "false" or exactly "0".  Return -1 for anything else.
*/
static int csv_boolean(const char *z){
  static const char *const azTrue[] = { "yes", "on", "true" };
  static const char *const azFalse[] = { "no", "off", "false" };
  int k;

  for(k=0; k<3; k++){
    if( sqlite3_stricmp(azTrue[k], z)==0 ) return 1;
  }
  if( z[0]=='1' && z[1]==0 ) return 1;

  for(k=0; k<3; k++){
    if( sqlite3_stricmp(azFalse[k], z)==0 ) return 0;
  }
  if( z[0]=='0' && z[1]==0 ) return 0;

  return -1;
}
| 418 |
| 419 |
| 420 /* |
| 421 ** Parameters: |
| 422 ** filename=FILENAME Name of file containing CSV content |
| 423 ** data=TEXT Direct CSV content. |
| 424 ** schema=SCHEMA Alternative CSV schema. |
| 425 ** header=YES|NO First row of CSV defines the names of |
| 426 ** columns if "yes". Default "no". |
| 427 ** columns=N Assume the CSV file contains N columns. |
| 428 ** |
| 429 ** Only available if compiled with SQLITE_TEST: |
| 430 ** |
| 431 ** testflags=N Bitmask of test flags. Optional |
| 432 ** |
| 433 ** If schema= is omitted, then the columns are named "c0", "c1", "c2", |
| 434 ** and so forth. If columns=N is omitted, then the file is opened and |
| 435 ** the number of columns in the first row is counted to determine the |
| 436 ** column count. If header=YES, then the first row is skipped. |
| 437 */ |
| 438 static int csvtabConnect( |
| 439 sqlite3 *db, |
| 440 void *pAux, |
| 441 int argc, const char *const*argv, |
| 442 sqlite3_vtab **ppVtab, |
| 443 char **pzErr |
| 444 ){ |
| 445 CsvTable *pNew = 0; /* The CsvTable object to construct */ |
| 446 int bHeader = -1; /* header= flags. -1 means not seen yet */ |
| 447 int rc = SQLITE_OK; /* Result code from this routine */ |
| 448 int i, j; /* Loop counters */ |
| 449 #ifdef SQLITE_TEST |
| 450 int tstFlags = 0; /* Value for testflags=N parameter */ |
| 451 #endif |
| 452 int nCol = -99; /* Value of the columns= parameter */ |
| 453 CsvReader sRdr; /* A CSV file reader used to store an error |
| 454 ** message and/or to count the number of columns */ |
| 455 static const char *azParam[] = { |
| 456 "filename", "data", "schema", |
| 457 }; |
| 458 char *azPValue[3]; /* Parameter values */ |
| 459 # define CSV_FILENAME (azPValue[0]) |
| 460 # define CSV_DATA (azPValue[1]) |
| 461 # define CSV_SCHEMA (azPValue[2]) |
| 462 |
| 463 |
| 464 assert( sizeof(azPValue)==sizeof(azParam) ); |
| 465 memset(&sRdr, 0, sizeof(sRdr)); |
| 466 memset(azPValue, 0, sizeof(azPValue)); |
| 467 for(i=3; i<argc; i++){ |
| 468 const char *z = argv[i]; |
| 469 const char *zValue; |
| 470 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ |
| 471 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; |
| 472 } |
| 473 if( j<sizeof(azParam)/sizeof(azParam[0]) ){ |
| 474 if( sRdr.zErr[0] ) goto csvtab_connect_error; |
| 475 }else |
| 476 if( (zValue = csv_parameter("header",6,z))!=0 ){ |
| 477 int x; |
| 478 if( bHeader>=0 ){ |
| 479 csv_errmsg(&sRdr, "more than one 'header' parameter"); |
| 480 goto csvtab_connect_error; |
| 481 } |
| 482 x = csv_boolean(zValue); |
| 483 if( x==1 ){ |
| 484 bHeader = 1; |
| 485 }else if( x==0 ){ |
| 486 bHeader = 0; |
| 487 }else{ |
| 488 csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue); |
| 489 goto csvtab_connect_error; |
| 490 } |
| 491 }else |
| 492 #ifdef SQLITE_TEST |
| 493 if( (zValue = csv_parameter("testflags",9,z))!=0 ){ |
| 494 tstFlags = (unsigned int)atoi(zValue); |
| 495 }else |
| 496 #endif |
| 497 if( (zValue = csv_parameter("columns",7,z))!=0 ){ |
| 498 if( nCol>0 ){ |
| 499 csv_errmsg(&sRdr, "more than one 'columns' parameter"); |
| 500 goto csvtab_connect_error; |
| 501 } |
| 502 nCol = atoi(zValue); |
| 503 if( nCol<=0 ){ |
| 504 csv_errmsg(&sRdr, "must have at least one column"); |
| 505 goto csvtab_connect_error; |
| 506 } |
| 507 }else |
| 508 { |
| 509 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); |
| 510 goto csvtab_connect_error; |
| 511 } |
| 512 } |
| 513 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ |
| 514 csv_errmsg(&sRdr, "must either filename= or data= but not both"); |
| 515 goto csvtab_connect_error; |
| 516 } |
| 517 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ |
| 518 goto csvtab_connect_error; |
| 519 } |
| 520 pNew = sqlite3_malloc( sizeof(*pNew) ); |
| 521 *ppVtab = (sqlite3_vtab*)pNew; |
| 522 if( pNew==0 ) goto csvtab_connect_oom; |
| 523 memset(pNew, 0, sizeof(*pNew)); |
| 524 if( nCol>0 ){ |
| 525 pNew->nCol = nCol; |
| 526 }else{ |
| 527 do{ |
| 528 const char *z = csv_read_one_field(&sRdr); |
| 529 if( z==0 ) goto csvtab_connect_oom; |
| 530 pNew->nCol++; |
| 531 }while( sRdr.cTerm==',' ); |
| 532 } |
| 533 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; |
| 534 pNew->zData = CSV_DATA; CSV_DATA = 0; |
| 535 #ifdef SQLITE_TEST |
| 536 pNew->tstFlags = tstFlags; |
| 537 #endif |
| 538 pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0; |
| 539 csv_reader_reset(&sRdr); |
| 540 if( CSV_SCHEMA==0 ){ |
| 541 char *zSep = ""; |
| 542 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x("); |
| 543 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; |
| 544 for(i=0; i<pNew->nCol; i++){ |
| 545 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); |
| 546 zSep = ","; |
| 547 } |
| 548 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA); |
| 549 } |
| 550 rc = sqlite3_declare_vtab(db, CSV_SCHEMA); |
| 551 if( rc ) goto csvtab_connect_error; |
| 552 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ |
| 553 sqlite3_free(azPValue[i]); |
| 554 } |
| 555 return SQLITE_OK; |
| 556 |
| 557 csvtab_connect_oom: |
| 558 rc = SQLITE_NOMEM; |
| 559 csv_errmsg(&sRdr, "out of memory"); |
| 560 |
| 561 csvtab_connect_error: |
| 562 if( pNew ) csvtabDisconnect(&pNew->base); |
| 563 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ |
| 564 sqlite3_free(azPValue[i]); |
| 565 } |
| 566 if( sRdr.zErr[0] ){ |
| 567 sqlite3_free(*pzErr); |
| 568 *pzErr = sqlite3_mprintf("%s", sRdr.zErr); |
| 569 } |
| 570 csv_reader_reset(&sRdr); |
| 571 if( rc==SQLITE_OK ) rc = SQLITE_ERROR; |
| 572 return rc; |
| 573 } |
| 574 |
| 575 /* |
| 576 ** Reset the current row content held by a CsvCursor. |
| 577 */ |
| 578 static void csvtabCursorRowReset(CsvCursor *pCur){ |
| 579 CsvTable *pTab = (CsvTable*)pCur->base.pVtab; |
| 580 int i; |
| 581 for(i=0; i<pTab->nCol; i++){ |
| 582 sqlite3_free(pCur->azVal[i]); |
| 583 pCur->azVal[i] = 0; |
| 584 pCur->aLen[i] = 0; |
| 585 } |
| 586 } |
| 587 |
| 588 /* |
| 589 ** The xConnect and xCreate methods do the same thing, but they must be |
| 590 ** different so that the virtual table is not an eponymous virtual table. |
| 591 */ |
| 592 static int csvtabCreate( |
| 593 sqlite3 *db, |
| 594 void *pAux, |
| 595 int argc, const char *const*argv, |
| 596 sqlite3_vtab **ppVtab, |
| 597 char **pzErr |
| 598 ){ |
| 599 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); |
| 600 } |
| 601 |
| 602 /* |
| 603 ** Destructor for a CsvCursor. |
| 604 */ |
| 605 static int csvtabClose(sqlite3_vtab_cursor *cur){ |
| 606 CsvCursor *pCur = (CsvCursor*)cur; |
| 607 csvtabCursorRowReset(pCur); |
| 608 csv_reader_reset(&pCur->rdr); |
| 609 sqlite3_free(cur); |
| 610 return SQLITE_OK; |
| 611 } |
| 612 |
| 613 /* |
| 614 ** Constructor for a new CsvTable cursor object. |
| 615 */ |
| 616 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ |
| 617 CsvTable *pTab = (CsvTable*)p; |
| 618 CsvCursor *pCur; |
| 619 size_t nByte; |
| 620 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol; |
| 621 pCur = sqlite3_malloc64( nByte ); |
| 622 if( pCur==0 ) return SQLITE_NOMEM; |
| 623 memset(pCur, 0, nByte); |
| 624 pCur->azVal = (char**)&pCur[1]; |
| 625 pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; |
| 626 *ppCursor = &pCur->base; |
| 627 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ |
| 628 csv_xfer_error(pTab, &pCur->rdr); |
| 629 return SQLITE_ERROR; |
| 630 } |
| 631 return SQLITE_OK; |
| 632 } |
| 633 |
| 634 |
| 635 /* |
| 636 ** Advance a CsvCursor to its next row of input. |
| 637 ** Set the EOF marker if we reach the end of input. |
| 638 */ |
| 639 static int csvtabNext(sqlite3_vtab_cursor *cur){ |
| 640 CsvCursor *pCur = (CsvCursor*)cur; |
| 641 CsvTable *pTab = (CsvTable*)cur->pVtab; |
| 642 int i = 0; |
| 643 char *z; |
| 644 do{ |
| 645 z = csv_read_one_field(&pCur->rdr); |
| 646 if( z==0 ){ |
| 647 csv_xfer_error(pTab, &pCur->rdr); |
| 648 break; |
| 649 } |
| 650 if( i<pTab->nCol ){ |
| 651 if( pCur->aLen[i] < pCur->rdr.n+1 ){ |
| 652 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1); |
| 653 if( zNew==0 ){ |
| 654 csv_errmsg(&pCur->rdr, "out of memory"); |
| 655 csv_xfer_error(pTab, &pCur->rdr); |
| 656 break; |
| 657 } |
| 658 pCur->azVal[i] = zNew; |
| 659 pCur->aLen[i] = pCur->rdr.n+1; |
| 660 } |
| 661 memcpy(pCur->azVal[i], z, pCur->rdr.n+1); |
| 662 i++; |
| 663 } |
| 664 }while( pCur->rdr.cTerm==',' ); |
| 665 while( i<pTab->nCol ){ |
| 666 sqlite3_free(pCur->azVal[i]); |
| 667 pCur->azVal[i] = 0; |
| 668 pCur->aLen[i] = 0; |
| 669 i++; |
| 670 } |
| 671 if( z==0 || pCur->rdr.cTerm==EOF ){ |
| 672 pCur->iRowid = -1; |
| 673 }else{ |
| 674 pCur->iRowid++; |
| 675 } |
| 676 return SQLITE_OK; |
| 677 } |
| 678 |
| 679 /* |
| 680 ** Return values of columns for the row at which the CsvCursor |
| 681 ** is currently pointing. |
| 682 */ |
| 683 static int csvtabColumn( |
| 684 sqlite3_vtab_cursor *cur, /* The cursor */ |
| 685 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ |
| 686 int i /* Which column to return */ |
| 687 ){ |
| 688 CsvCursor *pCur = (CsvCursor*)cur; |
| 689 CsvTable *pTab = (CsvTable*)cur->pVtab; |
| 690 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ |
| 691 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); |
| 692 } |
| 693 return SQLITE_OK; |
| 694 } |
| 695 |
| 696 /* |
| 697 ** Return the rowid for the current row. |
| 698 */ |
| 699 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ |
| 700 CsvCursor *pCur = (CsvCursor*)cur; |
| 701 *pRowid = pCur->iRowid; |
| 702 return SQLITE_OK; |
| 703 } |
| 704 |
| 705 /* |
| 706 ** Return TRUE if the cursor has been moved off of the last |
| 707 ** row of output. |
| 708 */ |
| 709 static int csvtabEof(sqlite3_vtab_cursor *cur){ |
| 710 CsvCursor *pCur = (CsvCursor*)cur; |
| 711 return pCur->iRowid<0; |
| 712 } |
| 713 |
| 714 /* |
| 715 ** Only a full table scan is supported. So xFilter simply rewinds to |
| 716 ** the beginning. |
| 717 */ |
| 718 static int csvtabFilter( |
| 719 sqlite3_vtab_cursor *pVtabCursor, |
| 720 int idxNum, const char *idxStr, |
| 721 int argc, sqlite3_value **argv |
| 722 ){ |
| 723 CsvCursor *pCur = (CsvCursor*)pVtabCursor; |
| 724 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; |
| 725 pCur->iRowid = 0; |
| 726 if( pCur->rdr.in==0 ){ |
| 727 assert( pCur->rdr.zIn==pTab->zData ); |
| 728 assert( pTab->iStart>=0 ); |
| 729 assert( (size_t)pTab->iStart<=pCur->rdr.nIn ); |
| 730 pCur->rdr.iIn = pTab->iStart; |
| 731 }else{ |
| 732 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); |
| 733 pCur->rdr.iIn = 0; |
| 734 pCur->rdr.nIn = 0; |
| 735 } |
| 736 return csvtabNext(pVtabCursor); |
| 737 } |
| 738 |
| 739 /* |
| 740 ** Only a forward full table scan is supported. xBestIndex is mostly |
| 741 ** a no-op. If CSVTEST_FIDX is set, then the presence of equality |
| 742 ** constraints lowers the estimated cost, which is fiction, but is useful |
| 743 ** for testing certain kinds of virtual table behavior. |
| 744 */ |
| 745 static int csvtabBestIndex( |
| 746 sqlite3_vtab *tab, |
| 747 sqlite3_index_info *pIdxInfo |
| 748 ){ |
| 749 pIdxInfo->estimatedCost = 1000000; |
| 750 #ifdef SQLITE_TEST |
| 751 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){ |
| 752 /* The usual (and sensible) case is to always do a full table scan. |
| 753 ** The code in this branch only runs when testflags=1. This code |
| 754 ** generates an artifical and unrealistic plan which is useful |
| 755 ** for testing virtual table logic but is not helpful to real applications. |
| 756 ** |
| 757 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual |
| 758 ** table (even though it is not) and the cost of running the virtual table |
| 759 ** is reduced from 1 million to just 10. The constraints are *not* marked |
| 760 ** as omittable, however, so the query planner should still generate a |
| 761 ** plan that gives a correct answer, even if they plan is not optimal. |
| 762 */ |
| 763 int i; |
| 764 int nConst = 0; |
| 765 for(i=0; i<pIdxInfo->nConstraint; i++){ |
| 766 unsigned char op; |
| 767 if( pIdxInfo->aConstraint[i].usable==0 ) continue; |
| 768 op = pIdxInfo->aConstraint[i].op; |
| 769 if( op==SQLITE_INDEX_CONSTRAINT_EQ |
| 770 || op==SQLITE_INDEX_CONSTRAINT_LIKE |
| 771 || op==SQLITE_INDEX_CONSTRAINT_GLOB |
| 772 ){ |
| 773 pIdxInfo->estimatedCost = 10; |
| 774 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; |
| 775 nConst++; |
| 776 } |
| 777 } |
| 778 } |
| 779 #endif |
| 780 return SQLITE_OK; |
| 781 } |
| 782 |
| 783 |
| 784 static sqlite3_module CsvModule = { |
| 785 0, /* iVersion */ |
| 786 csvtabCreate, /* xCreate */ |
| 787 csvtabConnect, /* xConnect */ |
| 788 csvtabBestIndex, /* xBestIndex */ |
| 789 csvtabDisconnect, /* xDisconnect */ |
| 790 csvtabDisconnect, /* xDestroy */ |
| 791 csvtabOpen, /* xOpen - open a cursor */ |
| 792 csvtabClose, /* xClose - close a cursor */ |
| 793 csvtabFilter, /* xFilter - configure scan constraints */ |
| 794 csvtabNext, /* xNext - advance a cursor */ |
| 795 csvtabEof, /* xEof - check for end of scan */ |
| 796 csvtabColumn, /* xColumn - read data */ |
| 797 csvtabRowid, /* xRowid - read data */ |
| 798 0, /* xUpdate */ |
| 799 0, /* xBegin */ |
| 800 0, /* xSync */ |
| 801 0, /* xCommit */ |
| 802 0, /* xRollback */ |
| 803 0, /* xFindMethod */ |
| 804 0, /* xRename */ |
| 805 }; |
| 806 |
#ifdef SQLITE_TEST
/*
** For virtual table testing, a second version of the CSV module is
** provided that has an xUpdate method.  That method performs no work
** and always reports SQLITE_READONLY, because the CSV file is not really
** writable.
*/
static int csvtabUpdate(
  sqlite3_vtab *pVtab,
  int nArg,
  sqlite3_value **apArg,
  sqlite3_int64 *pRowid
){
  return SQLITE_READONLY;
}

/* Same method table as CsvModule, except that xUpdate is populated */
static sqlite3_module CsvModuleFauxWrite = {
  /* iVersion    */ 0,
  /* xCreate     */ csvtabCreate,
  /* xConnect    */ csvtabConnect,
  /* xBestIndex  */ csvtabBestIndex,
  /* xDisconnect */ csvtabDisconnect,
  /* xDestroy    */ csvtabDisconnect,
  /* xOpen       */ csvtabOpen,        /* open a cursor */
  /* xClose      */ csvtabClose,       /* close a cursor */
  /* xFilter     */ csvtabFilter,      /* configure scan constraints */
  /* xNext       */ csvtabNext,        /* advance a cursor */
  /* xEof        */ csvtabEof,         /* check for end of scan */
  /* xColumn     */ csvtabColumn,      /* read data */
  /* xRowid      */ csvtabRowid,       /* read the rowid */
  /* xUpdate     */ csvtabUpdate,
  /* xBegin      */ 0,
  /* xSync       */ 0,
  /* xCommit     */ 0,
  /* xRollback   */ 0,
  /* xFindMethod */ 0,
  /* xRename     */ 0,
};
#endif /* SQLITE_TEST */
| 839 |
| 840 #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ |
| 841 |
| 842 |
| 843 #ifdef _WIN32 |
| 844 __declspec(dllexport) |
| 845 #endif |
| 846 /* |
| 847 ** This routine is called when the extension is loaded. The new |
| 848 ** CSV virtual table module is registered with the calling database |
| 849 ** connection. |
| 850 */ |
| 851 int sqlite3_csv_init( |
| 852 sqlite3 *db, |
| 853 char **pzErrMsg, |
| 854 const sqlite3_api_routines *pApi |
| 855 ){ |
| 856 #ifndef SQLITE_OMIT_VIRTUALTABLE |
| 857 int rc; |
| 858 SQLITE_EXTENSION_INIT2(pApi); |
| 859 rc = sqlite3_create_module(db, "csv", &CsvModule, 0); |
| 860 #ifdef SQLITE_TEST |
| 861 if( rc==SQLITE_OK ){ |
| 862 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); |
| 863 } |
| 864 #endif |
| 865 return rc; |
| 866 #else |
| 867 return SQLITE_OK; |
| 868 #endif |
| 869 } |
OLD | NEW |