OLD | NEW |
| (Empty) |
1 /* | |
2 ** 2001 September 15 | |
3 ** | |
4 ** The author disclaims copyright to this source code. In place of | |
5 ** a legal notice, here is a blessing: | |
6 ** | |
7 ** May you do good and not evil. | |
8 ** May you find forgiveness for yourself and forgive others. | |
9 ** May you share freely, never taking more than you give. | |
10 ** | |
11 ************************************************************************* | |
12 ** A tokenizer for SQL | |
13 ** | |
14 ** This file contains C code that splits an SQL input string up into | |
15 ** individual tokens and sends those tokens one-by-one over to the | |
16 ** parser for analysis. | |
17 */ | |
18 #include "sqliteInt.h" | |
19 #include <stdlib.h> | |
20 | |
/*
** The charMap() macro maps alphabetic characters into their
** lower-case ASCII equivalent.  On ASCII machines, this is just
** an upper-to-lower case map.  On EBCDIC machines we also need
** to adjust the encoding.  Only alphabetic characters and underscores
** need to be translated.
**
** The macro argument is parenthesized so that the (unsigned char) cast
** applies to the whole argument expression rather than to just its
** first operand.
*/
#ifdef SQLITE_ASCII
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
/*
** Lookup table indexed by EBCDIC character code.  Entries for letters
** hold the lower-case ASCII code (97..122), the entry at 0x6D holds
** 95 (ASCII underscore), and every other entry is zero.
*/
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
53 | |
54 /* | |
55 ** The sqlite3KeywordCode function looks up an identifier to determine if | |
56 ** it is a keyword. If it is a keyword, the token code of that keyword is | |
57 ** returned. If the input is not a keyword, TK_ID is returned. | |
58 ** | |
59 ** The implementation of this routine was generated by a program, | |
60 ** mkkeywordhash.c, located in the tool subdirectory of the distribution. | |
61 ** The output of the mkkeywordhash.c program is written into a file | |
62 ** named keywordhash.h and then included into this source file by | |
63 ** the #include below. | |
64 */ | |
65 #include "keywordhash.h" | |
66 | |
67 | |
68 /* | |
69 ** If X is a character that can be used in an identifier then | |
70 ** IdChar(X) will be true. Otherwise it is false. | |
71 ** | |
72 ** For ASCII, any character with the high-order bit set is | |
73 ** allowed in an identifier. For 7-bit characters, | |
74 ** sqlite3IsIdChar[X] must be 1. | |
75 ** | |
76 ** For EBCDIC, the rules are more complex but have the same | |
77 ** end result. | |
78 ** | |
79 ** Ticket #1066. the SQL standard does not allow '$' in the | |
80 ** middle of identifiers. But many SQL implementations do. | |
81 ** SQLite will allow '$' in identifiers for compatibility. | |
82 ** But the feature is undocumented. | |
83 */ | |
84 #ifdef SQLITE_ASCII | |
85 #define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) | |
86 #endif | |
87 #ifdef SQLITE_EBCDIC | |
88 const char sqlite3IsEbcdicIdChar[] = { | |
89 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ | |
90 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ | |
91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ | |
92 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ | |
93 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ | |
94 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ | |
95 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ | |
96 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ | |
97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ | |
98 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ | |
99 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ | |
100 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ | |
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ | |
102 }; | |
103 #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) | |
104 #endif | |
105 int sqlite3IsIdChar(u8 c){ return IdChar(c); } | |
106 | |
107 | |
108 /* | |
109 ** Return the length of the token that begins at z[0]. | |
110 ** Store the token type in *tokenType before returning. | |
111 */ | |
112 int sqlite3GetToken(const unsigned char *z, int *tokenType){ | |
113 int i, c; | |
114 switch( *z ){ | |
115 case ' ': case '\t': case '\n': case '\f': case '\r': { | |
116 testcase( z[0]==' ' ); | |
117 testcase( z[0]=='\t' ); | |
118 testcase( z[0]=='\n' ); | |
119 testcase( z[0]=='\f' ); | |
120 testcase( z[0]=='\r' ); | |
121 for(i=1; sqlite3Isspace(z[i]); i++){} | |
122 *tokenType = TK_SPACE; | |
123 return i; | |
124 } | |
125 case '-': { | |
126 if( z[1]=='-' ){ | |
127 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} | |
128 *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ | |
129 return i; | |
130 } | |
131 *tokenType = TK_MINUS; | |
132 return 1; | |
133 } | |
134 case '(': { | |
135 *tokenType = TK_LP; | |
136 return 1; | |
137 } | |
138 case ')': { | |
139 *tokenType = TK_RP; | |
140 return 1; | |
141 } | |
142 case ';': { | |
143 *tokenType = TK_SEMI; | |
144 return 1; | |
145 } | |
146 case '+': { | |
147 *tokenType = TK_PLUS; | |
148 return 1; | |
149 } | |
150 case '*': { | |
151 *tokenType = TK_STAR; | |
152 return 1; | |
153 } | |
154 case '/': { | |
155 if( z[1]!='*' || z[2]==0 ){ | |
156 *tokenType = TK_SLASH; | |
157 return 1; | |
158 } | |
159 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} | |
160 if( c ) i++; | |
161 *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ | |
162 return i; | |
163 } | |
164 case '%': { | |
165 *tokenType = TK_REM; | |
166 return 1; | |
167 } | |
168 case '=': { | |
169 *tokenType = TK_EQ; | |
170 return 1 + (z[1]=='='); | |
171 } | |
172 case '<': { | |
173 if( (c=z[1])=='=' ){ | |
174 *tokenType = TK_LE; | |
175 return 2; | |
176 }else if( c=='>' ){ | |
177 *tokenType = TK_NE; | |
178 return 2; | |
179 }else if( c=='<' ){ | |
180 *tokenType = TK_LSHIFT; | |
181 return 2; | |
182 }else{ | |
183 *tokenType = TK_LT; | |
184 return 1; | |
185 } | |
186 } | |
187 case '>': { | |
188 if( (c=z[1])=='=' ){ | |
189 *tokenType = TK_GE; | |
190 return 2; | |
191 }else if( c=='>' ){ | |
192 *tokenType = TK_RSHIFT; | |
193 return 2; | |
194 }else{ | |
195 *tokenType = TK_GT; | |
196 return 1; | |
197 } | |
198 } | |
199 case '!': { | |
200 if( z[1]!='=' ){ | |
201 *tokenType = TK_ILLEGAL; | |
202 return 2; | |
203 }else{ | |
204 *tokenType = TK_NE; | |
205 return 2; | |
206 } | |
207 } | |
208 case '|': { | |
209 if( z[1]!='|' ){ | |
210 *tokenType = TK_BITOR; | |
211 return 1; | |
212 }else{ | |
213 *tokenType = TK_CONCAT; | |
214 return 2; | |
215 } | |
216 } | |
217 case ',': { | |
218 *tokenType = TK_COMMA; | |
219 return 1; | |
220 } | |
221 case '&': { | |
222 *tokenType = TK_BITAND; | |
223 return 1; | |
224 } | |
225 case '~': { | |
226 *tokenType = TK_BITNOT; | |
227 return 1; | |
228 } | |
229 case '`': | |
230 case '\'': | |
231 case '"': { | |
232 int delim = z[0]; | |
233 testcase( delim=='`' ); | |
234 testcase( delim=='\'' ); | |
235 testcase( delim=='"' ); | |
236 for(i=1; (c=z[i])!=0; i++){ | |
237 if( c==delim ){ | |
238 if( z[i+1]==delim ){ | |
239 i++; | |
240 }else{ | |
241 break; | |
242 } | |
243 } | |
244 } | |
245 if( c=='\'' ){ | |
246 *tokenType = TK_STRING; | |
247 return i+1; | |
248 }else if( c!=0 ){ | |
249 *tokenType = TK_ID; | |
250 return i+1; | |
251 }else{ | |
252 *tokenType = TK_ILLEGAL; | |
253 return i; | |
254 } | |
255 } | |
256 case '.': { | |
257 #ifndef SQLITE_OMIT_FLOATING_POINT | |
258 if( !sqlite3Isdigit(z[1]) ) | |
259 #endif | |
260 { | |
261 *tokenType = TK_DOT; | |
262 return 1; | |
263 } | |
264 /* If the next character is a digit, this is a floating point | |
265 ** number that begins with ".". Fall thru into the next case */ | |
266 } | |
267 case '0': case '1': case '2': case '3': case '4': | |
268 case '5': case '6': case '7': case '8': case '9': { | |
269 testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); | |
270 testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); | |
271 testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); | |
272 testcase( z[0]=='9' ); | |
273 *tokenType = TK_INTEGER; | |
274 #ifndef SQLITE_OMIT_HEX_INTEGER | |
275 if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){ | |
276 for(i=3; sqlite3Isxdigit(z[i]); i++){} | |
277 return i; | |
278 } | |
279 #endif | |
280 for(i=0; sqlite3Isdigit(z[i]); i++){} | |
281 #ifndef SQLITE_OMIT_FLOATING_POINT | |
282 if( z[i]=='.' ){ | |
283 i++; | |
284 while( sqlite3Isdigit(z[i]) ){ i++; } | |
285 *tokenType = TK_FLOAT; | |
286 } | |
287 if( (z[i]=='e' || z[i]=='E') && | |
288 ( sqlite3Isdigit(z[i+1]) | |
289 || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) | |
290 ) | |
291 ){ | |
292 i += 2; | |
293 while( sqlite3Isdigit(z[i]) ){ i++; } | |
294 *tokenType = TK_FLOAT; | |
295 } | |
296 #endif | |
297 while( IdChar(z[i]) ){ | |
298 *tokenType = TK_ILLEGAL; | |
299 i++; | |
300 } | |
301 return i; | |
302 } | |
303 case '[': { | |
304 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} | |
305 *tokenType = c==']' ? TK_ID : TK_ILLEGAL; | |
306 return i; | |
307 } | |
308 case '?': { | |
309 *tokenType = TK_VARIABLE; | |
310 for(i=1; sqlite3Isdigit(z[i]); i++){} | |
311 return i; | |
312 } | |
313 #ifndef SQLITE_OMIT_TCL_VARIABLE | |
314 case '$': | |
315 #endif | |
316 case '@': /* For compatibility with MS SQL Server */ | |
317 case '#': | |
318 case ':': { | |
319 int n = 0; | |
320 testcase( z[0]=='$' ); testcase( z[0]=='@' ); | |
321 testcase( z[0]==':' ); testcase( z[0]=='#' ); | |
322 *tokenType = TK_VARIABLE; | |
323 for(i=1; (c=z[i])!=0; i++){ | |
324 if( IdChar(c) ){ | |
325 n++; | |
326 #ifndef SQLITE_OMIT_TCL_VARIABLE | |
327 }else if( c=='(' && n>0 ){ | |
328 do{ | |
329 i++; | |
330 }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); | |
331 if( c==')' ){ | |
332 i++; | |
333 }else{ | |
334 *tokenType = TK_ILLEGAL; | |
335 } | |
336 break; | |
337 }else if( c==':' && z[i+1]==':' ){ | |
338 i++; | |
339 #endif | |
340 }else{ | |
341 break; | |
342 } | |
343 } | |
344 if( n==0 ) *tokenType = TK_ILLEGAL; | |
345 return i; | |
346 } | |
347 #ifndef SQLITE_OMIT_BLOB_LITERAL | |
348 case 'x': case 'X': { | |
349 testcase( z[0]=='x' ); testcase( z[0]=='X' ); | |
350 if( z[1]=='\'' ){ | |
351 *tokenType = TK_BLOB; | |
352 for(i=2; sqlite3Isxdigit(z[i]); i++){} | |
353 if( z[i]!='\'' || i%2 ){ | |
354 *tokenType = TK_ILLEGAL; | |
355 while( z[i] && z[i]!='\'' ){ i++; } | |
356 } | |
357 if( z[i] ) i++; | |
358 return i; | |
359 } | |
360 /* Otherwise fall through to the next case */ | |
361 } | |
362 #endif | |
363 default: { | |
364 if( !IdChar(*z) ){ | |
365 break; | |
366 } | |
367 for(i=1; IdChar(z[i]); i++){} | |
368 *tokenType = keywordCode((char*)z, i); | |
369 return i; | |
370 } | |
371 } | |
372 *tokenType = TK_ILLEGAL; | |
373 return 1; | |
374 } | |
375 | |
376 /* | |
377 ** Run the parser on the given SQL string. The parser structure is | |
378 ** passed in. An SQLITE_ status code is returned. If an error occurs | |
379 ** then an and attempt is made to write an error message into | |
380 ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that | |
381 ** error message. | |
382 */ | |
383 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ | |
384 int nErr = 0; /* Number of errors encountered */ | |
385 int i; /* Loop counter */ | |
386 void *pEngine; /* The LEMON-generated LALR(1) parser */ | |
387 int tokenType; /* type of the next token */ | |
388 int lastTokenParsed = -1; /* type of the previous token */ | |
389 u8 enableLookaside; /* Saved value of db->lookaside.bEnabled */ | |
390 sqlite3 *db = pParse->db; /* The database connection */ | |
391 int mxSqlLen; /* Max length of an SQL string */ | |
392 | |
393 | |
394 mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; | |
395 if( db->nVdbeActive==0 ){ | |
396 db->u1.isInterrupted = 0; | |
397 } | |
398 pParse->rc = SQLITE_OK; | |
399 pParse->zTail = zSql; | |
400 i = 0; | |
401 assert( pzErrMsg!=0 ); | |
402 pEngine = sqlite3ParserAlloc(sqlite3Malloc); | |
403 if( pEngine==0 ){ | |
404 db->mallocFailed = 1; | |
405 return SQLITE_NOMEM; | |
406 } | |
407 assert( pParse->pNewTable==0 ); | |
408 assert( pParse->pNewTrigger==0 ); | |
409 assert( pParse->nVar==0 ); | |
410 assert( pParse->nzVar==0 ); | |
411 assert( pParse->azVar==0 ); | |
412 enableLookaside = db->lookaside.bEnabled; | |
413 if( db->lookaside.pStart ) db->lookaside.bEnabled = 1; | |
414 while( !db->mallocFailed && zSql[i]!=0 ){ | |
415 assert( i>=0 ); | |
416 pParse->sLastToken.z = &zSql[i]; | |
417 pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); | |
418 i += pParse->sLastToken.n; | |
419 if( i>mxSqlLen ){ | |
420 pParse->rc = SQLITE_TOOBIG; | |
421 break; | |
422 } | |
423 switch( tokenType ){ | |
424 case TK_SPACE: { | |
425 if( db->u1.isInterrupted ){ | |
426 sqlite3ErrorMsg(pParse, "interrupt"); | |
427 pParse->rc = SQLITE_INTERRUPT; | |
428 goto abort_parse; | |
429 } | |
430 break; | |
431 } | |
432 case TK_ILLEGAL: { | |
433 sqlite3DbFree(db, *pzErrMsg); | |
434 *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"", | |
435 &pParse->sLastToken); | |
436 nErr++; | |
437 goto abort_parse; | |
438 } | |
439 case TK_SEMI: { | |
440 pParse->zTail = &zSql[i]; | |
441 /* Fall thru into the default case */ | |
442 } | |
443 default: { | |
444 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); | |
445 lastTokenParsed = tokenType; | |
446 if( pParse->rc!=SQLITE_OK ){ | |
447 goto abort_parse; | |
448 } | |
449 break; | |
450 } | |
451 } | |
452 } | |
453 abort_parse: | |
454 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ | |
455 if( lastTokenParsed!=TK_SEMI ){ | |
456 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); | |
457 pParse->zTail = &zSql[i]; | |
458 } | |
459 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); | |
460 } | |
461 #ifdef YYTRACKMAXSTACKDEPTH | |
462 sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK, | |
463 sqlite3ParserStackPeak(pEngine) | |
464 ); | |
465 #endif /* YYDEBUG */ | |
466 sqlite3ParserFree(pEngine, sqlite3_free); | |
467 db->lookaside.bEnabled = enableLookaside; | |
468 if( db->mallocFailed ){ | |
469 pParse->rc = SQLITE_NOMEM; | |
470 } | |
471 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ | |
472 sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc)); | |
473 } | |
474 assert( pzErrMsg!=0 ); | |
475 if( pParse->zErrMsg ){ | |
476 *pzErrMsg = pParse->zErrMsg; | |
477 sqlite3_log(pParse->rc, "%s", *pzErrMsg); | |
478 pParse->zErrMsg = 0; | |
479 nErr++; | |
480 } | |
481 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ | |
482 sqlite3VdbeDelete(pParse->pVdbe); | |
483 pParse->pVdbe = 0; | |
484 } | |
485 #ifndef SQLITE_OMIT_SHARED_CACHE | |
486 if( pParse->nested==0 ){ | |
487 sqlite3DbFree(db, pParse->aTableLock); | |
488 pParse->aTableLock = 0; | |
489 pParse->nTableLock = 0; | |
490 } | |
491 #endif | |
492 #ifndef SQLITE_OMIT_VIRTUALTABLE | |
493 sqlite3_free(pParse->apVtabLock); | |
494 #endif | |
495 | |
496 if( !IN_DECLARE_VTAB ){ | |
497 /* If the pParse->declareVtab flag is set, do not delete any table | |
498 ** structure built up in pParse->pNewTable. The calling code (see vtab.c) | |
499 ** will take responsibility for freeing the Table structure. | |
500 */ | |
501 sqlite3DeleteTable(db, pParse->pNewTable); | |
502 } | |
503 | |
504 if( pParse->bFreeWith ) sqlite3WithDelete(db, pParse->pWith); | |
505 sqlite3DeleteTrigger(db, pParse->pNewTrigger); | |
506 for(i=pParse->nzVar-1; i>=0; i--) sqlite3DbFree(db, pParse->azVar[i]); | |
507 sqlite3DbFree(db, pParse->azVar); | |
508 while( pParse->pAinc ){ | |
509 AutoincInfo *p = pParse->pAinc; | |
510 pParse->pAinc = p->pNext; | |
511 sqlite3DbFree(db, p); | |
512 } | |
513 while( pParse->pZombieTab ){ | |
514 Table *p = pParse->pZombieTab; | |
515 pParse->pZombieTab = p->pNextZombie; | |
516 sqlite3DeleteTable(db, p); | |
517 } | |
518 if( nErr>0 && pParse->rc==SQLITE_OK ){ | |
519 pParse->rc = SQLITE_ERROR; | |
520 } | |
521 return nErr; | |
522 } | |
OLD | NEW |