| OLD | NEW | 
 | (Empty) | 
|    1 /* |  | 
|    2 ** 2007 June 22 |  | 
|    3 ** |  | 
|    4 ** The author disclaims copyright to this source code.  In place of |  | 
|    5 ** a legal notice, here is a blessing: |  | 
|    6 ** |  | 
|    7 **    May you do good and not evil. |  | 
|    8 **    May you find forgiveness for yourself and forgive others. |  | 
|    9 **    May you share freely, never taking more than you give. |  | 
|   10 ** |  | 
|   11 ****************************************************************************** |  | 
|   12 ** |  | 
|   13 ** This is part of an SQLite module implementing full-text search. |  | 
|   14 ** This particular file implements the generic tokenizer interface. |  | 
|   15 */ |  | 
|   16  |  | 
|   17 /* |  | 
|   18 ** The code in this file is only compiled if: |  | 
|   19 ** |  | 
|   20 **     * The FTS3 module is being built as an extension |  | 
|   21 **       (in which case SQLITE_CORE is not defined), or |  | 
|   22 ** |  | 
|   23 **     * The FTS3 module is being built into the core of |  | 
|   24 **       SQLite (in which case SQLITE_ENABLE_FTS3 is defined). |  | 
|   25 */ |  | 
|   26 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |  | 
|   27  |  | 
#include "sqlite3ext.h"
#ifndef SQLITE_CORE
  SQLITE_EXTENSION_INIT1
#endif

#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#include <assert.h>
#include <stddef.h>
#include <string.h>
|   37  |  | 
|   38 /* |  | 
|   39 ** Implementation of the SQL scalar function for accessing the underlying  |  | 
|   40 ** hash table. This function may be called as follows: |  | 
|   41 ** |  | 
|   42 **   SELECT <function-name>(<key-name>); |  | 
|   43 **   SELECT <function-name>(<key-name>, <pointer>); |  | 
|   44 ** |  | 
|   45 ** where <function-name> is the name passed as the second argument |  | 
|   46 ** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer'). |  | 
|   47 ** |  | 
|   48 ** If the <pointer> argument is specified, it must be a blob value |  | 
|   49 ** containing a pointer to be stored as the hash data corresponding |  | 
|   50 ** to the string <key-name>. If <pointer> is not specified, then |  | 
|   51 ** the string <key-name> must already exist in the has table. Otherwise, |  | 
|   52 ** an error is returned. |  | 
|   53 ** |  | 
|   54 ** Whether or not the <pointer> argument is specified, the value returned |  | 
|   55 ** is a blob containing the pointer stored as the hash data corresponding |  | 
|   56 ** to string <key-name> (after the hash-table is updated, if applicable). |  | 
|   57 */ |  | 
|   58 static void scalarFunc( |  | 
|   59   sqlite3_context *context, |  | 
|   60   int argc, |  | 
|   61   sqlite3_value **argv |  | 
|   62 ){ |  | 
|   63   fts3Hash *pHash; |  | 
|   64   void *pPtr = 0; |  | 
|   65   const unsigned char *zName; |  | 
|   66   int nName; |  | 
|   67  |  | 
|   68   assert( argc==1 || argc==2 ); |  | 
|   69  |  | 
|   70   pHash = (fts3Hash *)sqlite3_user_data(context); |  | 
|   71  |  | 
|   72   zName = sqlite3_value_text(argv[0]); |  | 
|   73   nName = sqlite3_value_bytes(argv[0])+1; |  | 
|   74  |  | 
|   75   if( argc==2 ){ |  | 
|   76     void *pOld; |  | 
|   77     int n = sqlite3_value_bytes(argv[1]); |  | 
|   78     if( n!=sizeof(pPtr) ){ |  | 
|   79       sqlite3_result_error(context, "argument type mismatch", -1); |  | 
|   80       return; |  | 
|   81     } |  | 
|   82     pPtr = *(void **)sqlite3_value_blob(argv[1]); |  | 
|   83     pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr); |  | 
|   84     if( pOld==pPtr ){ |  | 
|   85       sqlite3_result_error(context, "out of memory", -1); |  | 
|   86       return; |  | 
|   87     } |  | 
|   88   }else{ |  | 
|   89     pPtr = sqlite3Fts3HashFind(pHash, zName, nName); |  | 
|   90     if( !pPtr ){ |  | 
|   91       char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); |  | 
|   92       sqlite3_result_error(context, zErr, -1); |  | 
|   93       sqlite3_free(zErr); |  | 
|   94       return; |  | 
|   95     } |  | 
|   96   } |  | 
|   97  |  | 
|   98   sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); |  | 
|   99 } |  | 
|  100  |  | 
|  101 #ifdef SQLITE_TEST |  | 
|  102  |  | 
|  103 #include <tcl.h> |  | 
|  104 #include <string.h> |  | 
|  105  |  | 
|  106 /* |  | 
|  107 ** Implementation of a special SQL scalar function for testing tokenizers  |  | 
|  108 ** designed to be used in concert with the Tcl testing framework. This |  | 
|  109 ** function must be called with two arguments: |  | 
|  110 ** |  | 
|  111 **   SELECT <function-name>(<key-name>, <input-string>); |  | 
|  112 **   SELECT <function-name>(<key-name>, <pointer>); |  | 
|  113 ** |  | 
|  114 ** where <function-name> is the name passed as the second argument |  | 
|  115 ** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') |  | 
|  116 ** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). |  | 
|  117 ** |  | 
|  118 ** The return value is a string that may be interpreted as a Tcl |  | 
|  119 ** list. For each token in the <input-string>, three elements are |  | 
|  120 ** added to the returned list. The first is the token position, the  |  | 
|  121 ** second is the token text (folded, stemmed, etc.) and the third is the |  | 
|  122 ** substring of <input-string> associated with the token. For example,  |  | 
|  123 ** using the built-in "simple" tokenizer: |  | 
|  124 ** |  | 
|  125 **   SELECT fts_tokenizer_test('simple', 'I don't see how'); |  | 
|  126 ** |  | 
|  127 ** will return the string: |  | 
|  128 ** |  | 
|  129 **   "{0 i I 1 dont don't 2 see see 3 how how}" |  | 
|  130 **    |  | 
|  131 */ |  | 
|  132 static void testFunc( |  | 
|  133   sqlite3_context *context, |  | 
|  134   int argc, |  | 
|  135   sqlite3_value **argv |  | 
|  136 ){ |  | 
|  137   fts3Hash *pHash; |  | 
|  138   sqlite3_tokenizer_module *p; |  | 
|  139   sqlite3_tokenizer *pTokenizer = 0; |  | 
|  140   sqlite3_tokenizer_cursor *pCsr = 0; |  | 
|  141  |  | 
|  142   const char *zErr = 0; |  | 
|  143  |  | 
|  144   const char *zName; |  | 
|  145   int nName; |  | 
|  146   const char *zInput; |  | 
|  147   int nInput; |  | 
|  148  |  | 
|  149   const char *zArg = 0; |  | 
|  150  |  | 
|  151   const char *zToken; |  | 
|  152   int nToken; |  | 
|  153   int iStart; |  | 
|  154   int iEnd; |  | 
|  155   int iPos; |  | 
|  156  |  | 
|  157   Tcl_Obj *pRet; |  | 
|  158  |  | 
|  159   assert( argc==2 || argc==3 ); |  | 
|  160  |  | 
|  161   nName = sqlite3_value_bytes(argv[0]); |  | 
|  162   zName = (const char *)sqlite3_value_text(argv[0]); |  | 
|  163   nInput = sqlite3_value_bytes(argv[argc-1]); |  | 
|  164   zInput = (const char *)sqlite3_value_text(argv[argc-1]); |  | 
|  165  |  | 
|  166   if( argc==3 ){ |  | 
|  167     zArg = (const char *)sqlite3_value_text(argv[1]); |  | 
|  168   } |  | 
|  169  |  | 
|  170   pHash = (fts3Hash *)sqlite3_user_data(context); |  | 
|  171   p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); |  | 
|  172  |  | 
|  173   if( !p ){ |  | 
|  174     char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); |  | 
|  175     sqlite3_result_error(context, zErr, -1); |  | 
|  176     sqlite3_free(zErr); |  | 
|  177     return; |  | 
|  178   } |  | 
|  179  |  | 
|  180   pRet = Tcl_NewObj(); |  | 
|  181   Tcl_IncrRefCount(pRet); |  | 
|  182  |  | 
|  183   if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){ |  | 
|  184     zErr = "error in xCreate()"; |  | 
|  185     goto finish; |  | 
|  186   } |  | 
|  187   pTokenizer->pModule = p; |  | 
|  188   if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){ |  | 
|  189     zErr = "error in xOpen()"; |  | 
|  190     goto finish; |  | 
|  191   } |  | 
|  192   pCsr->pTokenizer = pTokenizer; |  | 
|  193  |  | 
|  194   while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ |  | 
|  195     Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); |  | 
|  196     Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); |  | 
|  197     zToken = &zInput[iStart]; |  | 
|  198     nToken = iEnd-iStart; |  | 
|  199     Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); |  | 
|  200   } |  | 
|  201  |  | 
|  202   if( SQLITE_OK!=p->xClose(pCsr) ){ |  | 
|  203     zErr = "error in xClose()"; |  | 
|  204     goto finish; |  | 
|  205   } |  | 
|  206   if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ |  | 
|  207     zErr = "error in xDestroy()"; |  | 
|  208     goto finish; |  | 
|  209   } |  | 
|  210  |  | 
|  211 finish: |  | 
|  212   if( zErr ){ |  | 
|  213     sqlite3_result_error(context, zErr, -1); |  | 
|  214   }else{ |  | 
|  215     sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); |  | 
|  216   } |  | 
|  217   Tcl_DecrRefCount(pRet); |  | 
|  218 } |  | 
|  219  |  | 
|  220 static |  | 
|  221 int registerTokenizer( |  | 
|  222   sqlite3 *db,  |  | 
|  223   char *zName,  |  | 
|  224   const sqlite3_tokenizer_module *p |  | 
|  225 ){ |  | 
|  226   int rc; |  | 
|  227   sqlite3_stmt *pStmt; |  | 
|  228   const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; |  | 
|  229  |  | 
|  230   rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |  | 
|  231   if( rc!=SQLITE_OK ){ |  | 
|  232     return rc; |  | 
|  233   } |  | 
|  234  |  | 
|  235   sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |  | 
|  236   sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); |  | 
|  237   sqlite3_step(pStmt); |  | 
|  238  |  | 
|  239   return sqlite3_finalize(pStmt); |  | 
|  240 } |  | 
|  241  |  | 
|  242 static |  | 
|  243 int queryTokenizer( |  | 
|  244   sqlite3 *db,  |  | 
|  245   char *zName,   |  | 
|  246   const sqlite3_tokenizer_module **pp |  | 
|  247 ){ |  | 
|  248   int rc; |  | 
|  249   sqlite3_stmt *pStmt; |  | 
|  250   const char zSql[] = "SELECT fts3_tokenizer(?)"; |  | 
|  251  |  | 
|  252   *pp = 0; |  | 
|  253   rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |  | 
|  254   if( rc!=SQLITE_OK ){ |  | 
|  255     return rc; |  | 
|  256   } |  | 
|  257  |  | 
|  258   sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |  | 
|  259   if( SQLITE_ROW==sqlite3_step(pStmt) ){ |  | 
|  260     if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ |  | 
|  261       memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); |  | 
|  262     } |  | 
|  263   } |  | 
|  264  |  | 
|  265   return sqlite3_finalize(pStmt); |  | 
|  266 } |  | 
|  267  |  | 
|  268 void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); |  | 
|  269  |  | 
|  270 /* |  | 
|  271 ** Implementation of the scalar function fts3_tokenizer_internal_test(). |  | 
|  272 ** This function is used for testing only, it is not included in the |  | 
|  273 ** build unless SQLITE_TEST is defined. |  | 
|  274 ** |  | 
|  275 ** The purpose of this is to test that the fts3_tokenizer() function |  | 
|  276 ** can be used as designed by the C-code in the queryTokenizer and |  | 
|  277 ** registerTokenizer() functions above. These two functions are repeated |  | 
|  278 ** in the README.tokenizer file as an example, so it is important to |  | 
|  279 ** test them. |  | 
|  280 ** |  | 
|  281 ** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar |  | 
|  282 ** function with no arguments. An assert() will fail if a problem is |  | 
|  283 ** detected. i.e.: |  | 
|  284 ** |  | 
|  285 **     SELECT fts3_tokenizer_internal_test(); |  | 
|  286 ** |  | 
|  287 */ |  | 
|  288 static void intTestFunc( |  | 
|  289   sqlite3_context *context, |  | 
|  290   int argc, |  | 
|  291   sqlite3_value **argv |  | 
|  292 ){ |  | 
|  293   int rc; |  | 
|  294   const sqlite3_tokenizer_module *p1; |  | 
|  295   const sqlite3_tokenizer_module *p2; |  | 
|  296   sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); |  | 
|  297  |  | 
|  298   /* Test the query function */ |  | 
|  299   sqlite3Fts3SimpleTokenizerModule(&p1); |  | 
|  300   rc = queryTokenizer(db, "simple", &p2); |  | 
|  301   assert( rc==SQLITE_OK ); |  | 
|  302   assert( p1==p2 ); |  | 
|  303   rc = queryTokenizer(db, "nosuchtokenizer", &p2); |  | 
|  304   assert( rc==SQLITE_ERROR ); |  | 
|  305   assert( p2==0 ); |  | 
|  306   assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); |  | 
|  307  |  | 
|  308   /* Test the storage function */ |  | 
|  309   rc = registerTokenizer(db, "nosuchtokenizer", p1); |  | 
|  310   assert( rc==SQLITE_OK ); |  | 
|  311   rc = queryTokenizer(db, "nosuchtokenizer", &p2); |  | 
|  312   assert( rc==SQLITE_OK ); |  | 
|  313   assert( p2==p1 ); |  | 
|  314  |  | 
|  315   sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); |  | 
|  316 } |  | 
|  317  |  | 
|  318 #endif |  | 
|  319  |  | 
|  320 /* |  | 
|  321 ** Set up SQL objects in database db used to access the contents of |  | 
|  322 ** the hash table pointed to by argument pHash. The hash table must |  | 
|  323 ** been initialised to use string keys, and to take a private copy  |  | 
|  324 ** of the key when a value is inserted. i.e. by a call similar to: |  | 
|  325 ** |  | 
|  326 **    sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); |  | 
|  327 ** |  | 
|  328 ** This function adds a scalar function (see header comment above |  | 
|  329 ** scalarFunc() in this file for details) and, if ENABLE_TABLE is |  | 
|  330 ** defined at compilation time, a temporary virtual table (see header  |  | 
|  331 ** comment above struct HashTableVtab) to the database schema. Both  |  | 
|  332 ** provide read/write access to the contents of *pHash. |  | 
|  333 ** |  | 
|  334 ** The third argument to this function, zName, is used as the name |  | 
|  335 ** of both the scalar and, if created, the virtual table. |  | 
|  336 */ |  | 
|  337 int sqlite3Fts3InitHashTable( |  | 
|  338   sqlite3 *db,  |  | 
|  339   fts3Hash *pHash,  |  | 
|  340   const char *zName |  | 
|  341 ){ |  | 
|  342   int rc = SQLITE_OK; |  | 
|  343   void *p = (void *)pHash; |  | 
|  344   const int any = SQLITE_ANY; |  | 
|  345   char *zTest = 0; |  | 
|  346   char *zTest2 = 0; |  | 
|  347  |  | 
|  348 #ifdef SQLITE_TEST |  | 
|  349   void *pdb = (void *)db; |  | 
|  350   zTest = sqlite3_mprintf("%s_test", zName); |  | 
|  351   zTest2 = sqlite3_mprintf("%s_internal_test", zName); |  | 
|  352   if( !zTest || !zTest2 ){ |  | 
|  353     rc = SQLITE_NOMEM; |  | 
|  354   } |  | 
|  355 #endif |  | 
|  356  |  | 
|  357   if( rc!=SQLITE_OK |  | 
|  358    || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0)) |  | 
|  359    || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0)) |  | 
|  360 #ifdef SQLITE_TEST |  | 
|  361    || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0)) |  | 
|  362    || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0)) |  | 
|  363    || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0)) |  | 
|  364 #endif |  | 
|  365   ); |  | 
|  366  |  | 
|  367   sqlite3_free(zTest); |  | 
|  368   sqlite3_free(zTest2); |  | 
|  369   return rc; |  | 
|  370 } |  | 
|  371  |  | 
|  372 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |  | 
| OLD | NEW |