| OLD | NEW | 
| (Empty) |  | 
 |    1 /* | 
 |    2 ** 2006 July 10 | 
 |    3 ** | 
 |    4 ** The author disclaims copyright to this source code. | 
 |    5 ** | 
 |    6 ************************************************************************* | 
 |    7 ** Defines the interface to tokenizers used by fulltext-search.  There | 
 |    8 ** are three basic components: | 
 |    9 ** | 
 |   10 ** sqlite3_tokenizer_module is a singleton defining the tokenizer | 
 |   11 ** interface functions.  This is essentially the class structure for | 
 |   12 ** tokenizers. | 
 |   13 ** | 
 |   14 ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps | 
 |   15 ** including customization information defined at creation time. | 
 |   16 ** | 
 |   17 ** sqlite3_tokenizer_cursor is created by a tokenizer to generate | 
 |   18 ** tokens from a particular input. | 
 |   19 */ | 
 |   20 #ifndef _FTS2_TOKENIZER_H_ | 
 |   21 #define _FTS2_TOKENIZER_H_ | 
 |   22  | 
 |   23 /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. | 
 |   24 ** If tokenizers are to be allowed to call sqlite3_*() functions, then | 
 |   25 ** we will need a way to register the API consistently. | 
 |   26 */ | 
 |   27 #include "sqlite3.h" | 
 |   28  | 
 |   29 /* | 
 |   30 ** Structures used by the tokenizer interface. When a new tokenizer | 
 |   31 ** implementation is registered, the caller provides a pointer to | 
 |   32 ** an sqlite3_tokenizer_module containing pointers to the callback | 
 |   33 ** functions that make up an implementation. | 
 |   34 ** | 
 |   35 ** When an fts2 table is created, it passes any arguments passed to | 
 |   36 ** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the | 
 |   37 ** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer | 
 |   38 ** implementation. The xCreate() function in turn returns an  | 
 |   39 ** sqlite3_tokenizer structure representing the specific tokenizer to | 
 |   40 ** be used for the fts2 table (customized by the tokenizer clause arguments). | 
 |   41 ** | 
 |   42 ** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen() | 
 |   43 ** method is called. It returns an sqlite3_tokenizer_cursor object | 
 |   44 ** that may be used to tokenize a specific input buffer based on | 
 |   45 ** the tokenization rules supplied by a specific sqlite3_tokenizer | 
 |   46 ** object. | 
 |   47 */ | 
 |   48 typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; | 
 |   49 typedef struct sqlite3_tokenizer sqlite3_tokenizer; | 
 |   50 typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; | 
 |   51  | 
 |   52 struct sqlite3_tokenizer_module { | 
 |   53  | 
 |   54   /* | 
 |   55   ** Structure version. Should always be set to 0. | 
 |   56   */ | 
 |   57   int iVersion; | 
 |   58  | 
 |   59   /* | 
 |   60   ** Create a new tokenizer. The values in the argv[] array are the | 
 |   61   ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL | 
 |   62   ** TABLE statement that created the fts2 table. For example, if | 
 |   63   ** the following SQL is executed: | 
 |   64   ** | 
 |   65   **   CREATE .. USING fts2( ... , tokenizer <tokenizer-name> arg1 arg2) | 
 |   66   ** | 
 |   67   ** then argc is set to 2, and the argv[] array contains pointers | 
 |   68   ** to the strings "arg1" and "arg2". | 
 |   69   ** | 
 |   70   ** This method should return either SQLITE_OK (0), or an SQLite error  | 
 |   71   ** code. If SQLITE_OK is returned, then *ppTokenizer should be set | 
 |   72   ** to point at the newly created tokenizer structure. The generic | 
 |   73   ** sqlite3_tokenizer.pModule variable should not be initialised by | 
 |   74   ** this callback. The caller will do so. | 
 |   75   */ | 
 |   76   int (*xCreate)( | 
 |   77     int argc,                           /* Size of argv array */ | 
 |   78     const char *const*argv,             /* Tokenizer argument strings */ | 
 |   79     sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */ | 
 |   80   ); | 
 |   81  | 
 |   82   /* | 
 |   83   ** Destroy an existing tokenizer. The fts2 module calls this method | 
 |   84   ** exactly once for each successful call to xCreate(). | 
 |   85   */ | 
 |   86   int (*xDestroy)(sqlite3_tokenizer *pTokenizer); | 
 |   87  | 
 |   88   /* | 
 |   89   ** Create a tokenizer cursor to tokenize an input buffer. The caller | 
 |   90   ** is responsible for ensuring that the input buffer remains valid | 
 |   91   ** until the cursor is closed (using the xClose() method).  | 
 |   92   */ | 
 |   93   int (*xOpen)( | 
 |   94     sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */ | 
 |   95     const char *pInput, int nBytes,      /* Input buffer */ | 
 |   96     sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */ | 
 |   97   ); | 
 |   98  | 
 |   99   /* | 
 |  100   ** Destroy an existing tokenizer cursor. The fts2 module calls this  | 
 |  101   ** method exactly once for each successful call to xOpen(). | 
 |  102   */ | 
 |  103   int (*xClose)(sqlite3_tokenizer_cursor *pCursor); | 
 |  104  | 
 |  105   /* | 
 |  106   ** Retrieve the next token from the tokenizer cursor pCursor. This | 
 |  107   ** method should either return SQLITE_OK and set the values of the | 
 |  108   ** "OUT" variables identified below, or SQLITE_DONE to indicate that | 
 |  109   ** the end of the buffer has been reached, or an SQLite error code. | 
 |  110   ** | 
 |  111   ** *ppToken should be set to point at a buffer containing the  | 
 |  112   ** normalized version of the token (i.e. after any case-folding and/or | 
 |  113   ** stemming has been performed). *pnBytes should be set to the length | 
 |  114   ** of this buffer in bytes. The input text that generated the token is | 
 |  115   ** identified by the byte offsets returned in *piStartOffset and | 
 |  116   ** *piEndOffset. | 
 |  117   ** | 
 |  118   ** The buffer *ppToken is set to point at is managed by the tokenizer | 
 |  119   ** implementation. It is only required to be valid until the next call | 
 |  120   ** to xNext() or xClose().  | 
 |  121   */ | 
 |  122   /* TODO(shess) current implementation requires pInput to be | 
 |  123   ** nul-terminated.  This should either be fixed, or pInput/nBytes | 
 |  124   ** should be converted to zInput. | 
 |  125   */ | 
 |  126   int (*xNext)( | 
 |  127     sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */ | 
 |  128     const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */ | 
 |  129     int *piStartOffset,  /* OUT: Byte offset of token in input buffer */ | 
 |  130     int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */ | 
 |  131     int *piPosition      /* OUT: Number of tokens returned before this one */ | 
 |  132   ); | 
 |  133 }; | 
 |  134  | 
 |  135 struct sqlite3_tokenizer { | 
 |  136   const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */ | 
 |  137   /* Tokenizer implementations will typically add additional fields */ | 
 |  138 }; | 
 |  139  | 
 |  140 struct sqlite3_tokenizer_cursor { | 
 |  141   sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */ | 
 |  142   /* Tokenizer implementations will typically add additional fields */ | 
 |  143 }; | 
 |  144  | 
 |  145 #endif /* _FTS2_TOKENIZER_H_ */ | 
| OLD | NEW |