third_party/sqlite/sqlite-src-3170000/ext/fts5/fts5.h - Issue 2747283002: [sql] Import reference version of SQLite 3.17..

Side by Side Diff: third_party/sqlite/sqlite-src-3170000/ext/fts5/fts5.h

Issue 2747283002: [sql] Import reference version of SQLite 3.17.. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/sqlite/sqlite-src-3170000/ext/fts5/extract_api_docs.tcl ('k') | third_party/sqlite/sqlite-src-3170000/ext/fts5/fts5Int.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*

	2 ** 2014 May 31

	3 **

	4 ** The author disclaims copyright to this source code. In place of

	5 ** a legal notice, here is a blessing:

	6 **

	7 ** May you do good and not evil.

	8 ** May you find forgiveness for yourself and forgive others.

	9 ** May you share freely, never taking more than you give.

	10 **

	11 ******************************************************************************

	12 **

	13 ** Interfaces to extend FTS5. Using the interfaces defined in this file,

	14 ** FTS5 may be extended with:

	15 **

	16 ** * custom tokenizers, and

	17 ** * custom auxiliary functions.

	18 */

	19

	20

	21 #ifndef _FTS5_H

	22 #define _FTS5_H

	23

	24 #include "sqlite3.h"

	25

	26 #ifdef __cplusplus

	27 extern "C" {

	28 #endif

	29

	30 /*************************************************************************

	31 ** CUSTOM AUXILIARY FUNCTIONS

	32 **

	33 ** Virtual table implementations may overload SQL functions by implementing

	34 ** the sqlite3_module.xFindFunction() method.

	35 */

	36

	37 typedef struct Fts5ExtensionApi Fts5ExtensionApi;

	38 typedef struct Fts5Context Fts5Context;

	39 typedef struct Fts5PhraseIter Fts5PhraseIter;

	40

	41 typedef void (*fts5_extension_function)( /* Signature of an FTS5 auxiliary function implementation */

	42 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */

	43 Fts5Context *pFts, /* First arg to pass to pApi functions */

	44 sqlite3_context *pCtx, /* Context for returning result/error */

	45 int nVal, /* Number of values in apVal[] array */

	46 sqlite3_value **apVal /* Array of trailing arguments */

	47 );

	48

	49 struct Fts5PhraseIter { /* Iterator state used with xPhraseFirst()/xPhraseNext() and xPhraseFirstColumn()/xPhraseNextColumn() */

	50 const unsigned char *a; /* Internal iterator state; applications should not modify it directly */

	51 const unsigned char *b; /* Internal iterator state; applications should not modify it directly */

	52 };

	53

	54 /*

	55 ** EXTENSION API FUNCTIONS

	56 **

	57 ** xUserData(pFts):

	58 ** Return a copy of the context pointer the extension function was

	59 ** registered with.

	60 **

	61 ** xColumnTotalSize(pFts, iCol, pnToken):

	62 ** If parameter iCol is less than zero, set output variable *pnToken

	63 ** to the total number of tokens in the FTS5 table. Or, if iCol is

	64 ** non-negative but less than the number of columns in the table, return

	65 ** the total number of tokens in column iCol, considering all rows in

	66 ** the FTS5 table.

	67 **

	68 ** If parameter iCol is greater than or equal to the number of columns

	69 ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.

	70 ** an OOM condition or IO error), an appropriate SQLite error code is

	71 ** returned.

	72 **

	73 ** xColumnCount(pFts):

	74 ** Return the number of columns in the table.

	75 **

	76 ** xColumnSize(pFts, iCol, pnToken):

	77 ** If parameter iCol is less than zero, set output variable *pnToken

	78 ** to the total number of tokens in the current row. Or, if iCol is

	79 ** non-negative but less than the number of columns in the table, set

	80 ** *pnToken to the number of tokens in column iCol of the current row.

	81 **

	82 ** If parameter iCol is greater than or equal to the number of columns

	83 ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.

	84 ** an OOM condition or IO error), an appropriate SQLite error code is

	85 ** returned.

	86 **

	87 ** This function may be quite inefficient if used with an FTS5 table

	88 ** created with the "columnsize=0" option.

	89 **

	90 ** xColumnText:

	91 ** This function attempts to retrieve the text of column iCol of the

	92 ** current document. If successful, (*pz) is set to point to a buffer

	93 ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes

	94 ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,

	95 ** if an error occurs, an SQLite error code is returned and the final values

	96 ** of (*pz) and (*pn) are undefined.

	97 **

	98 ** xPhraseCount:

	99 ** Returns the number of phrases in the current query expression.

	100 **

	101 ** xPhraseSize:

	102 ** Returns the number of tokens in phrase iPhrase of the query. Phrases

	103 ** are numbered starting from zero.

	104 **

	105 ** xInstCount:

	106 ** Set *pnInst to the total number of occurrences of all phrases within

	107 ** the query within the current row. Return SQLITE_OK if successful, or

	108 ** an error code (i.e. SQLITE_NOMEM) if an error occurs.

	109 **

	110 ** This API can be quite slow if used with an FTS5 table created with the

	111 ** "detail=none" or "detail=column" option. If the FTS5 table is created

	112 ** with either "detail=none" or "detail=column" and "content=" option

	113 ** (i.e. if it is a contentless table), then this API always returns 0.

	114 **

	115 ** xInst:

	116 ** Query for the details of phrase match iIdx within the current row.

	117 ** Phrase matches are numbered starting from zero, so the iIdx argument

	118 ** should be greater than or equal to zero and smaller than the value

	119 ** output by xInstCount().

	120 **

	121 ** Usually, output parameter *piPhrase is set to the phrase number, *piCol

	122 ** to the column in which it occurs and *piOff the token offset of the

	123 ** first token of the phrase. The exception is if the table was created

	124 ** with the offsets=0 option specified. In this case *piOff is always

	125 ** set to -1.

	126 **

	127 ** Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM)

	128 ** if an error occurs.

	129 **

	130 ** This API can be quite slow if used with an FTS5 table created with the

	131 ** "detail=none" or "detail=column" option.

	132 **

	133 ** xRowid:

	134 ** Returns the rowid of the current row.

	135 **

	136 ** xTokenize:

	137 ** Tokenize text using the tokenizer belonging to the FTS5 table.

	138 **

	139 ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):

	140 ** This API function is used to query the FTS table for phrase iPhrase

	141 ** of the current query. Specifically, a query equivalent to:

	142 **

	143 ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid

	144 **

	145 ** with $p set to a phrase equivalent to the phrase iPhrase of the

	146 ** current query is executed. Any column filter that applies to

	147 ** phrase iPhrase of the current query is included in $p. For each

	148 ** row visited, the callback function passed as the fourth argument

	149 ** is invoked. The context and API objects passed to the callback

	150 ** function may be used to access the properties of each matched row.

	151 ** Invoking Api.xUserData() returns a copy of the pointer passed as

	152 ** the third argument to pUserData.

	153 **

	154 ** If the callback function returns any value other than SQLITE_OK, the

	155 ** query is abandoned and the xQueryPhrase function returns immediately.

	156 ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.

	157 ** Otherwise, the error code is propagated upwards.

	158 **

	159 ** If the query runs to completion without incident, SQLITE_OK is returned.

	160 ** Or, if some error occurs before the query completes or is aborted by

	161 ** the callback, an SQLite error code is returned.

	162 **

	163 **

	164 ** xSetAuxdata(pFts5, pAux, xDelete)

	165 **

	166 ** Save the pointer passed as the second argument as the extension function's

	167 ** "auxiliary data". The pointer may then be retrieved by the current or any

	168 ** future invocation of the same fts5 extension function made as part

	169 ** of the same MATCH query using the xGetAuxdata() API.

	170 **

	171 ** Each extension function is allocated a single auxiliary data slot for

	172 ** each FTS query (MATCH expression). If the extension function is invoked

	173 ** more than once for a single FTS query, then all invocations share a

	174 ** single auxiliary data context.

	175 **

	176 ** If there is already an auxiliary data pointer when this function is

	177 ** invoked, then it is replaced by the new pointer. If an xDelete callback

	178 ** was specified along with the original pointer, it is invoked at this

	179 ** point.

	180 **

	181 ** The xDelete callback, if one is specified, is also invoked on the

	182 ** auxiliary data pointer after the FTS5 query has finished.

	183 **

	184 ** If an error (e.g. an OOM condition) occurs within this function,

	185 ** the auxiliary data is set to NULL and an error code returned. If the

	186 ** xDelete parameter was not NULL, it is invoked on the auxiliary data

	187 ** pointer before returning.

	188 **

	189 **

	190 ** xGetAuxdata(pFts5, bClear)

	191 **

	192 ** Returns the current auxiliary data pointer for the fts5 extension

	193 ** function. See the xSetAuxdata() method for details.

	194 **

	195 ** If the bClear argument is non-zero, then the auxiliary data is cleared

	196 ** (set to NULL) before this function returns. In this case the xDelete,

	197 ** if any, is not invoked.

	198 **

	199 **

	200 ** xRowCount(pFts5, pnRow)

	201 **

	202 ** This function is used to retrieve the total number of rows in the table.

	203 ** In other words, the same value that would be returned by:

	204 **

	205 ** SELECT count(*) FROM ftstable;

	206 **

	207 ** xPhraseFirst()

	208 ** This function is used, along with type Fts5PhraseIter and the xPhraseNext

	209 ** method, to iterate through all instances of a single query phrase within

	210 ** the current row. This is the same information as is accessible via the

	211 ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient

	212 ** to use, this API may be faster under some circumstances. To iterate

	213 ** through instances of phrase iPhrase, use the following code:

	214 **

	215 ** Fts5PhraseIter iter;

	216 ** int iCol, iOff;

	217 ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);

	218 ** iCol>=0;

	219 ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)

	220 ** ){

	221 ** // An instance of phrase iPhrase at offset iOff of column iCol

	222 ** }

	223 **

	224 ** The Fts5PhraseIter structure is defined above. Applications should not

	225 ** modify this structure directly - it should only be used as shown above

	226 ** with the xPhraseFirst() and xPhraseNext() API methods (and by

	227 ** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).

	228 **

	229 ** This API can be quite slow if used with an FTS5 table created with the

	230 ** "detail=none" or "detail=column" option. If the FTS5 table is created

	231 ** with either "detail=none" or "detail=column" and "content=" option

	232 ** (i.e. if it is a contentless table), then this API always iterates

	233 ** through an empty set (all calls to xPhraseFirst() set iCol to -1).

	234 **

	235 ** xPhraseNext()

	236 ** See xPhraseFirst above.

	237 **

	238 ** xPhraseFirstColumn()

	239 ** This function and xPhraseNextColumn() are similar to the xPhraseFirst()

	240 ** and xPhraseNext() APIs described above. The difference is that instead

	241 ** of iterating through all instances of a phrase in the current row, these

	242 ** APIs are used to iterate through the set of columns in the current row

	243 ** that contain one or more instances of a specified phrase. For example:

	244 **

	245 ** Fts5PhraseIter iter;

	246 ** int iCol;

	247 ** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);

	248 ** iCol>=0;

	249 ** pApi->xPhraseNextColumn(pFts, &iter, &iCol)

	250 ** ){

	251 ** // Column iCol contains at least one instance of phrase iPhrase

	252 ** }

	253 **

	254 ** This API can be quite slow if used with an FTS5 table created with the

	255 ** "detail=none" option. If the FTS5 table is created with both the

	256 ** "detail=none" and "content=" options (i.e. if it is a contentless table),

	257 ** then this API always iterates through an empty set (all calls to

	258 ** xPhraseFirstColumn() set iCol to -1).

	259 **

	260 ** The information accessed using this API and its companion

	261 ** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext

	262 ** (or xInst/xInstCount). The chief advantage of this API is that it is

	263 ** significantly more efficient than those alternatives when used with

	264 ** "detail=column" tables.

	265 **

	266 ** xPhraseNextColumn()

	267 ** See xPhraseFirstColumn above.

	268 */

	269 struct Fts5ExtensionApi { /* Method table offered to auxiliary functions; each method is described in the comment above */

	270 int iVersion; /* Currently always set to 3 */

	271

	272 void *(*xUserData)(Fts5Context*);

	273

	274 int (*xColumnCount)(Fts5Context*);

	275 int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);

	276 int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

	277

	278 int (*xTokenize)(Fts5Context*,

	279 const char *pText, int nText, /* Text to tokenize */

	280 void *pCtx, /* Context passed to xToken() */

	281 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */

	282 );

	283

	284 int (*xPhraseCount)(Fts5Context*);

	285 int (*xPhraseSize)(Fts5Context*, int iPhrase);

	286

	287 int (*xInstCount)(Fts5Context*, int *pnInst);

	288 int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);

	289

	290 sqlite3_int64 (*xRowid)(Fts5Context*);

	291 int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);

	292 int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);

	293

	294 int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,

	295 int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)

	296 );

	297 int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));

	298 void *(*xGetAuxdata)(Fts5Context*, int bClear);

	299

	300 int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);

	301 void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);

	302

	303 int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);

	304 void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);

	305 };

	306

	307 /*

	308 ** CUSTOM AUXILIARY FUNCTIONS

	309 *************************************************************************/

	310

	311 /*************************************************************************

	312 ** CUSTOM TOKENIZERS

	313 **

	314 ** Applications may also register custom tokenizer types. A tokenizer

	315 ** is registered by providing fts5 with a populated instance of the

	316 ** following structure. All structure methods must be defined, setting

	317 ** any member of the fts5_tokenizer struct to NULL leads to undefined

	318 ** behaviour. The structure methods are expected to function as follows:

	319 **

	320 ** xCreate:

	321 ** This function is used to allocate and initialize a tokenizer instance.

	322 ** A tokenizer instance is required to actually tokenize text.

	323 **

	324 ** The first argument passed to this function is a copy of the (void*)

	325 ** pointer provided by the application when the fts5_tokenizer object

	326 ** was registered with FTS5 (the third argument to xCreateTokenizer()).

	327 ** The second and third arguments are an array of nul-terminated strings

	328 ** containing the tokenizer arguments, if any, specified following the

	329 ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used

	330 ** to create the FTS5 table.

	331 **

	332 ** The final argument is an output variable. If successful, (*ppOut)

	333 ** should be set to point to the new tokenizer handle and SQLITE_OK

	334 ** returned. If an error occurs, some value other than SQLITE_OK should

	335 ** be returned. In this case, fts5 assumes that the final value of *ppOut

	336 ** is undefined.

	337 **

	338 ** xDelete:

	339 ** This function is invoked to delete a tokenizer handle previously

	340 ** allocated using xCreate(). Fts5 guarantees that this function will

	341 ** be invoked exactly once for each successful call to xCreate().

	342 **

	343 ** xTokenize:

	344 ** This function is expected to tokenize the nText byte string indicated

	345 ** by argument pText. pText may or may not be nul-terminated. The first

	346 ** argument passed to this function is a pointer to an Fts5Tokenizer object

	347 ** returned by an earlier call to xCreate().

	348 **

	349 ** The third argument indicates the reason that FTS5 is requesting

	350 ** tokenization of the supplied text. This is always one of the following

	351 ** four values:

	352 **

	353 ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into

	354 ** or removed from the FTS table. The tokenizer is being invoked to

	355 ** determine the set of tokens to add to (or delete from) the

	356 ** FTS index.

	357 **

	358 ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed

	359 ** against the FTS index. The tokenizer is being called to tokenize

	360 ** a bareword or quoted string specified as part of the query.

	361 **

	362 ** <li> <b>(FTS5_TOKENIZE_QUERY \| FTS5_TOKENIZE_PREFIX)</b> - Same as

	363 ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is

	364 ** followed by a "*" character, indicating that the last token

	365 ** returned by the tokenizer will be treated as a token prefix.

	366 **

	367 ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to

	368 ** satisfy an Fts5ExtensionApi.xTokenize() request made by an auxiliary

	369 ** function. Or an Fts5ExtensionApi.xColumnSize() request made by the same

	370 ** on a columnsize=0 database.

	371 ** </ul>

	372 **

	373 ** For each token in the input string, the supplied callback xToken() must

	374 ** be invoked. The first argument to it should be a copy of the pointer

	375 ** passed as the second argument to xTokenize(). The third and fourth

	376 ** arguments are a pointer to a buffer containing the token text, and the

	377 ** size of the token in bytes. The fifth and sixth arguments are the byte offsets

	378 ** of the first byte of and first byte immediately following the text from

	379 ** which the token is derived within the input.

	380 **

	381 ** The second argument passed to the xToken() callback ("tflags") should

	382 ** normally be set to 0. The exception is if the tokenizer supports

	383 ** synonyms. In this case see the discussion below for details.

	384 **

	385 ** FTS5 assumes the xToken() callback is invoked for each token in the

	386 ** order that they occur within the input text.

	387 **

	388 ** If an xToken() callback returns any value other than SQLITE_OK, then

	389 ** the tokenization should be abandoned and the xTokenize() method should

	390 ** immediately return a copy of the xToken() return value. Or, if the

	391 ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,

	392 ** if an error occurs with the xTokenize() implementation itself, it

	393 ** may abandon the tokenization and return any error code other than

	394 ** SQLITE_OK or SQLITE_DONE.

	395 **

	396 ** SYNONYM SUPPORT

	397 **

	398 ** Custom tokenizers may also support synonyms. Consider a case in which a

	399 ** user wishes to query for a phrase such as "first place". Using the

	400 ** built-in tokenizers, the FTS5 query 'first + place' will match instances

	401 ** of "first place" within the document set, but not alternative forms

	402 ** such as "1st place". In some applications, it would be better to match

	403 ** all instances of "first place" or "1st place" regardless of which form

	404 ** the user specified in the MATCH query text.

	405 **

	406 ** There are several ways to approach this in FTS5:

	407 **

	408 ** <ol><li> By mapping all synonyms to a single token.

	409 ** In the above example, this means that the tokenizer returns the

	410 ** same token for inputs "first" and "1st". Say that token is in

	411 ** fact "first", so that when the user inserts the document "I won

	412 ** 1st place" entries are added to the index for tokens "i", "won",

	413 ** "first" and "place". If the user then queries for '1st + place',

	414 ** the tokenizer substitutes "first" for "1st" and the query works

	415 ** as expected.

	416 **

	417 ** <li> By querying the index for all synonyms of each query term separately.

	418 ** In this case, when tokenizing query text, the tokenizer may

	419 ** provide multiple synonyms for a single term within the document.

	420 ** FTS5 then queries the index for each synonym individually. For

	421 ** example, faced with the query:

	422 **

	423 ** <codeblock>

	424 ** ... MATCH 'first place'</codeblock>

	425 **

	426 ** the tokenizer offers both "1st" and "first" as synonyms for the

	427 ** first token in the MATCH query and FTS5 effectively runs a query

	428 ** similar to:

	429 **

	430 ** <codeblock>

	431 ** ... MATCH '(first OR 1st) place'</codeblock>

	432 **

	433 ** except that, for the purposes of auxiliary functions, the query

	434 ** still appears to contain just two phrases - "(first OR 1st)"

	435 ** being treated as a single phrase.

	436 **

	437 ** <li> By adding multiple synonyms for a single term to the FTS index.

	438 ** Using this method, when tokenizing document text, the tokenizer

	439 ** provides multiple synonyms for each token. So that when a

	440 ** document such as "I won first place" is tokenized, entries are

	441 ** added to the FTS index for "i", "won", "first", "1st" and

	442 ** "place".

	443 **

	444 ** This way, even if the tokenizer does not provide synonyms

	445 ** when tokenizing query text (it should not - to do so would be

	446 ** inefficient), it doesn't matter if the user queries for

	447 ** 'first + place' or '1st + place', as there are entries in the

	448 ** FTS index corresponding to both forms of the first token.

	449 ** </ol>

	450 **

	451 ** Whether it is parsing document or query text, any call to xToken that

	452 ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit

	453 ** is considered to supply a synonym for the previous token. For example,

	454 ** when parsing the document "I won first place", a tokenizer that supports

	455 ** synonyms would call xToken() 5 times, as follows:

	456 **

	457 ** <codeblock>

	458 ** xToken(pCtx, 0, "i", 1, 0, 1);

	459 ** xToken(pCtx, 0, "won", 3, 2, 5);

	460 ** xToken(pCtx, 0, "first", 5, 6, 11);

	461 ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);

	462 ** xToken(pCtx, 0, "place", 5, 12, 17);

	463 **</codeblock>

	464 **

	465 ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time

	466 ** xToken() is called. Multiple synonyms may be specified for a single token

	467 ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.

	468 ** There is no limit to the number of synonyms that may be provided for a

	469 ** single token.

	470 **

	471 ** In many cases, method (1) above is the best approach. It does not add

	472 ** extra data to the FTS index or require FTS5 to query for multiple terms,

	473 ** so it is efficient in terms of disk space and query speed. However, it

	474 ** does not support prefix queries very well. If, as suggested above, the

	475 ** token "first" is substituted for "1st" by the tokenizer, then the query:

	476 **

	477 ** <codeblock>

	478 ** ... MATCH '1s*'</codeblock>

	479 **

	480 ** will not match documents that contain the token "1st" (as the tokenizer

	481 ** will probably not map "1s" to any prefix of "first").

	482 **

	483 ** For full prefix support, method (3) may be preferred. In this case,

	484 ** because the index contains entries for both "first" and "1st", prefix

	485 ** queries such as 'fi*' or '1s*' will match correctly. However, because

	486 ** extra entries are added to the FTS index, this method uses more space

	487 ** within the database.

	488 **

	489 ** Method (2) offers a midpoint between (1) and (3). Using this method,

	490 ** a query such as '1s*' will match documents that contain the literal

	491 ** token "1st", but not "first" (assuming the tokenizer is not able to

	492 ** provide synonyms for prefixes). However, a non-prefix query like '1st'

	493 ** will match against "1st" and "first". This method does not require

	494 ** extra disk space, as no extra entries are added to the FTS index.

	495 ** On the other hand, it may require more CPU cycles to run MATCH queries,

	496 ** as separate queries of the FTS index are required for each synonym.

	497 **

	498 ** When using methods (2) or (3), it is important that the tokenizer only

	499 ** provide synonyms when tokenizing document text (method (3)) or query

	500 ** text (method (2)), not both. Doing so will not cause any errors, but is

	501 ** inefficient.

	502 */

	503 typedef struct Fts5Tokenizer Fts5Tokenizer;

	504 typedef struct fts5_tokenizer fts5_tokenizer;

	505 struct fts5_tokenizer { /* Custom tokenizer methods; every member must be non-NULL */

	506 int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);

	507 void (*xDelete)(Fts5Tokenizer*);

	508 int (*xTokenize)(Fts5Tokenizer*,

	509 void *pCtx,

	510 int flags, /* Mask of FTS5_TOKENIZE_* flags */

	511 const char *pText, int nText,

	512 int (*xToken)(

	513 void *pCtx, /* Copy of 2nd argument to xTokenize() */

	514 int tflags, /* Mask of FTS5_TOKEN_* flags */

	515 const char *pToken, /* Pointer to buffer containing token */

	516 int nToken, /* Size of token in bytes */

	517 int iStart, /* Byte offset of token within input text */

	518 int iEnd /* Byte offset of end of token within input text */

	519 )

	520 );

	521 };

	522

	523 /* Flags that may be passed as the third argument to xTokenize() */

	524 #define FTS5_TOKENIZE_QUERY 0x0001 /* Tokenizing a bareword or quoted string of a MATCH query */

	525 #define FTS5_TOKENIZE_PREFIX 0x0002 /* Combined with QUERY: the last token is treated as a prefix */

	526 #define FTS5_TOKENIZE_DOCUMENT 0x0004 /* Document being inserted into or removed from the table */

	527 #define FTS5_TOKENIZE_AUX 0x0008 /* Request made on behalf of an auxiliary function */

	528

	529 /* Flags that may be passed by the tokenizer implementation back to FTS5

	530 ** as the second argument ("tflags") to the supplied xToken callback. */

	531 #define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */

	532

	533 /*

	534 ** END OF CUSTOM TOKENIZERS

	535 *************************************************************************/

	536

	537 /*************************************************************************

	538 ** FTS5 EXTENSION REGISTRATION API

	539 */

	540 typedef struct fts5_api fts5_api;

	541 struct fts5_api { /* Registration interface for custom tokenizers and auxiliary functions */

	542 int iVersion; /* Currently always set to 2 */

	543

	544 /* Create a new tokenizer */

	545 int (*xCreateTokenizer)(

	546 fts5_api *pApi,

	547 const char *zName,

	548 void *pContext,

	549 fts5_tokenizer *pTokenizer,

	550 void (*xDestroy)(void*)

	551 );

	552

	553 /* Find an existing tokenizer */

	554 int (*xFindTokenizer)(

	555 fts5_api *pApi,

	556 const char *zName,

	557 void **ppContext,

	558 fts5_tokenizer *pTokenizer

	559 );

	560

	561 /* Create a new auxiliary function */

	562 int (*xCreateFunction)(

	563 fts5_api *pApi,

	564 const char *zName,

	565 void *pContext,

	566 fts5_extension_function xFunction,

	567 void (*xDestroy)(void*)

	568 );

	569 };

	570

	571 /*

	572 ** END OF REGISTRATION API

	573 *************************************************************************/

	574

	575 #ifdef __cplusplus

	576 } /* end of the 'extern "C"' block */

	577 #endif

	578

	579 #endif /* _FTS5_H */

OLD	NEW