Index: third_party/sqlite/src/ext/fts3/fts3_test.c |
diff --git a/third_party/sqlite/src/ext/fts3/fts3_test.c b/third_party/sqlite/src/ext/fts3/fts3_test.c |
new file mode 100644 |
index 0000000000000000000000000000000000000000..36dcc94e6dabcca9aa220eb04cf49bf4cad30878 |
--- /dev/null |
+++ b/third_party/sqlite/src/ext/fts3/fts3_test.c |
@@ -0,0 +1,584 @@ |
+/* |
+** 2011 Jun 13 |
+** |
+** The author disclaims copyright to this source code. In place of |
+** a legal notice, here is a blessing: |
+** |
+** May you do good and not evil. |
+** May you find forgiveness for yourself and forgive others. |
+** May you share freely, never taking more than you give. |
+** |
+****************************************************************************** |
+** |
+** This file is not part of the production FTS code. It is only used for |
+** testing. It contains a Tcl command that can be used to test if a document |
+** matches an FTS NEAR expression. |
+** |
+** As of March 2012, it also contains a version 1 tokenizer used for testing |
+** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly. |
+*/ |
+ |
+#include <tcl.h> |
+#include <string.h> |
+#include <assert.h> |
+ |
+#if defined(SQLITE_TEST) |
+#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) |
+ |
+/* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */ |
+#include "fts3Int.h" |
+ |
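+/* Maximum number of tokens in a single phrase passed to fts3_near_match */ |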
+#define NM_MAX_TOKEN 12 |
+ |
+typedef struct NearPhrase NearPhrase; |
+typedef struct NearDocument NearDocument; |
+typedef struct NearToken NearToken; |
+ |
+struct NearDocument { |
+  int nToken; /* Number of tokens in the document */ |
+ NearToken *aToken; /* Token array */ |
+}; |
+ |
+struct NearToken { |
+ int n; /* Length of token in bytes */ |
+ const char *z; /* Pointer to token string */ |
+}; |
+ |
+struct NearPhrase { |
+ int nNear; /* Preceding NEAR value */ |
+ int nToken; /* Number of tokens in this phrase */ |
+ NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */ |
+}; |
+ |
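+/* |
+** Return true if the tokens at aToken[] match phrase p. A phrase token |
+** that ends in '*' matches any document token of which the characters |
+** before the '*' are a prefix. |
+*/ |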
+static int nm_phrase_match( |
+ NearPhrase *p, |
+ NearToken *aToken |
+){ |
+ int ii; |
+ |
+ for(ii=0; ii<p->nToken; ii++){ |
+ NearToken *pToken = &p->aToken[ii]; |
+ if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){ |
+ if( aToken[ii].n<(pToken->n-1) ) return 0; |
+ if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0; |
+ }else{ |
+ if( aToken[ii].n!=pToken->n ) return 0; |
+ if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0; |
+ } |
+ } |
+ |
+ return 1; |
+} |
+ |
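+/* |
+** Phrase iPhrase of aPhrase[] has been found at token offset iPos of |
+** document pDoc. Return true if matching positions can be found for all |
+** subsequent (iDir==1) or all earlier (iDir==-1) phrases such that each |
+** adjacent pair of phrases satisfies its NEAR constraint. |
+*/ |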
+static int nm_near_chain( |
+ int iDir, /* Direction to iterate through aPhrase[] */ |
+ NearDocument *pDoc, /* Document to match against */ |
+ int iPos, /* Position at which iPhrase was found */ |
+ int nPhrase, /* Size of phrase array */ |
+ NearPhrase *aPhrase, /* Phrase array */ |
+ int iPhrase /* Index of phrase found */ |
+){ |
+ int iStart; |
+ int iStop; |
+ int ii; |
+ int nNear; |
+ int iPhrase2; |
+ NearPhrase *p; |
+ NearPhrase *pPrev; |
+ |
+ assert( iDir==1 || iDir==-1 ); |
+ |
+ if( iDir==1 ){ |
+ if( (iPhrase+1)==nPhrase ) return 1; |
+ nNear = aPhrase[iPhrase+1].nNear; |
+ }else{ |
+ if( iPhrase==0 ) return 1; |
+ nNear = aPhrase[iPhrase].nNear; |
+ } |
+ pPrev = &aPhrase[iPhrase]; |
+ iPhrase2 = iPhrase+iDir; |
+ p = &aPhrase[iPhrase2]; |
+ |
+ iStart = iPos - nNear - p->nToken; |
+ iStop = iPos + nNear + pPrev->nToken; |
+ |
+ if( iStart<0 ) iStart = 0; |
+ if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken; |
+ |
+ for(ii=iStart; ii<=iStop; ii++){ |
+ if( nm_phrase_match(p, &pDoc->aToken[ii]) ){ |
+ if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1; |
+ } |
+ } |
+ |
+ return 0; |
+} |
+ |
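+/* |
+** Return the number of occurrences of phrase iPhrase in document pDoc |
+** for which complete NEAR chains exist in both the forward and reverse |
+** directions through aPhrase[]. |
+*/ |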
+static int nm_match_count( |
+ NearDocument *pDoc, /* Document to match against */ |
+ int nPhrase, /* Size of phrase array */ |
+ NearPhrase *aPhrase, /* Phrase array */ |
+ int iPhrase /* Index of phrase to count matches for */ |
+){ |
+ int nOcc = 0; |
+ int ii; |
+ NearPhrase *p = &aPhrase[iPhrase]; |
+ |
+ for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){ |
+ if( nm_phrase_match(p, &pDoc->aToken[ii]) ){ |
+ /* Test forward NEAR chain (i>iPhrase) */ |
+ if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue; |
+ |
+ /* Test reverse NEAR chain (i<iPhrase) */ |
+ if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue; |
+ |
+ /* This is a real match. Increment the counter. */ |
+ nOcc++; |
+ } |
+ } |
+ |
+ return nOcc; |
+} |
+ |
+/* |
+** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTION VALUE?... |
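+** |
+** DOCUMENT is a Tcl list of document tokens. EXPR is a Tcl list that |
+** alternates phrases (each itself a list of tokens) with NEAR distances, |
+** so that {{a b} 1 c} represents the FTS expression ("a b" NEAR/1 c). |
+** The command returns true if the document matches the expression. If |
+** the -phrasecountvar option is passed, the named variable is set to a |
+** list containing the number of matches for each phrase. For example |
+** (an illustrative invocation, not one taken from the test suite): |
+** |
+**   fts3_near_match {w x y z} {{w x} 1 z} |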
+*/ |
+static int fts3_near_match_cmd( |
+ ClientData clientData, |
+ Tcl_Interp *interp, |
+ int objc, |
+ Tcl_Obj *CONST objv[] |
+){ |
+ int nTotal = 0; |
+ int rc; |
+ int ii; |
+ int nPhrase; |
+ NearPhrase *aPhrase = 0; |
+ NearDocument doc = {0, 0}; |
+ Tcl_Obj **apDocToken; |
+ Tcl_Obj *pRet; |
+ Tcl_Obj *pPhrasecount = 0; |
+ |
+ Tcl_Obj **apExprToken; |
+ int nExprToken; |
+ |
+ UNUSED_PARAMETER(clientData); |
+ |
+  /* Must have 3 or more arguments, and the total number of arguments |
+  ** must be odd (each option name is followed by a value). */ |
+ if( objc<3 || (objc%2)==0 ){ |
+ Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?..."); |
+ rc = TCL_ERROR; |
+ goto near_match_out; |
+ } |
+ |
+ for(ii=3; ii<objc; ii+=2){ |
+ enum NM_enum { NM_PHRASECOUNTS }; |
+ struct TestnmSubcmd { |
+ char *zName; |
+ enum NM_enum eOpt; |
+ } aOpt[] = { |
+ { "-phrasecountvar", NM_PHRASECOUNTS }, |
+ { 0, 0 } |
+ }; |
+ int iOpt; |
+ if( Tcl_GetIndexFromObjStruct( |
+ interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt) |
+ ){ |
+ return TCL_ERROR; |
+ } |
+ |
+ switch( aOpt[iOpt].eOpt ){ |
+ case NM_PHRASECOUNTS: |
+ pPhrasecount = objv[ii+1]; |
+ break; |
+ } |
+ } |
+ |
+ rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken); |
+ if( rc!=TCL_OK ) goto near_match_out; |
+ doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken)); |
+ for(ii=0; ii<doc.nToken; ii++){ |
+ doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n); |
+ } |
+ |
+ rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken); |
+ if( rc!=TCL_OK ) goto near_match_out; |
+ |
+ nPhrase = (nExprToken + 1) / 2; |
+ aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase)); |
+ memset(aPhrase, 0, nPhrase * sizeof(NearPhrase)); |
+ for(ii=0; ii<nPhrase; ii++){ |
+ Tcl_Obj *pPhrase = apExprToken[ii*2]; |
+ Tcl_Obj **apToken; |
+ int nToken; |
+ int jj; |
+ |
+ rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken); |
+ if( rc!=TCL_OK ) goto near_match_out; |
+ if( nToken>NM_MAX_TOKEN ){ |
+ Tcl_AppendResult(interp, "Too many tokens in phrase", 0); |
+ rc = TCL_ERROR; |
+ goto near_match_out; |
+ } |
+ for(jj=0; jj<nToken; jj++){ |
+ NearToken *pT = &aPhrase[ii].aToken[jj]; |
+ pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n); |
+ } |
+ aPhrase[ii].nToken = nToken; |
+ } |
+ for(ii=1; ii<nPhrase; ii++){ |
+ Tcl_Obj *pNear = apExprToken[2*ii-1]; |
+ int nNear; |
+ rc = Tcl_GetIntFromObj(interp, pNear, &nNear); |
+ if( rc!=TCL_OK ) goto near_match_out; |
+ aPhrase[ii].nNear = nNear; |
+ } |
+ |
+ pRet = Tcl_NewObj(); |
+ Tcl_IncrRefCount(pRet); |
+ for(ii=0; ii<nPhrase; ii++){ |
+ int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii); |
+ Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc)); |
+ nTotal += nOcc; |
+ } |
+ if( pPhrasecount ){ |
+ Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0); |
+ } |
+ Tcl_DecrRefCount(pRet); |
+ Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0)); |
+ |
+ near_match_out: |
+ ckfree((char *)aPhrase); |
+ ckfree((char *)doc.aToken); |
+ return rc; |
+} |
+ |
+/* |
+** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD? |
+** |
+** Normally, FTS uses hard-coded values to determine the minimum doclist |
+** size eligible for incremental loading, and the size of the chunks loaded |
+** when a doclist is incrementally loaded. This command allows the built-in |
+** values to be overridden for testing purposes. |
+** |
+** If present, the first argument is the chunksize in bytes to load doclists |
+** in. The second argument is the minimum doclist size in bytes to use |
+** incremental loading with. |
+** |
+** Whether or not the arguments are present, this command returns a list of |
+** two integers - the initial chunksize and threshold when the command is |
+** invoked. This can be used to restore the default behavior after running |
+** tests. For example: |
+** |
+** # Override incr-load settings for testing: |
+** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold] |
+** |
+** .... run tests .... |
+** |
+** # Restore initial incr-load settings: |
+** eval fts3_configure_incr_load $cfg |
+*/ |
+static int fts3_configure_incr_load_cmd( |
+ ClientData clientData, |
+ Tcl_Interp *interp, |
+ int objc, |
+ Tcl_Obj *CONST objv[] |
+){ |
+#ifdef SQLITE_ENABLE_FTS3 |
+ extern int test_fts3_node_chunksize; |
+ extern int test_fts3_node_chunk_threshold; |
+ Tcl_Obj *pRet; |
+ |
+ if( objc!=1 && objc!=3 ){ |
+ Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?"); |
+ return TCL_ERROR; |
+ } |
+ |
+ pRet = Tcl_NewObj(); |
+ Tcl_IncrRefCount(pRet); |
+ Tcl_ListObjAppendElement( |
+ interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize)); |
+ Tcl_ListObjAppendElement( |
+ interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold)); |
+ |
+ if( objc==3 ){ |
+ int iArg1; |
+ int iArg2; |
+ if( Tcl_GetIntFromObj(interp, objv[1], &iArg1) |
+ || Tcl_GetIntFromObj(interp, objv[2], &iArg2) |
+ ){ |
+ Tcl_DecrRefCount(pRet); |
+ return TCL_ERROR; |
+ } |
+ test_fts3_node_chunksize = iArg1; |
+ test_fts3_node_chunk_threshold = iArg2; |
+ } |
+ |
+ Tcl_SetObjResult(interp, pRet); |
+ Tcl_DecrRefCount(pRet); |
+#endif |
+ UNUSED_PARAMETER(clientData); |
+ return TCL_OK; |
+} |
+ |
+#ifdef SQLITE_ENABLE_FTS3 |
+/************************************************************************** |
+** Beginning of test tokenizer code. |
+** |
+** For language 0, this tokenizer is similar to the default 'simple' |
+** tokenizer. For other language ids, the following rules apply: |
+** |
+**   * Odd numbered languages are case-sensitive. Even numbered |
+**     languages are not. |
+** |
+**   * Language ids of 100 or greater are considered an error. |
+** |
+** The implementation assumes that the input contains only ASCII characters |
+** (i.e. those that may be encoded in UTF-8 using a single byte). |
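+** |
+** For example (illustrative): with languageid 1 the input "One two" |
+** yields the tokens "One" and "two", while with languageid 0 it yields |
+** "one" and "two". |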
+*/ |
+typedef struct test_tokenizer { |
+ sqlite3_tokenizer base; |
+} test_tokenizer; |
+ |
+typedef struct test_tokenizer_cursor { |
+ sqlite3_tokenizer_cursor base; |
+ const char *aInput; /* Input being tokenized */ |
+ int nInput; /* Size of the input in bytes */ |
+ int iInput; /* Current offset in aInput */ |
+ int iToken; /* Index of next token to be returned */ |
+ char *aBuffer; /* Buffer containing current token */ |
+  int nBuffer; /* Number of bytes allocated at aBuffer */ |
+ int iLangid; /* Configured language id */ |
+} test_tokenizer_cursor; |
+ |
+static int testTokenizerCreate( |
+ int argc, const char * const *argv, |
+ sqlite3_tokenizer **ppTokenizer |
+){ |
+ test_tokenizer *pNew; |
+ UNUSED_PARAMETER(argc); |
+ UNUSED_PARAMETER(argv); |
+ |
+ pNew = sqlite3_malloc(sizeof(test_tokenizer)); |
+ if( !pNew ) return SQLITE_NOMEM; |
+ memset(pNew, 0, sizeof(test_tokenizer)); |
+ |
+ *ppTokenizer = (sqlite3_tokenizer *)pNew; |
+ return SQLITE_OK; |
+} |
+ |
+static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){ |
+ test_tokenizer *p = (test_tokenizer *)pTokenizer; |
+ sqlite3_free(p); |
+ return SQLITE_OK; |
+} |
+ |
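+/* |
+** Open a cursor on the string pInput/nBytes. If nBytes is negative, |
+** pInput is treated as a nul-terminated string. |
+*/ |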
+static int testTokenizerOpen( |
+ sqlite3_tokenizer *pTokenizer, /* The tokenizer */ |
+ const char *pInput, int nBytes, /* String to be tokenized */ |
+ sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ |
+){ |
+ int rc = SQLITE_OK; /* Return code */ |
+ test_tokenizer_cursor *pCsr; /* New cursor object */ |
+ |
+ UNUSED_PARAMETER(pTokenizer); |
+ |
+ pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor)); |
+ if( pCsr==0 ){ |
+ rc = SQLITE_NOMEM; |
+ }else{ |
+ memset(pCsr, 0, sizeof(test_tokenizer_cursor)); |
+ pCsr->aInput = pInput; |
+ if( nBytes<0 ){ |
+ pCsr->nInput = (int)strlen(pInput); |
+ }else{ |
+ pCsr->nInput = nBytes; |
+ } |
+ } |
+ |
+ *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; |
+ return rc; |
+} |
+ |
+static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){ |
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor; |
+ sqlite3_free(pCsr->aBuffer); |
+ sqlite3_free(pCsr); |
+ return SQLITE_OK; |
+} |
+ |
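+/* Return true if c is an ASCII alphabetic character ([a-zA-Z]). */ |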
+static int testIsTokenChar(char c){ |
+ return (c>='a' && c<='z') || (c>='A' && c<='Z'); |
+} |
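+/* Fold an upper-case ASCII character to lower case. */ |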
+static int testTolower(char c){ |
+ char ret = c; |
+ if( ret>='A' && ret<='Z') ret = ret - ('A'-'a'); |
+ return ret; |
+} |
+ |
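+/* |
+** Extract the next token from the input. The token is folded to lower |
+** case unless the configured language id is odd (odd numbered languages |
+** are case-sensitive). |
+*/ |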
+static int testTokenizerNext( |
+ sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */ |
+ const char **ppToken, /* OUT: *ppToken is the token text */ |
+ int *pnBytes, /* OUT: Number of bytes in token */ |
+ int *piStartOffset, /* OUT: Starting offset of token */ |
+ int *piEndOffset, /* OUT: Ending offset of token */ |
+ int *piPosition /* OUT: Position integer of token */ |
+){ |
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor; |
+ int rc = SQLITE_OK; |
+ const char *p; |
+ const char *pEnd; |
+ |
+ p = &pCsr->aInput[pCsr->iInput]; |
+ pEnd = &pCsr->aInput[pCsr->nInput]; |
+ |
+  /* Skip past any delimiter characters (anything other than [a-zA-Z]) */ |
+ assert( p<=pEnd ); |
+ while( p<pEnd && testIsTokenChar(*p)==0 ) p++; |
+ |
+ if( p==pEnd ){ |
+ rc = SQLITE_DONE; |
+ }else{ |
+ /* Advance to the end of the token */ |
+ const char *pToken = p; |
+ int nToken; |
+ while( p<pEnd && testIsTokenChar(*p) ) p++; |
+ nToken = (int)(p-pToken); |
+ |
+    /* Copy the token into the buffer, growing the buffer if required. |
+    ** Record the new allocation size in pCsr->nBuffer so that it is only |
+    ** reallocated when a longer token is encountered. */ |
+    if( nToken>pCsr->nBuffer ){ |
+      sqlite3_free(pCsr->aBuffer); |
+      pCsr->aBuffer = sqlite3_malloc(nToken); |
+      pCsr->nBuffer = (pCsr->aBuffer ? nToken : 0); |
+    } |
+ if( pCsr->aBuffer==0 ){ |
+ rc = SQLITE_NOMEM; |
+ }else{ |
+ int i; |
+ |
+ if( pCsr->iLangid & 0x00000001 ){ |
+ for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i]; |
+ }else{ |
+ for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]); |
+ } |
+ pCsr->iToken++; |
+ pCsr->iInput = (int)(p - pCsr->aInput); |
+ |
+ *ppToken = pCsr->aBuffer; |
+ *pnBytes = nToken; |
+ *piStartOffset = (int)(pToken - pCsr->aInput); |
+ *piEndOffset = (int)(p - pCsr->aInput); |
+ *piPosition = pCsr->iToken; |
+ } |
+ } |
+ |
+ return rc; |
+} |
+ |
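+/* |
+** The xLanguage() method. Store the supplied language id in the cursor. |
+** As documented above, language ids of 100 or greater are an error. |
+*/ |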
+static int testTokenizerLanguage( |
+ sqlite3_tokenizer_cursor *pCursor, |
+ int iLangid |
+){ |
+ int rc = SQLITE_OK; |
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor; |
+ pCsr->iLangid = iLangid; |
+ if( pCsr->iLangid>=100 ){ |
+ rc = SQLITE_ERROR; |
+ } |
+ return rc; |
+} |
+#endif |
+ |
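+/* |
+** Tclcmd: fts3_test_tokenizer |
+** |
+** Return a pointer to the test tokenizer module, encoded as a Tcl |
+** bytearray. Test scripts may pass this blob to the fts3_tokenizer() |
+** SQL function to register the tokenizer with FTS3. |
+*/ |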
+static int fts3_test_tokenizer_cmd( |
+ ClientData clientData, |
+ Tcl_Interp *interp, |
+ int objc, |
+ Tcl_Obj *CONST objv[] |
+){ |
+#ifdef SQLITE_ENABLE_FTS3 |
+ static const sqlite3_tokenizer_module testTokenizerModule = { |
+ 1, |
+ testTokenizerCreate, |
+ testTokenizerDestroy, |
+ testTokenizerOpen, |
+ testTokenizerClose, |
+ testTokenizerNext, |
+ testTokenizerLanguage |
+ }; |
+ const sqlite3_tokenizer_module *pPtr = &testTokenizerModule; |
+ if( objc!=1 ){ |
+ Tcl_WrongNumArgs(interp, 1, objv, ""); |
+ return TCL_ERROR; |
+ } |
+ Tcl_SetObjResult(interp, Tcl_NewByteArrayObj( |
+ (const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *) |
+ )); |
+#endif |
+ UNUSED_PARAMETER(clientData); |
+ return TCL_OK; |
+} |
+ |
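+/* |
+** Tclcmd: fts3_test_varint INTEGER |
+** |
+** Encode INTEGER as an FTS3 varint, decode the result, and check that |
+** the value and the encoded size round-trip exactly. Values that fit in |
+** 32 bits are also tested against the 32-bit decoder. An error is |
+** returned if any check fails. |
+*/ |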
+static int fts3_test_varint_cmd( |
+ ClientData clientData, |
+ Tcl_Interp *interp, |
+ int objc, |
+ Tcl_Obj *CONST objv[] |
+){ |
+#ifdef SQLITE_ENABLE_FTS3 |
+ char aBuf[24]; |
+ int rc; |
+ Tcl_WideInt w, w2; |
+ int nByte, nByte2; |
+ |
+ if( objc!=2 ){ |
+ Tcl_WrongNumArgs(interp, 1, objv, "INTEGER"); |
+ return TCL_ERROR; |
+ } |
+ |
+ rc = Tcl_GetWideIntFromObj(interp, objv[1], &w); |
+ if( rc!=TCL_OK ) return rc; |
+ |
+ nByte = sqlite3Fts3PutVarint(aBuf, w); |
+ nByte2 = sqlite3Fts3GetVarint(aBuf, &w2); |
+  if( w!=w2 || nByte!=nByte2 ){ |
+    char *zErr = sqlite3_mprintf("error testing %lld", w); |
+    Tcl_ResetResult(interp); |
+    Tcl_AppendResult(interp, zErr, 0); |
+    sqlite3_free(zErr); /* Tcl_AppendResult copies the string */ |
+    return TCL_ERROR; |
+  } |
+ |
+ if( w<=2147483647 && w>=0 ){ |
+ int i; |
+ nByte2 = fts3GetVarint32(aBuf, &i); |
+    if( (int)w!=i || nByte!=nByte2 ){ |
+      char *zErr = sqlite3_mprintf("error testing %lld (32-bit)", w); |
+      Tcl_ResetResult(interp); |
+      Tcl_AppendResult(interp, zErr, 0); |
+      sqlite3_free(zErr); /* Tcl_AppendResult copies the string */ |
+      return TCL_ERROR; |
+    } |
+ } |
+ |
+#endif |
+ UNUSED_PARAMETER(clientData); |
+ return TCL_OK; |
+} |
+ |
+/* |
+** End of tokenizer and test command code. |
+**************************************************************************/ |
+ |
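+/* |
+** Register the Tcl commands implemented in this file with interpreter |
+** interp. This routine is invoked as part of test harness initialization. |
+*/ |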
+int Sqlitetestfts3_Init(Tcl_Interp *interp){ |
+ Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0); |
+ Tcl_CreateObjCommand(interp, |
+ "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0 |
+ ); |
+ Tcl_CreateObjCommand( |
+ interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0 |
+ ); |
+ |
+ Tcl_CreateObjCommand( |
+ interp, "fts3_test_varint", fts3_test_varint_cmd, 0, 0 |
+ ); |
+ return TCL_OK; |
+} |
+#endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */ |
+#endif /* ifdef SQLITE_TEST */ |