| Index: third_party/sqlite/sqlite-src-3080704/ext/fts3/fts3_tokenizer1.c | 
| diff --git a/third_party/sqlite/sqlite-src-3080704/ext/fts3/fts3_tokenizer1.c b/third_party/sqlite/sqlite-src-3080704/ext/fts3/fts3_tokenizer1.c | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..deea06d92bf895a1f513f863484cfbffd0a2faa1 | 
| --- /dev/null | 
| +++ b/third_party/sqlite/sqlite-src-3080704/ext/fts3/fts3_tokenizer1.c | 
| @@ -0,0 +1,234 @@ | 
| +/* | 
| +** 2006 Oct 10 | 
| +** | 
| +** The author disclaims copyright to this source code.  In place of | 
| +** a legal notice, here is a blessing: | 
| +** | 
| +**    May you do good and not evil. | 
| +**    May you find forgiveness for yourself and forgive others. | 
| +**    May you share freely, never taking more than you give. | 
| +** | 
| +****************************************************************************** | 
| +** | 
| +** Implementation of the "simple" full-text-search tokenizer. | 
| +*/ | 
| + | 
| +/* | 
| +** The code in this file is only compiled if: | 
| +** | 
| +**     * The FTS3 module is being built as an extension | 
| +**       (in which case SQLITE_CORE is not defined), or | 
| +** | 
| +**     * The FTS3 module is being built into the core of | 
| +**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined). | 
| +*/ | 
| +#include "fts3Int.h" | 
| +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) | 
| + | 
| +#include <assert.h> | 
| +#include <stdlib.h> | 
| +#include <stdio.h> | 
| +#include <string.h> | 
| + | 
| +#include "fts3_tokenizer.h" | 
| + | 
| +/* | 
| +** Tokenizer object for the "simple" tokenizer. | 
| +** | 
| +** "base" must stay the first member: simpleCreate() hands out &t->base, | 
| +** simpleDestroy() frees that same pointer, and simpleNext() casts the | 
| +** sqlite3_tokenizer pointer straight back to a simple_tokenizer. | 
| +*/ | 
| +typedef struct simple_tokenizer { | 
| +  sqlite3_tokenizer base; | 
| +  char delim[128];             /* flag ASCII delimiters: delim[c]!=0 means byte c (c<0x80) separates tokens */ | 
| +} simple_tokenizer; | 
| + | 
| +/* | 
| +** Cursor used to walk one input buffer token by token. | 
| +** | 
| +** "base" must stay the first member: simpleOpen() returns &c->base and | 
| +** simpleNext()/simpleClose() cast that pointer back to this type. | 
| +** pInput is borrowed (simpleOpen() stores the pointer without copying); | 
| +** pToken is owned by the cursor and released in simpleClose(). | 
| +*/ | 
| +typedef struct simple_tokenizer_cursor { | 
| +  sqlite3_tokenizer_cursor base; | 
| +  const char *pInput;          /* input we are tokenizing */ | 
| +  int nBytes;                  /* size of the input, in bytes */ | 
| +  int iOffset;                 /* current byte position in pInput */ | 
| +  int iToken;                  /* index of next token to be returned */ | 
| +  char *pToken;                /* storage for current token (case-folded copy) */ | 
| +  int nTokenAllocated;         /* space allocated to pToken buffer */ | 
| +} simple_tokenizer_cursor; | 
| + | 
| + | 
| +/* | 
| +** Return 1 if byte c is flagged as a delimiter in tokenizer t, else 0. | 
| +** Bytes with the high bit set (c>=0x80) are never delimiters, which also | 
| +** keeps the lookup inside the 128-entry delim[] table. | 
| +*/ | 
| +static int simpleDelim(simple_tokenizer *t, unsigned char c){ | 
| +  if( c>=0x80 ){ | 
| +    return 0; | 
| +  } | 
| +  return t->delim[c]!=0; | 
| +} | 
| +/* | 
| +** Return 1 if x is an ASCII digit or an ASCII letter of either case, | 
| +** and 0 for every other value.  Uses explicit range checks rather than | 
| +** the <ctype.h> classification functions. | 
| +*/ | 
| +static int fts3_isalnum(int x){ | 
| +  if( x>='0' && x<='9' ) return 1; | 
| +  if( x>='A' && x<='Z' ) return 1; | 
| +  return x>='a' && x<='z'; | 
| +} | 
| + | 
| +/* | 
| +** Create a new tokenizer instance and return it in *ppTokenizer. | 
| +** | 
| +** If a second argument is supplied (argc>1), every byte of argv[1] is | 
| +** marked as a delimiter; a byte >= 0x80 in that string is rejected with | 
| +** SQLITE_ERROR.  Otherwise every ASCII value from 1 to 0x7f that is not | 
| +** alphanumeric becomes a delimiter. | 
| +** | 
| +** Returns SQLITE_OK on success or SQLITE_NOMEM if allocation fails. | 
| +*/ | 
| +static int simpleCreate( | 
| +  int argc, const char * const *argv, | 
| +  sqlite3_tokenizer **ppTokenizer | 
| +){ | 
| +  simple_tokenizer *pNew; | 
| + | 
| +  pNew = (simple_tokenizer *) sqlite3_malloc(sizeof(simple_tokenizer)); | 
| +  if( pNew==NULL ){ | 
| +    return SQLITE_NOMEM; | 
| +  } | 
| +  memset(pNew, 0, sizeof(simple_tokenizer)); | 
| + | 
| +  /* TODO(shess) The delimiter set has to be identical from run to run, | 
| +  ** otherwise a reindex is required.  A meta-table in the database could | 
| +  ** record it, in which case it would only be needed on initial create. | 
| +  */ | 
| +  if( argc<=1 ){ | 
| +    /* Default: flag each non-alphanumeric ASCII value.  Entry 0 and the | 
| +    ** alphanumeric entries keep the zero written by memset() above. */ | 
| +    int ch; | 
| +    for(ch=1; ch<0x80; ch++){ | 
| +      if( !fts3_isalnum(ch) ){ | 
| +        pNew->delim[ch] = -1; | 
| +      } | 
| +    } | 
| +  }else{ | 
| +    const unsigned char *zDelim = (const unsigned char *)argv[1]; | 
| +    int nDelim = (int)strlen(argv[1]); | 
| +    int j; | 
| +    for(j=0; j<nDelim; j++){ | 
| +      /* UTF-8 delimiters are deliberately unsupported for now. */ | 
| +      if( zDelim[j]>=0x80 ){ | 
| +        sqlite3_free(pNew); | 
| +        return SQLITE_ERROR; | 
| +      } | 
| +      pNew->delim[zDelim[j]] = 1; | 
| +    } | 
| +  } | 
| + | 
| +  *ppTokenizer = &pNew->base; | 
| +  return SQLITE_OK; | 
| +} | 
| + | 
| +/* | 
| +** Destroy a tokenizer previously returned by simpleCreate().  The whole | 
| +** object is a single allocation, so one sqlite3_free() releases it. | 
| +** Always returns SQLITE_OK. | 
| +*/ | 
| +static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ | 
| +  simple_tokenizer *pSimple = (simple_tokenizer *)pTokenizer; | 
| +  sqlite3_free(pSimple); | 
| +  return SQLITE_OK; | 
| +} | 
| + | 
| +/* | 
| +** Open a cursor for tokenizing the buffer pInput[0..nBytes-1]. | 
| +** | 
| +** A NULL pInput is treated as empty input, and a negative nBytes means | 
| +** the length is taken with strlen(pInput).  The input is not copied: | 
| +** the cursor only stores the pointer. | 
| +** | 
| +** On success the new cursor is written to *ppCursor and SQLITE_OK is | 
| +** returned.  SQLITE_NOMEM is returned if the cursor cannot be allocated. | 
| +*/ | 
| +static int simpleOpen( | 
| +  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */ | 
| +  const char *pInput, int nBytes,        /* String to be tokenized */ | 
| +  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */ | 
| +){ | 
| +  simple_tokenizer_cursor *pCsr; | 
| +  int nInput = nBytes; | 
| + | 
| +  UNUSED_PARAMETER(pTokenizer); | 
| + | 
| +  pCsr = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(simple_tokenizer_cursor)); | 
| +  if( pCsr==NULL ){ | 
| +    return SQLITE_NOMEM; | 
| +  } | 
| + | 
| +  /* Work out how many bytes of input there really are. */ | 
| +  if( pInput==0 ){ | 
| +    nInput = 0; | 
| +  }else if( nInput<0 ){ | 
| +    nInput = (int)strlen(pInput); | 
| +  } | 
| + | 
| +  pCsr->pInput = pInput; | 
| +  pCsr->nBytes = nInput; | 
| +  pCsr->iOffset = 0;              /* tokenizing starts at the first byte */ | 
| +  pCsr->iToken = 0; | 
| +  pCsr->pToken = NULL;            /* token buffer is allocated on demand */ | 
| +  pCsr->nTokenAllocated = 0; | 
| + | 
| +  *ppCursor = &pCsr->base; | 
| +  return SQLITE_OK; | 
| +} | 
| + | 
| +/* | 
| +** Close a tokenization cursor previously opened by simpleOpen(). | 
| +** Releases the token buffer and then the cursor itself.  The input | 
| +** buffer is not touched; the cursor never owned it.  Always returns | 
| +** SQLITE_OK. | 
| +*/ | 
| +static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ | 
| +  simple_tokenizer_cursor *pCsr = (simple_tokenizer_cursor *)pCursor; | 
| + | 
| +  /* pToken is still NULL if simpleNext() never produced a token. */ | 
| +  sqlite3_free(pCsr->pToken); | 
| +  sqlite3_free(pCsr); | 
| + | 
| +  return SQLITE_OK; | 
| +} | 
| + | 
| +/* | 
| +** Extract the next token from a tokenization cursor.  The cursor must | 
| +** have been opened by a prior call to simpleOpen(). | 
| +** | 
| +** Delimiter bytes (see simpleDelim()) are skipped, then the following | 
| +** run of non-delimiter bytes is copied into the cursor's token buffer | 
| +** with ASCII 'A'..'Z' folded to lower case.  All other bytes, including | 
| +** those >= 0x80, are copied unchanged. | 
| +** | 
| +** Returns SQLITE_OK with all five OUT parameters set when a token is | 
| +** found, SQLITE_DONE when the input is exhausted, or SQLITE_NOMEM if the | 
| +** token buffer cannot be grown.  *ppToken points into storage owned by | 
| +** the cursor, is overwritten by the next call, and is not | 
| +** nul-terminated; use *pnBytes for its length.  The start offset is | 
| +** inclusive and the end offset exclusive, both in bytes from the start | 
| +** of the input. | 
| +*/ | 
| +static int simpleNext( | 
| +  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */ | 
| +  const char **ppToken,               /* OUT: *ppToken is the token text */ | 
| +  int *pnBytes,                       /* OUT: Number of bytes in token */ | 
| +  int *piStartOffset,                 /* OUT: Starting offset of token */ | 
| +  int *piEndOffset,                   /* OUT: Ending offset of token */ | 
| +  int *piPosition                     /* OUT: Position integer of token */ | 
| +){ | 
| +  simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; | 
| +  simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; | 
| +  unsigned char *p = (unsigned char *)c->pInput; | 
| + | 
| +  while( c->iOffset<c->nBytes ){ | 
| +    int iStartOffset; | 
| + | 
| +    /* Scan past delimiter characters */ | 
| +    while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){ | 
| +      c->iOffset++; | 
| +    } | 
| + | 
| +    /* Count non-delimiter characters. */ | 
| +    iStartOffset = c->iOffset; | 
| +    while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){ | 
| +      c->iOffset++; | 
| +    } | 
| + | 
| +    if( c->iOffset>iStartOffset ){ | 
| +      int i, n = c->iOffset-iStartOffset; | 
| +      if( n>c->nTokenAllocated ){ | 
| +        /* Grow with 20 bytes of slack so that slightly longer tokens do | 
| +        ** not force another reallocation.  The new capacity is recorded | 
| +        ** only after the reallocation succeeds: on failure c->pToken | 
| +        ** still refers to the old, smaller buffer and nTokenAllocated | 
| +        ** must keep describing it, or a later call could skip this | 
| +        ** branch and write past the end of that buffer. | 
| +        */ | 
| +        int nNew = n+20; | 
| +        char *pNew = sqlite3_realloc(c->pToken, nNew); | 
| +        if( !pNew ) return SQLITE_NOMEM; | 
| +        c->pToken = pNew; | 
| +        c->nTokenAllocated = nNew; | 
| +      } | 
| +      for(i=0; i<n; i++){ | 
| +        /* TODO(shess) This needs expansion to handle UTF-8 | 
| +        ** case-insensitivity. | 
| +        */ | 
| +        unsigned char ch = p[iStartOffset+i]; | 
| +        c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch); | 
| +      } | 
| +      *ppToken = c->pToken; | 
| +      *pnBytes = n; | 
| +      *piStartOffset = iStartOffset; | 
| +      *piEndOffset = c->iOffset; | 
| +      *piPosition = c->iToken++; | 
| + | 
| +      return SQLITE_OK; | 
| +    } | 
| +  } | 
| +  return SQLITE_DONE; | 
| +} | 
| + | 
| +/* | 
| +** The set of routines that implement the simple tokenizer. | 
| +** | 
| +** The initializer is positional, so the entries must stay in the order | 
| +** in which sqlite3_tokenizer_module declares its members.  That | 
| +** declaration lives in fts3_tokenizer.h and is not visible in this | 
| +** file; the slot names given below are assumptions to confirm there. | 
| +*/ | 
| +static const sqlite3_tokenizer_module simpleTokenizerModule = { | 
| +  0,                /* presumably the interface version field - confirm */ | 
| +  simpleCreate,     /* create a tokenizer instance */ | 
| +  simpleDestroy,    /* destroy a tokenizer instance */ | 
| +  simpleOpen,       /* open a cursor on an input buffer */ | 
| +  simpleClose,      /* close a cursor */ | 
| +  simpleNext,       /* fetch the next token from a cursor */ | 
| +  0,                /* trailing slot left null; presumably an optional hook - confirm */ | 
| +}; | 
| + | 
| +/* | 
| +** Return, via *ppModule, a pointer to the method table of the simple | 
| +** tokenizer.  Nothing is allocated here: the table is the file-scope | 
| +** static const object simpleTokenizerModule, so every call yields the | 
| +** same address. | 
| +*/ | 
| +void sqlite3Fts3SimpleTokenizerModule( | 
| +  sqlite3_tokenizer_module const**ppModule | 
| +){ | 
| +  *ppModule = &simpleTokenizerModule; | 
| +} | 
| + | 
| +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | 
|  |