third_party/sqlite/src/ext/fts1/simple_tokenizer.c - Issue 694353003: Get `gn gen` to succeed on Windows

Side by Side Diff: third_party/sqlite/src/ext/fts1/simple_tokenizer.c

Issue 694353003: Get `gn gen` to succeed on Windows (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: remove GYP_DEFINES code Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 /*

	2 ** The author disclaims copyright to this source code.

	3 **

	4 *************************************************************************

	5 ** Implementation of the "simple" full-text-search tokenizer.

	6 */

	7

	8 #include <assert.h>

	9 #if !defined(__APPLE__)

	10 #include <malloc.h>

	11 #else

	12 #include <stdlib.h>

	13 #endif

	14 #include <stdio.h>

	15 #include <string.h>

	16 #include <ctype.h>

	17

	18 #include "tokenizer.h"

	19

	/*
	** Duplicate a nul-terminated string into freshly malloc()ed storage.
	** The caller owns the result and must free() it.  Returns NULL if s is
	** NULL or if the allocation fails.
	** (We don't use strdup() since it's not part of the standard C library
	** and may not be available everywhere.)
	*/
	/* TODO(shess) Copied from fulltext.c, consider util.c for such
	** things. */
	static char *string_dup(const char *s){
	  size_t nByte;
	  char *str;

	  if( s==NULL ) return NULL;
	  nByte = strlen(s) + 1;          /* include the nul terminator */
	  str = malloc(nByte);
	  if( str!=NULL ){
	    memcpy(str, s, nByte);
	  }
	  return str;
	}

	30

	31 typedef struct simple_tokenizer {

	32 sqlite3_tokenizer base;

	33 const char zDelim; / token delimiters */

	34 } simple_tokenizer;

	35

	36 typedef struct simple_tokenizer_cursor {

	37 sqlite3_tokenizer_cursor base;

	38 const char pInput; / input we are tokenizing */

	39 int nBytes; /* size of the input */

	40 const char pCurrent; / current position in pInput */

	41 int iToken; /* index of next token to be returned */

	42 char zToken; / storage for current token */

	43 int nTokenBytes; /* actual size of current token */

	44 int nTokenAllocated; /* space allocated to zToken buffer */

	45 } simple_tokenizer_cursor;

	46

	47 static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */

	48

	49 static int simpleCreate(

	50 int argc, const char **argv,

	51 sqlite3_tokenizer **ppTokenizer

	52 ){

	53 simple_tokenizer *t;

	54

	55 t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));

	56 /* TODO(shess) Delimiters need to remain the same from run to run,

	57 ** else we need to reindex. One solution would be a meta-table to

	58 ** track such information in the database, then we'd only want this

	59 ** information on the initial create.

	60 */

	61 if( argc>1 ){

	62 t->zDelim = string_dup(argv[1]);

	63 } else {

	64 /* Build a string excluding alphanumeric ASCII characters */

	65 char zDelim[0x80]; /* nul-terminated, so nul not a member */

	66 int i, j;

	67 for(i=1, j=0; i<0x80; i++){

	68 if( !isalnum(i) ){

	69 zDelim[j++] = i;

	70 }

	71 }

	72 zDelim[j++] = '\0';

	73 assert( j<=sizeof(zDelim) );

	74 t->zDelim = string_dup(zDelim);

	75 }

	76

	77 *ppTokenizer = &t->base;

	78 return SQLITE_OK;

	79 }

	80

	81 static int simpleDestroy(sqlite3_tokenizer *pTokenizer){

	82 simple_tokenizer t = (simple_tokenizer ) pTokenizer;

	83

	84 free((void *) t->zDelim);

	85 free(t);

	86

	87 return SQLITE_OK;

	88 }

	89

	90 static int simpleOpen(

	91 sqlite3_tokenizer *pTokenizer,

	92 const char *pInput, int nBytes,

	93 sqlite3_tokenizer_cursor **ppCursor

	94 ){

	95 simple_tokenizer_cursor *c;

	96

	97 c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));

	98 c->pInput = pInput;

	99 c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;

	100 c->pCurrent = c->pInput; /* start tokenizing at the beginning */

	101 c->iToken = 0;

	102 c->zToken = NULL; /* no space allocated, yet. */

	103 c->nTokenBytes = 0;

	104 c->nTokenAllocated = 0;

	105

	106 *ppCursor = &c->base;

	107 return SQLITE_OK;

	108 }

	109

	110 static int simpleClose(sqlite3_tokenizer_cursor *pCursor){

	111 simple_tokenizer_cursor c = (simple_tokenizer_cursor ) pCursor;

	112

	113 if( NULL!=c->zToken ){

	114 free(c->zToken);

	115 }

	116 free(c);

	117

	118 return SQLITE_OK;

	119 }

	120

	121 static int simpleNext(

	122 sqlite3_tokenizer_cursor *pCursor,

	123 const char *ppToken, int pnBytes,

	124 int piStartOffset, int piEndOffset, int *piPosition

	125 ){

	126 simple_tokenizer_cursor c = (simple_tokenizer_cursor ) pCursor;

	127 simple_tokenizer t = (simple_tokenizer ) pCursor->pTokenizer;

	128 int ii;

	129

	130 while( c->pCurrent-c->pInput<c->nBytes ){

	131 int n = (int) strcspn(c->pCurrent, t->zDelim);

	132 if( n>0 ){

	133 if( n+1>c->nTokenAllocated ){

	134 c->zToken = realloc(c->zToken, n+1);

	135 }

	136 for(ii=0; ii<n; ii++){

	137 /* TODO(shess) This needs expansion to handle UTF-8

	138 ** case-insensitivity.

	139 */

	140 char ch = c->pCurrent[ii];

	141 c->zToken[ii] = (unsigned char)ch<0x80 ? tolower(ch) : ch;

	142 }

	143 c->zToken[n] = '\0';

	144 *ppToken = c->zToken;

	145 *pnBytes = n;

	146 *piStartOffset = (int) (c->pCurrent-c->pInput);

	147 piEndOffset = piStartOffset+n;

	148 *piPosition = c->iToken++;

	149 c->pCurrent += n + 1;

	150

	151 return SQLITE_OK;

	152 }

	153 c->pCurrent += n + 1;

	154 /* TODO(shess) could strspn() to skip delimiters en masse. Needs

	155 ** to happen in two places, though, which is annoying.

	156 */

	157 }

	158 return SQLITE_DONE;

	159 }

	160

	161 static sqlite3_tokenizer_module simpleTokenizerModule = {

	162 0,

	163 simpleCreate,

	164 simpleDestroy,

	165 simpleOpen,

	166 simpleClose,

	167 simpleNext,

	168 };

	169

	170 void get_simple_tokenizer_module(

	171 sqlite3_tokenizer_module **ppModule

	172 ){

	173 *ppModule = &simpleTokenizerModule;

	174 }

OLD	NEW

« no previous file with comments | « third_party/sqlite/src/ext/fts1/fulltext.c ('k') | third_party/sqlite/src/ext/fts1/tokenizer.h » ('j') | no next file with comments »