third_party/sqlite/src/ext/fts2/fts2_icu.c - Issue 694353003: Get `gn gen` to succeed on Windows

Side by Side Diff: third_party/sqlite/src/ext/fts2/fts2_icu.c

Issue 694353003: Get `gn gen` to succeed on Windows (Closed) Base URL: https://github.com/domokit/mojo.git@master

Patch Set: remove GYP_DEFINES code Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 /*

	2 ** 2007 June 22

	3 **

	4 ** The author disclaims copyright to this source code. In place of

	5 ** a legal notice, here is a blessing:

	6 **

	7 ** May you do good and not evil.

	8 ** May you find forgiveness for yourself and forgive others.

	9 ** May you share freely, never taking more than you give.

	10 **

	11 *************************************************************************

	12 ** This file implements a tokenizer for fts2 based on the ICU library.

	13 **

	14 ** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $

	15 */

	16

	17 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)

	18 #ifdef SQLITE_ENABLE_ICU

	19

	20 #include <assert.h>

	21 #include <string.h>

	22 #include "fts2_tokenizer.h"

	23

	24 #include <unicode/ubrk.h>

	25 #include <unicode/ucol.h>

	26 #include <unicode/ustring.h>

	27 #include <unicode/utf16.h>

	28

	29 typedef struct IcuTokenizer IcuTokenizer;

	30 typedef struct IcuCursor IcuCursor;

	31

	32 struct IcuTokenizer {

	33 sqlite3_tokenizer base;

	34 char *zLocale;

	35 };

	36

	37 struct IcuCursor {

	38 sqlite3_tokenizer_cursor base;

	39

	40 UBreakIterator pIter; / ICU break-iterator object */

	41 int nChar; /* Number of UChar elements in pInput */

	42 UChar aChar; / Copy of input using utf-16 encoding */

	43 int aOffset; / Offsets of each character in utf-8 input */

	44

	45 int nBuffer;

	46 char *zBuffer;

	47

	48 int iToken;

	49 };

	50

	51 /*

	52 ** Create a new tokenizer instance.

	53 */

	54 static int icuCreate(

	55 int argc, /* Number of entries in argv[] */

	56 const char * const argv, / Tokenizer creation arguments */

	57 sqlite3_tokenizer *ppTokenizer / OUT: Created tokenizer */

	58 ){

	59 IcuTokenizer *p;

	60 int n = 0;

	61

	62 if( argc>0 ){

	63 n = strlen(argv[0])+1;

	64 }

	65 p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);

	66 if( !p ){

	67 return SQLITE_NOMEM;

	68 }

	69 memset(p, 0, sizeof(IcuTokenizer));

	70

	71 if( n ){

	72 p->zLocale = (char *)&p[1];

	73 memcpy(p->zLocale, argv[0], n);

	74 }

	75

	76 ppTokenizer = (sqlite3_tokenizer )p;

	77

	78 return SQLITE_OK;

	79 }

	80

	81 /*

	82 ** Destroy a tokenizer

	83 */

	84 static int icuDestroy(sqlite3_tokenizer *pTokenizer){

	85 IcuTokenizer p = (IcuTokenizer )pTokenizer;

	86 sqlite3_free(p);

	87 return SQLITE_OK;

	88 }

	89

	90 /*

	91 ** Prepare to begin tokenizing a particular string. The input

	92 ** string to be tokenized is pInput[0..nBytes-1]. A cursor

	93 ** used to incrementally tokenize this string is returned in

	94 ** *ppCursor.

	95 */

	96 static int icuOpen(

	97 sqlite3_tokenizer pTokenizer, / The tokenizer */

	98 const char zInput, / Input string */

	99 int nInput, /* Length of zInput in bytes */

	100 sqlite3_tokenizer_cursor *ppCursor / OUT: Tokenization cursor */

	101 ){

	102 IcuTokenizer p = (IcuTokenizer )pTokenizer;

	103 IcuCursor *pCsr;

	104

	105 const int32_t opt = U_FOLD_CASE_DEFAULT;

	106 UErrorCode status = U_ZERO_ERROR;

	107 int nChar;

	108

	109 UChar32 c;

	110 int iInput = 0;

	111 int iOut = 0;

	112

	113 *ppCursor = 0;

	114

	115 if( nInput<0 ){

	116 nInput = strlen(zInput);

	117 }

	118 nChar = nInput+1;

	119 pCsr = (IcuCursor *)sqlite3_malloc(

	120 sizeof(IcuCursor) + /* IcuCursor */

	121 (nChar+1) * sizeof(int) + /* IcuCursor.aOffset[] */

	122 nChar * sizeof(UChar) /* IcuCursor.aChar[] */

	123 );

	124 if( !pCsr ){

	125 return SQLITE_NOMEM;

	126 }

	127 memset(pCsr, 0, sizeof(IcuCursor));

	128 pCsr->aOffset = (int *)&pCsr[1];

	129 pCsr->aChar = (UChar *)&pCsr->aOffset[nChar+1];

	130

	131 pCsr->aOffset[iOut] = iInput;

	132 U8_NEXT(zInput, iInput, nInput, c);

	133 while( c>0 ){

	134 int isError = 0;

	135 c = u_foldCase(c, opt);

	136 U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);

	137 if( isError ){

	138 sqlite3_free(pCsr);

	139 return SQLITE_ERROR;

	140 }

	141 pCsr->aOffset[iOut] = iInput;

	142

	143 if( iInput<nInput ){

	144 U8_NEXT(zInput, iInput, nInput, c);

	145 }else{

	146 c = 0;

	147 }

	148 }

	149

	150 pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);

	151 if( !U_SUCCESS(status) ){

	152 sqlite3_free(pCsr);

	153 return SQLITE_ERROR;

	154 }

	155 pCsr->nChar = iOut;

	156

	157 ubrk_first(pCsr->pIter);

	158 ppCursor = (sqlite3_tokenizer_cursor )pCsr;

	159 return SQLITE_OK;

	160 }

	161

	162 /*

	163 ** Close a tokenization cursor previously opened by a call to icuOpen().

	164 */

	165 static int icuClose(sqlite3_tokenizer_cursor *pCursor){

	166 IcuCursor pCsr = (IcuCursor )pCursor;

	167 ubrk_close(pCsr->pIter);

	168 sqlite3_free(pCsr->zBuffer);

	169 sqlite3_free(pCsr);

	170 return SQLITE_OK;

	171 }

	172

	173 /*

	174 ** Extract the next token from a tokenization cursor.

	175 */

	176 static int icuNext(

	177 sqlite3_tokenizer_cursor pCursor, / Cursor returned by simpleOpen */

	178 const char *ppToken, / OUT: ppToken is the token text /

	179 int pnBytes, / OUT: Number of bytes in token */

	180 int piStartOffset, / OUT: Starting offset of token */

	181 int piEndOffset, / OUT: Ending offset of token */

	182 int piPosition / OUT: Position integer of token */

	183 ){

	184 IcuCursor pCsr = (IcuCursor )pCursor;

	185

	186 int iStart = 0;

	187 int iEnd = 0;

	188 int nByte = 0;

	189

	190 while( iStart==iEnd ){

	191 UChar32 c;

	192

	193 iStart = ubrk_current(pCsr->pIter);

	194 iEnd = ubrk_next(pCsr->pIter);

	195 if( iEnd==UBRK_DONE ){

	196 return SQLITE_DONE;

	197 }

	198

	199 while( iStart<iEnd ){

	200 int iWhite = iStart;

	201 U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);

	202 if( u_isspace(c) ){

	203 iStart = iWhite;

	204 }else{

	205 break;

	206 }

	207 }

	208 assert(iStart<=iEnd);

	209 }

	210

	211 do {

	212 UErrorCode status = U_ZERO_ERROR;

	213 if( nByte ){

	214 char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);

	215 if( !zNew ){

	216 return SQLITE_NOMEM;

	217 }

	218 pCsr->zBuffer = zNew;

	219 pCsr->nBuffer = nByte;

	220 }

	221

	222 u_strToUTF8(

	223 pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */

	224 &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */

	225 &status /* Output success/failure */

	226 );

	227 } while( nByte>pCsr->nBuffer );

	228

	229 *ppToken = pCsr->zBuffer;

	230 *pnBytes = nByte;

	231 *piStartOffset = pCsr->aOffset[iStart];

	232 *piEndOffset = pCsr->aOffset[iEnd];

	233 *piPosition = pCsr->iToken++;

	234

	235 return SQLITE_OK;

	236 }

	237

	238 /*

	239 ** The set of routines that implement the simple tokenizer

	240 */

	241 static const sqlite3_tokenizer_module icuTokenizerModule = {

	242 0, /* iVersion */

	243 icuCreate, /* xCreate */

	244 icuDestroy, /* xCreate */

	245 icuOpen, /* xOpen */

	246 icuClose, /* xClose */

	247 icuNext, /* xNext */

	248 };

	249

	250 /*

	251 ** Set *ppModule to point at the implementation of the ICU tokenizer.

	252 */

	253 void sqlite3Fts2IcuTokenizerModule(

	254 sqlite3_tokenizer_module const**ppModule

	255 ){

	256 *ppModule = &icuTokenizerModule;

	257 }

	258

	259 #endif /* defined(SQLITE_ENABLE_ICU) */

	260 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

OLD	NEW

« no previous file with comments | « third_party/sqlite/src/ext/fts2/fts2_hash.c ('k') | third_party/sqlite/src/ext/fts2/fts2_porter.c » ('j') | no next file with comments »