third_party/sqlite/sqlite-src-3070603/ext/fts3/fts3_icu.c - Issue 949043002: Add //third_party/sqlite to dirs_to_snapshot, remove net_sql.patch

Unified Diff: third_party/sqlite/sqlite-src-3070603/ext/fts3/fts3_icu.c

Issue 949043002: Add //third_party/sqlite to dirs_to_snapshot, remove net_sql.patch (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « third_party/sqlite/sqlite-src-3070603/ext/fts3/fts3_hash.c ('k') | third_party/sqlite/sqlite-src-3070603/ext/fts3/fts3_porter.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/sqlite/sqlite-src-3070603/ext/fts3/fts3_icu.c

diff --git a/third_party/sqlite/sqlite-src-3070603/ext/fts3/fts3_icu.c b/third_party/sqlite/sqlite-src-3070603/ext/fts3/fts3_icu.c

new file mode 100644

index 0000000000000000000000000000000000000000..85390d3b065b5c422fb07d2bdd69614893f56ccf

--- /dev/null

+++ b/third_party/sqlite/sqlite-src-3070603/ext/fts3/fts3_icu.c

@@ -0,0 +1,260 @@

+/*

+** 2007 June 22

+**

+** The author disclaims copyright to this source code. In place of

+** a legal notice, here is a blessing:

+**

+** May you do good and not evil.

+** May you find forgiveness for yourself and forgive others.

+** May you share freely, never taking more than you give.

+**

+*************************************************************************

+** This file implements a tokenizer for fts3 based on the ICU library.

+**

+** $Id: fts3_icu.c,v 1.3 2008/09/01 18:34:20 danielk1977 Exp $

+*/

+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

+#ifdef SQLITE_ENABLE_ICU

+#include <assert.h>

+#include <string.h>

+#include "fts3_tokenizer.h"

+#include <unicode/ubrk.h>

+#include <unicode/ucol.h>

+#include <unicode/ustring.h>

+#include <unicode/utf16.h>

+typedef struct IcuTokenizer IcuTokenizer;

+typedef struct IcuCursor IcuCursor;

+struct IcuTokenizer { /* Tokenizer object allocated by icuCreate() and released by icuDestroy() */

+ sqlite3_tokenizer base; /* Base object; kept first because IcuTokenizer* is cast to and from sqlite3_tokenizer* */

+ char *zLocale; /* Locale name copied from argv[0] by icuCreate(), or 0 if no argument was given; passed to ubrk_open() */

+};

+struct IcuCursor { /* Cursor object allocated by icuOpen() and released by icuClose() */

+ sqlite3_tokenizer_cursor base; /* Base object; kept first because IcuCursor* is cast to and from sqlite3_tokenizer_cursor* */

+ UBreakIterator *pIter; /* ICU break-iterator object */

+ int nChar; /* Number of UChar elements stored in aChar[] */

+ UChar *aChar; /* Case-folded copy of input using utf-16 encoding */

+ int *aOffset; /* aOffset[i] is the byte offset in the utf-8 input that corresponds to index i of aChar[] */

+ int nBuffer; /* Size in bytes of the allocation at zBuffer */

+ char *zBuffer; /* Buffer that icuNext() fills with the utf-8 text of the token it returns */

+ int iToken; /* Position value to report for the next token returned by icuNext() */

+};

+/*
+** Create a new tokenizer instance.
+**
+** If argc>0 then argv[0] is used as the locale name. A copy of it is
+** stored in the same allocation as the IcuTokenizer object, directly
+** after the structure, so that icuDestroy() can release everything with
+** a single sqlite3_free(). If no argument is supplied, zLocale is left
+** as a NULL pointer.
+**
+** On success, *ppTokenizer is set to point at the new tokenizer and
+** SQLITE_OK is returned. If the allocation fails, SQLITE_NOMEM is
+** returned and *ppTokenizer is left unchanged.
+*/
+static int icuCreate(
+  int argc,                            /* Number of entries in argv[] */
+  const char * const *argv,            /* Tokenizer creation arguments */
+  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
+){
+  IcuTokenizer *p;
+  int n = 0;                 /* Size of locale name in bytes, including nul */
+
+  if( argc>0 ){
+    n = strlen(argv[0])+1;
+  }
+  p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
+  if( !p ){
+    return SQLITE_NOMEM;
+  }
+  memset(p, 0, sizeof(IcuTokenizer));
+
+  if( n ){
+    /* The locale string lives in the space following the structure. */
+    p->zLocale = (char *)&p[1];
+    memcpy(p->zLocale, argv[0], n);
+  }
+
+  *ppTokenizer = (sqlite3_tokenizer *)p;
+
+  return SQLITE_OK;
+}

+/*
+** Destroy a tokenizer.
+**
+** The IcuTokenizer structure and its locale string share a single
+** allocation (see icuCreate()), so one sqlite3_free() releases both.
+** Always returns SQLITE_OK.
+*/
+static int icuDestroy(sqlite3_tokenizer *pTokenizer){
+  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
+  sqlite3_free(p);
+  return SQLITE_OK;
+}

+/*
+** Prepare to begin tokenizing a particular string.  The input
+** string to be tokenized is zInput[0..nInput-1].  A cursor
+** used to incrementally tokenize this string is returned in
+** *ppCursor.
+**
+** If nInput is negative, the length of zInput is determined with
+** strlen(). A NULL zInput is treated as an empty string.
+**
+** The input is decoded from utf-8, case-folded with u_foldCase() and
+** stored as utf-16 in IcuCursor.aChar[]. For each utf-16 index at which
+** a character ends, IcuCursor.aOffset[] records the matching byte offset
+** in the utf-8 input, so that icuNext() can report token offsets in
+** terms of the original string. Decoding stops at the first code point
+** that is not greater than zero (a nul character or a decoding error).
+**
+** Returns SQLITE_OK on success, SQLITE_NOMEM if the cursor cannot be
+** allocated, or SQLITE_ERROR if the utf-16 buffer overflows or the ICU
+** break iterator cannot be opened. *ppCursor is set to 0 on failure.
+*/
+static int icuOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *zInput,                    /* Input string */
+  int nInput,                            /* Length of zInput in bytes */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
+  IcuCursor *pCsr;
+
+  const int32_t opt = U_FOLD_CASE_DEFAULT;
+  UErrorCode status = U_ZERO_ERROR;
+  int nChar;                 /* Number of UChar slots allocated for aChar[] */
+
+  UChar32 c;                 /* Code point most recently read from zInput */
+  int iInput = 0;            /* Read position in zInput, in bytes */
+  int iOut = 0;              /* Write position in aChar[], in UChars */
+
+  *ppCursor = 0;
+
+  if( zInput==0 ){
+    /* Guard against a NULL input: tokenize it as the empty string. */
+    nInput = 0;
+    zInput = "";
+  }else if( nInput<0 ){
+    nInput = strlen(zInput);
+  }
+  nChar = nInput+1;
+  /* The cursor, the utf-16 copy and the offset table share one allocation.
+  ** NOTE(review): aOffset[] begins nChar*sizeof(UChar) bytes after aChar[];
+  ** if UChar is 16 bits and nChar is odd, that address is only 2-byte
+  ** aligned - confirm this is acceptable on all target platforms. */
+  pCsr = (IcuCursor *)sqlite3_malloc(
+      sizeof(IcuCursor) +                /* IcuCursor */
+      nChar * sizeof(UChar) +            /* IcuCursor.aChar[] */
+      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
+  );
+  if( !pCsr ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCsr, 0, sizeof(IcuCursor));
+  pCsr->aChar = (UChar *)&pCsr[1];
+  pCsr->aOffset = (int *)&pCsr->aChar[nChar];
+
+  pCsr->aOffset[iOut] = iInput;
+  U8_NEXT(zInput, iInput, nInput, c);
+  while( c>0 ){
+    int isError = 0;
+    c = u_foldCase(c, opt);
+    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
+    if( isError ){
+      sqlite3_free(pCsr);
+      return SQLITE_ERROR;
+    }
+    /* iOut now indexes the slot after the character just written; record
+    ** the utf-8 offset at which that character ended. */
+    pCsr->aOffset[iOut] = iInput;
+
+    if( iInput<nInput ){
+      U8_NEXT(zInput, iInput, nInput, c);
+    }else{
+      c = 0;
+    }
+  }
+
+  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
+  if( !U_SUCCESS(status) ){
+    sqlite3_free(pCsr);
+    return SQLITE_ERROR;
+  }
+  pCsr->nChar = iOut;
+
+  ubrk_first(pCsr->pIter);
+  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
+  return SQLITE_OK;
+}

+/*
+** Close a tokenization cursor previously opened by a call to icuOpen().
+**
+** Closes the ICU break iterator, frees the token buffer that icuNext()
+** may have allocated, then frees the cursor itself (which also holds the
+** aChar[] and aOffset[] arrays). Always returns SQLITE_OK.
+*/
+static int icuClose(sqlite3_tokenizer_cursor *pCursor){
+  IcuCursor *pCsr = (IcuCursor *)pCursor;
+  ubrk_close(pCsr->pIter);
+  sqlite3_free(pCsr->zBuffer);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}

+/*
+** Extract the next token from a tokenization cursor.
+**
+** The break iterator is advanced one segment at a time. Leading
+** white-space is trimmed from each segment, and segments that are
+** empty after trimming are skipped. The remaining utf-16 text is
+** converted to utf-8 in pCsr->zBuffer, which is grown as required and
+** is overwritten by the next call.
+**
+** Returns SQLITE_OK and fills in the output parameters if a token was
+** found, SQLITE_DONE when the input is exhausted, or SQLITE_NOMEM if
+** the token buffer cannot be enlarged.
+*/
+static int icuNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by icuOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  IcuCursor *pCsr = (IcuCursor *)pCursor;
+
+  int iStart = 0;            /* Index in aChar[] of first UChar of token */
+  int iEnd = 0;              /* Index in aChar[] one past the token's end */
+  int nByte = 0;             /* Size of the token in utf-8, in bytes */
+
+  while( iStart==iEnd ){
+    UChar32 c;
+
+    iStart = ubrk_current(pCsr->pIter);
+    iEnd = ubrk_next(pCsr->pIter);
+    if( iEnd==UBRK_DONE ){
+      return SQLITE_DONE;
+    }
+
+    /* Skip leading white-space. aChar[] holds utf-16 code units, so it
+    ** must be decoded with U16_NEXT; U8_NEXT would truncate each UChar
+    ** to 8 bits and misclassify characters such as U+2020. */
+    while( iStart<iEnd ){
+      int iWhite = iStart;
+      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
+      if( u_isspace(c) ){
+        iStart = iWhite;
+      }else{
+        break;
+      }
+    }
+    assert(iStart<=iEnd);
+  }
+
+  /* Convert the token to utf-8. On the first pass the existing buffer is
+  ** tried as-is; if u_strToUTF8() reports that more space is needed
+  ** (nByte>nBuffer), the buffer is enlarged to nByte bytes and the
+  ** conversion is retried. */
+  do {
+    UErrorCode status = U_ZERO_ERROR;
+    if( nByte ){
+      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
+      if( !zNew ){
+        return SQLITE_NOMEM;
+      }
+      pCsr->zBuffer = zNew;
+      pCsr->nBuffer = nByte;
+    }
+
+    u_strToUTF8(
+        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
+        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
+        &status                                  /* Output success/failure */
+    );
+  } while( nByte>pCsr->nBuffer );
+
+  *ppToken = pCsr->zBuffer;
+  *pnBytes = nByte;
+  *piStartOffset = pCsr->aOffset[iStart];
+  *piEndOffset = pCsr->aOffset[iEnd];
+  *piPosition = pCsr->iToken++;
+
+  return SQLITE_OK;
+}

+/*

+** The set of routines that implement the ICU tokenizer

+*/

+static const sqlite3_tokenizer_module icuTokenizerModule = {

+ 0, /* iVersion */

+ icuCreate, /* xCreate */

+ icuDestroy, /* xDestroy */

+ icuOpen, /* xOpen */

+ icuClose, /* xClose */

+ icuNext, /* xNext */

+};

+/*
+** Set *ppModule to point at the implementation of the ICU tokenizer.
+**
+** The module structure has static storage duration, so the pointer
+** stored in *ppModule does not need to be freed by the caller.
+*/
+void sqlite3Fts3IcuTokenizerModule(
+  sqlite3_tokenizer_module const**ppModule
+){
+  *ppModule = &icuTokenizerModule;
+}

+#endif /* defined(SQLITE_ENABLE_ICU) */

+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */