third_party/sqlite/sqlite-src-3170000/ext/fts3/fts3_porter.c - Issue 2747283002: [sql] Import reference version of SQLite 3.17..

Side by Side Diff: third_party/sqlite/sqlite-src-3170000/ext/fts3/fts3_porter.c

Issue 2747283002: [sql] Import reference version of SQLite 3.17.. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/sqlite/sqlite-src-3170000/ext/fts3/fts3_icu.c ('k') | third_party/sqlite/sqlite-src-3170000/ext/fts3/fts3_snippet.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*

	2 ** 2006 September 30

	3 **

	4 ** The author disclaims copyright to this source code. In place of

	5 ** a legal notice, here is a blessing:

	6 **

	7 ** May you do good and not evil.

	8 ** May you find forgiveness for yourself and forgive others.

	9 ** May you share freely, never taking more than you give.

	10 **

	11 *************************************************************************

	12 ** Implementation of the full-text-search tokenizer that implements

	13 ** a Porter stemmer.

	14 */

	15

	16 /*

	17 ** The code in this file is only compiled if:

	18 **

	19 ** * The FTS3 module is being built as an extension

	20 ** (in which case SQLITE_CORE is not defined), or

	21 **

	22 ** * The FTS3 module is being built into the core of

	23 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).

	24 */

	25 #include "fts3Int.h"

	26 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

	27

	28 #include <assert.h>

	29 #include <stdlib.h>

	30 #include <stdio.h>

	31 #include <string.h>

	32

	33 #include "fts3_tokenizer.h"

	34

	35 /*

	36 ** Class derived from sqlite3_tokenizer

	37 */

	38 typedef struct porter_tokenizer {

	39 sqlite3_tokenizer base; /* Base class */

	40 } porter_tokenizer;

	41

	42 /*

	43 ** Class derived from sqlite3_tokenizer_cursor

	44 */

	45 typedef struct porter_tokenizer_cursor {

	46 sqlite3_tokenizer_cursor base;

	47 const char *zInput; /* input we are tokenizing */

	48 int nInput; /* size of the input */

	49 int iOffset; /* current position in zInput */

	50 int iToken; /* index of next token to be returned */

	51 char *zToken; /* storage for current token */

	52 int nAllocated; /* space allocated to zToken buffer */

	53 } porter_tokenizer_cursor;

	54

	55

	56 /*

	57 ** Create a new tokenizer instance.

	58 */

	59 static int porterCreate(

	60 int argc, const char * const *argv,

	61 sqlite3_tokenizer **ppTokenizer

	62 ){

	63 porter_tokenizer *t;

	64

	65 UNUSED_PARAMETER(argc);

	66 UNUSED_PARAMETER(argv);

	67

	68 t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));

	69 if( t==NULL ) return SQLITE_NOMEM;

	70 memset(t, 0, sizeof(*t));

	71 *ppTokenizer = &t->base;

	72 return SQLITE_OK;

	73 }

	74

	75 /*

	76 ** Destroy a tokenizer

	77 */

	78 static int porterDestroy(sqlite3_tokenizer *pTokenizer){

	79 sqlite3_free(pTokenizer);

	80 return SQLITE_OK;

	81 }

	82

	83 /*

	84 ** Prepare to begin tokenizing a particular string. The input

	85 ** string to be tokenized is zInput[0..nInput-1]. A cursor

	86 ** used to incrementally tokenize this string is returned in

	87 ** *ppCursor.

	88 */

	89 static int porterOpen(

	90 sqlite3_tokenizer *pTokenizer, /* The tokenizer */

	91 const char *zInput, int nInput, /* String to be tokenized */

	92 sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */

	93 ){

	94 porter_tokenizer_cursor *c;

	95

	96 UNUSED_PARAMETER(pTokenizer);

	97

	98 c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));

	99 if( c==NULL ) return SQLITE_NOMEM;

	100

	101 c->zInput = zInput;

	102 if( zInput==0 ){

	103 c->nInput = 0;

	104 }else if( nInput<0 ){

	105 c->nInput = (int)strlen(zInput);

	106 }else{

	107 c->nInput = nInput;

	108 }

	109 c->iOffset = 0; /* start tokenizing at the beginning */

	110 c->iToken = 0;

	111 c->zToken = NULL; /* no space allocated, yet. */

	112 c->nAllocated = 0;

	113

	114 *ppCursor = &c->base;

	115 return SQLITE_OK;

	116 }

	117

	118 /*

	119 ** Close a tokenization cursor previously opened by a call to

	120 ** porterOpen() above.

	121 */

	122 static int porterClose(sqlite3_tokenizer_cursor *pCursor){

	123 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;

	124 sqlite3_free(c->zToken);

	125 sqlite3_free(c);

	126 return SQLITE_OK;

	127 }

	128 /*

	129 ** Vowel or consonant

	130 */

	131 static const char cType[] = {

	132 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,

	133 1, 1, 1, 2, 1

	134 };

	135

	136 /*

	137 ** isConsonant() and isVowel() determine if their first character in

	138 ** the string they point to is a consonant or a vowel, according

	139 ** to Porter ruls.

	140 **

	141 ** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.

	142 ** 'Y' is a consonant unless it follows another consonant,

	143 ** in which case it is a vowel.

	144 **

	145 ** In these routine, the letters are in reverse order. So the 'y' rule

	146 ** is that 'y' is a consonant unless it is followed by another

	147 ** consonent.

	148 */

	149 static int isVowel(const char*);

	150 static int isConsonant(const char *z){

	151 int j;

	152 char x = *z;

	153 if( x==0 ) return 0;

	154 assert( x>='a' && x<='z' );

	155 j = cType[x-'a'];

	156 if( j<2 ) return j;

	157 return z[1]==0 || isVowel(z + 1);

	158 }

	159 static int isVowel(const char *z){

	160 int j;

	161 char x = *z;

	162 if( x==0 ) return 0;

	163 assert( x>='a' && x<='z' );

	164 j = cType[x-'a'];

	165 if( j<2 ) return 1-j;

	166 return isConsonant(z + 1);

	167 }

	168

	169 /*

	170 ** Let any sequence of one or more vowels be represented by V and let

	171 ** C be sequence of one or more consonants. Then every word can be

	172 ** represented as:

	173 **

	174 ** [C] (VC){m} [V]

	175 **

	176 ** In prose: A word is an optional consonant followed by zero or

	177 ** vowel-consonant pairs followed by an optional vowel. "m" is the

	178 ** number of vowel consonant pairs. This routine computes the value

	179 ** of m for the first i bytes of a word.

	180 **

	181 ** Return true if the m-value for z is 1 or more. In other words,

	182 ** return true if z contains at least one vowel that is followed

	183 ** by a consonant.

	184 **

	185 ** In this routine z[] is in reverse order. So we are really looking

	186 ** for an instance of a consonant followed by a vowel.

	187 */

	188 static int m_gt_0(const char *z){

	189 while( isVowel(z) ){ z++; }

	190 if( *z==0 ) return 0;

	191 while( isConsonant(z) ){ z++; }

	192 return *z!=0;

	193 }

	194

	195 /* Like mgt0 above except we are looking for a value of m which is

	196 ** exactly 1

	197 */

	198 static int m_eq_1(const char *z){

	199 while( isVowel(z) ){ z++; }

	200 if( *z==0 ) return 0;

	201 while( isConsonant(z) ){ z++; }

	202 if( *z==0 ) return 0;

	203 while( isVowel(z) ){ z++; }

	204 if( *z==0 ) return 1;

	205 while( isConsonant(z) ){ z++; }

	206 return *z==0;

	207 }

	208

	209 /* Like mgt0 above except we are looking for a value of m>1 instead

	210 ** or m>0

	211 */

	212 static int m_gt_1(const char *z){

	213 while( isVowel(z) ){ z++; }

	214 if( *z==0 ) return 0;

	215 while( isConsonant(z) ){ z++; }

	216 if( *z==0 ) return 0;

	217 while( isVowel(z) ){ z++; }

	218 if( *z==0 ) return 0;

	219 while( isConsonant(z) ){ z++; }

	220 return *z!=0;

	221 }

	222

	223 /*

	224 ** Return TRUE if there is a vowel anywhere within z[0..n-1]

	225 */

	226 static int hasVowel(const char *z){

	227 while( isConsonant(z) ){ z++; }

	228 return *z!=0;

	229 }

	230

	231 /*

	232 ** Return TRUE if the word ends in a double consonant.

	233 **

	234 ** The text is reversed here. So we are really looking at

	235 ** the first two characters of z[].

	236 */

	237 static int doubleConsonant(const char *z){

	238 return isConsonant(z) && z[0]==z[1];

	239 }

	240

	241 /*

	242 ** Return TRUE if the word ends with three letters which

	243 ** are consonant-vowel-consonent and where the final consonant

	244 ** is not 'w', 'x', or 'y'.

	245 **

	246 ** The word is reversed here. So we are really checking the

	247 ** first three letters and the first one cannot be in [wxy].

	248 */

	249 static int star_oh(const char *z){

	250 return

	251 isConsonant(z) &&

	252 z[0]!='w' && z[0]!='x' && z[0]!='y' &&

	253 isVowel(z+1) &&

	254 isConsonant(z+2);

	255 }

	256

	257 /*

	258 ** If the word ends with zFrom and xCond() is true for the stem

	259 ** of the word that preceeds the zFrom ending, then change the

	260 ** ending to zTo.

	261 **

	262 ** The input word *pz and zFrom are both in reverse order. zTo

	263 ** is in normal order.

	264 **

	265 ** Return TRUE if zFrom matches. Return FALSE if zFrom does not

	266 ** match. Not that TRUE is returned even if xCond() fails and

	267 ** no substitution occurs.

	268 */

	269 static int stem(

	270 char **pz, /* The word being stemmed (Reversed) */

	271 const char *zFrom, /* If the ending matches this... (Reversed) */

	272 const char *zTo, /* ... change the ending to this (not reversed) */

	273 int (*xCond)(const char*) /* Condition that must be true */

	274 ){

	275 char *z = *pz;

	276 while( *zFrom && *zFrom==*z ){ z++; zFrom++; }

	277 if( *zFrom!=0 ) return 0;

	278 if( xCond && !xCond(z) ) return 1;

	279 while( *zTo ){

	280 *(--z) = *(zTo++);

	281 }

	282 *pz = z;

	283 return 1;

	284 }

	285

	286 /*

	287 ** This is the fallback stemmer used when the porter stemmer is

	288 ** inappropriate. The input word is copied into the output with

	289 ** US-ASCII case folding. If the input word is too long (more

	290 ** than 20 bytes if it contains no digits or more than 6 bytes if

	291 ** it contains digits) then word is truncated to 20 or 6 bytes

	292 ** by taking 10 or 3 bytes from the beginning and end.

	293 */

	294 static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){

	295 int i, mx, j;

	296 int hasDigit = 0;

	297 for(i=0; i<nIn; i++){

	298 char c = zIn[i];

	299 if( c>='A' && c<='Z' ){

	300 zOut[i] = c - 'A' + 'a';

	301 }else{

	302 if( c>='0' && c<='9' ) hasDigit = 1;

	303 zOut[i] = c;

	304 }

	305 }

	306 mx = hasDigit ? 3 : 10;

	307 if( nIn>mx*2 ){

	308 for(j=mx, i=nIn-mx; i<nIn; i++, j++){

	309 zOut[j] = zOut[i];

	310 }

	311 i = j;

	312 }

	313 zOut[i] = 0;

	314 *pnOut = i;

	315 }

	316

	317

	318 /*

	319 ** Stem the input word zIn[0..nIn-1]. Store the output in zOut.

	320 ** zOut is at least big enough to hold nIn bytes. Write the actual

	321 ** size of the output word (exclusive of the '\0' terminator) into *pnOut.

	322 **

	323 ** Any upper-case characters in the US-ASCII character set ([A-Z])

	324 ** are converted to lower case. Upper-case UTF characters are

	325 ** unchanged.

	326 **

	327 ** Words that are longer than about 20 bytes are stemmed by retaining

	328 ** a few bytes from the beginning and the end of the word. If the

	329 ** word contains digits, 3 bytes are taken from the beginning and

	330 ** 3 bytes from the end. For long words without digits, 10 bytes

	331 ** are taken from each end. US-ASCII case folding still applies.

	332 **

	333 ** If the input word contains not digits but does characters not

	334 ** in [a-zA-Z] then no stemming is attempted and this routine just

	335 ** copies the input into the input into the output with US-ASCII

	336 ** case folding.

	337 **

	338 ** Stemming never increases the length of the word. So there is

	339 ** no chance of overflowing the zOut buffer.

	340 */

	341 static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){

	342 int i, j;

	343 char zReverse[28];

	344 char *z, *z2;

	345 if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){

	346 /* The word is too big or too small for the porter stemmer.

	347 ** Fallback to the copy stemmer */

	348 copy_stemmer(zIn, nIn, zOut, pnOut);

	349 return;

	350 }

	351 for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){

	352 char c = zIn[i];

	353 if( c>='A' && c<='Z' ){

	354 zReverse[j] = c + 'a' - 'A';

	355 }else if( c>='a' && c<='z' ){

	356 zReverse[j] = c;

	357 }else{

	358 /* The use of a character not in [a-zA-Z] means that we fallback

	359 ** to the copy stemmer */

	360 copy_stemmer(zIn, nIn, zOut, pnOut);

	361 return;

	362 }

	363 }

	364 memset(&zReverse[sizeof(zReverse)-5], 0, 5);

	365 z = &zReverse[j+1];

	366

	367

	368 /* Step 1a */

	369 if( z[0]=='s' ){

	370 if(

	371 !stem(&z, "sess", "ss", 0) &&

	372 !stem(&z, "sei", "i", 0) &&

	373 !stem(&z, "ss", "ss", 0)

	374 ){

	375 z++;

	376 }

	377 }

	378

	379 /* Step 1b */

	380 z2 = z;

	381 if( stem(&z, "dee", "ee", m_gt_0) ){

	382 /* Do nothing. The work was all in the test */

	383 }else if(

	384 (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))

	385 && z!=z2

	386 ){

	387 if( stem(&z, "ta", "ate", 0) ||

	388 stem(&z, "lb", "ble", 0) ||

	389 stem(&z, "zi", "ize", 0) ){

	390 /* Do nothing. The work was all in the test */

	391 }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){

	392 z++;

	393 }else if( m_eq_1(z) && star_oh(z) ){

	394 *(--z) = 'e';

	395 }

	396 }

	397

	398 /* Step 1c */

	399 if( z[0]=='y' && hasVowel(z+1) ){

	400 z[0] = 'i';

	401 }

	402

	403 /* Step 2 */

	404 switch( z[1] ){

	405 case 'a':

	406 if( !stem(&z, "lanoita", "ate", m_gt_0) ){

	407 stem(&z, "lanoit", "tion", m_gt_0);

	408 }

	409 break;

	410 case 'c':

	411 if( !stem(&z, "icne", "ence", m_gt_0) ){

	412 stem(&z, "icna", "ance", m_gt_0);

	413 }

	414 break;

	415 case 'e':

	416 stem(&z, "rezi", "ize", m_gt_0);

	417 break;

	418 case 'g':

	419 stem(&z, "igol", "log", m_gt_0);

	420 break;

	421 case 'l':

	422 if( !stem(&z, "ilb", "ble", m_gt_0)

	423 && !stem(&z, "illa", "al", m_gt_0)

	424 && !stem(&z, "iltne", "ent", m_gt_0)

	425 && !stem(&z, "ile", "e", m_gt_0)

	426 ){

	427 stem(&z, "ilsuo", "ous", m_gt_0);

	428 }

	429 break;

	430 case 'o':

	431 if( !stem(&z, "noitazi", "ize", m_gt_0)

	432 && !stem(&z, "noita", "ate", m_gt_0)

	433 ){

	434 stem(&z, "rota", "ate", m_gt_0);

	435 }

	436 break;

	437 case 's':

	438 if( !stem(&z, "msila", "al", m_gt_0)

	439 && !stem(&z, "ssenevi", "ive", m_gt_0)

	440 && !stem(&z, "ssenluf", "ful", m_gt_0)

	441 ){

	442 stem(&z, "ssensuo", "ous", m_gt_0);

	443 }

	444 break;

	445 case 't':

	446 if( !stem(&z, "itila", "al", m_gt_0)

	447 && !stem(&z, "itivi", "ive", m_gt_0)

	448 ){

	449 stem(&z, "itilib", "ble", m_gt_0);

	450 }

	451 break;

	452 }

	453

	454 /* Step 3 */

	455 switch( z[0] ){

	456 case 'e':

	457 if( !stem(&z, "etaci", "ic", m_gt_0)

	458 && !stem(&z, "evita", "", m_gt_0)

	459 ){

	460 stem(&z, "ezila", "al", m_gt_0);

	461 }

	462 break;

	463 case 'i':

	464 stem(&z, "itici", "ic", m_gt_0);

	465 break;

	466 case 'l':

	467 if( !stem(&z, "laci", "ic", m_gt_0) ){

	468 stem(&z, "luf", "", m_gt_0);

	469 }

	470 break;

	471 case 's':

	472 stem(&z, "ssen", "", m_gt_0);

	473 break;

	474 }

	475

	476 /* Step 4 */

	477 switch( z[1] ){

	478 case 'a':

	479 if( z[0]=='l' && m_gt_1(z+2) ){

	480 z += 2;

	481 }

	482 break;

	483 case 'c':

	484 if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){

	485 z += 4;

	486 }

	487 break;

	488 case 'e':

	489 if( z[0]=='r' && m_gt_1(z+2) ){

	490 z += 2;

	491 }

	492 break;

	493 case 'i':

	494 if( z[0]=='c' && m_gt_1(z+2) ){

	495 z += 2;

	496 }

	497 break;

	498 case 'l':

	499 if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){

	500 z += 4;

	501 }

	502 break;

	503 case 'n':

	504 if( z[0]=='t' ){

	505 if( z[2]=='a' ){

	506 if( m_gt_1(z+3) ){

	507 z += 3;

	508 }

	509 }else if( z[2]=='e' ){

	510 if( !stem(&z, "tneme", "", m_gt_1)

	511 && !stem(&z, "tnem", "", m_gt_1)

	512 ){

	513 stem(&z, "tne", "", m_gt_1);

	514 }

	515 }

	516 }

	517 break;

	518 case 'o':

	519 if( z[0]=='u' ){

	520 if( m_gt_1(z+2) ){

	521 z += 2;

	522 }

	523 }else if( z[3]=='s' || z[3]=='t' ){

	524 stem(&z, "noi", "", m_gt_1);

	525 }

	526 break;

	527 case 's':

	528 if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){

	529 z += 3;

	530 }

	531 break;

	532 case 't':

	533 if( !stem(&z, "eta", "", m_gt_1) ){

	534 stem(&z, "iti", "", m_gt_1);

	535 }

	536 break;

	537 case 'u':

	538 if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){

	539 z += 3;

	540 }

	541 break;

	542 case 'v':

	543 case 'z':

	544 if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){

	545 z += 3;

	546 }

	547 break;

	548 }

	549

	550 /* Step 5a */

	551 if( z[0]=='e' ){

	552 if( m_gt_1(z+1) ){

	553 z++;

	554 }else if( m_eq_1(z+1) && !star_oh(z+1) ){

	555 z++;

	556 }

	557 }

	558

	559 /* Step 5b */

	560 if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){

	561 z++;

	562 }

	563

	564 /* z[] is now the stemmed word in reverse order. Flip it back

	565 ** around into forward order and return.

	566 */

	567 *pnOut = i = (int)strlen(z);

	568 zOut[i] = 0;

	569 while( *z ){

	570 zOut[--i] = *(z++);

	571 }

	572 }

	573

	574 /*

	575 ** Characters that can be part of a token. We assume any character

	576 ** whose value is greater than 0x80 (any UTF character) can be

	577 ** part of a token. In other words, delimiters all must have

	578 ** values of 0x7f or lower.

	579 */

	580 static const char porterIdChar[] = {

	581 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */

	582 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */

	583 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */

	584 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */

	585 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */

	586 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */

	587 };

	588 #define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))

	589

	590 /*

	591 ** Extract the next token from a tokenization cursor. The cursor must

	592 ** have been opened by a prior call to porterOpen().

	593 */

	594 static int porterNext(

	595 sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */

	596 const char **pzToken, /* OUT: *pzToken is the token text */

	597 int *pnBytes, /* OUT: Number of bytes in token */

	598 int *piStartOffset, /* OUT: Starting offset of token */

	599 int *piEndOffset, /* OUT: Ending offset of token */

	600 int *piPosition /* OUT: Position integer of token */

	601 ){

	602 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;

	603 const char *z = c->zInput;

	604

	605 while( c->iOffset<c->nInput ){

	606 int iStartOffset, ch;

	607

	608 /* Scan past delimiter characters */

	609 while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){

	610 c->iOffset++;

	611 }

	612

	613 /* Count non-delimiter characters. */

	614 iStartOffset = c->iOffset;

	615 while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){

	616 c->iOffset++;

	617 }

	618

	619 if( c->iOffset>iStartOffset ){

	620 int n = c->iOffset-iStartOffset;

	621 if( n>c->nAllocated ){

	622 char *pNew;

	623 c->nAllocated = n+20;

	624 pNew = sqlite3_realloc(c->zToken, c->nAllocated);

	625 if( !pNew ) return SQLITE_NOMEM;

	626 c->zToken = pNew;

	627 }

	628 porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);

	629 *pzToken = c->zToken;

	630 *piStartOffset = iStartOffset;

	631 *piEndOffset = c->iOffset;

	632 *piPosition = c->iToken++;

	633 return SQLITE_OK;

	634 }

	635 }

	636 return SQLITE_DONE;

	637 }

	638

	639 /*

	640 ** The set of routines that implement the porter-stemmer tokenizer

	641 */

	642 static const sqlite3_tokenizer_module porterTokenizerModule = {

	643 0,

	644 porterCreate,

	645 porterDestroy,

	646 porterOpen,

	647 porterClose,

	648 porterNext,

	649 0

	650 };

	651

	652 /*

	653 ** Allocate a new porter tokenizer. Return a pointer to the new

	654 ** tokenizer in *ppModule

	655 */

	656 void sqlite3Fts3PorterTokenizerModule(

	657 sqlite3_tokenizer_module const**ppModule

	658 ){

	659 *ppModule = &porterTokenizerModule;

	660 }

	661

	662 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */

OLD	NEW