Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(207)

Side by Side Diff: third_party/sqlite/src/ext/fts3/fts3_porter.c

Issue 949043002: Add //third_party/sqlite to dirs_to_snapshot, remove net_sql.patch (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 ** 2006 September 30 2 ** 2006 September 30
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
11 ************************************************************************* 11 *************************************************************************
12 ** Implementation of the full-text-search tokenizer that implements 12 ** Implementation of the full-text-search tokenizer that implements
13 ** a Porter stemmer. 13 ** a Porter stemmer.
14 */ 14 */
15 15
16 /* 16 /*
17 ** The code in this file is only compiled if: 17 ** The code in this file is only compiled if:
18 ** 18 **
19 ** * The FTS3 module is being built as an extension 19 ** * The FTS3 module is being built as an extension
20 ** (in which case SQLITE_CORE is not defined), or 20 ** (in which case SQLITE_CORE is not defined), or
21 ** 21 **
22 ** * The FTS3 module is being built into the core of 22 ** * The FTS3 module is being built into the core of
23 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). 23 ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
24 */ 24 */
25 #include "fts3Int.h"
25 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) 26 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
26 27
27 #include "fts3Int.h"
28
29 #include <assert.h> 28 #include <assert.h>
30 #include <stdlib.h> 29 #include <stdlib.h>
31 #include <stdio.h> 30 #include <stdio.h>
32 #include <string.h> 31 #include <string.h>
33 32
34 #include "fts3_tokenizer.h" 33 #include "fts3_tokenizer.h"
35 34
36 /* 35 /*
37 ** Class derived from sqlite3_tokenizer 36 ** Class derived from sqlite3_tokenizer
38 */ 37 */
39 typedef struct porter_tokenizer { 38 typedef struct porter_tokenizer {
40 sqlite3_tokenizer base; /* Base class */ 39 sqlite3_tokenizer base; /* Base class */
41 } porter_tokenizer; 40 } porter_tokenizer;
42 41
43 /* 42 /*
44 ** Class derived from sqlit3_tokenizer_cursor 43 ** Class derived from sqlite3_tokenizer_cursor
45 */ 44 */
46 typedef struct porter_tokenizer_cursor { 45 typedef struct porter_tokenizer_cursor {
47 sqlite3_tokenizer_cursor base; 46 sqlite3_tokenizer_cursor base;
48 const char *zInput; /* input we are tokenizing */ 47 const char *zInput; /* input we are tokenizing */
49 int nInput; /* size of the input */ 48 int nInput; /* size of the input */
50 int iOffset; /* current position in zInput */ 49 int iOffset; /* current position in zInput */
51 int iToken; /* index of next token to be returned */ 50 int iToken; /* index of next token to be returned */
52 char *zToken; /* storage for current token */ 51 char *zToken; /* storage for current token */
53 int nAllocated; /* space allocated to zToken buffer */ 52 int nAllocated; /* space allocated to zToken buffer */
54 } porter_tokenizer_cursor; 53 } porter_tokenizer_cursor;
(...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after
397 } 396 }
398 397
399 /* Step 1c */ 398 /* Step 1c */
400 if( z[0]=='y' && hasVowel(z+1) ){ 399 if( z[0]=='y' && hasVowel(z+1) ){
401 z[0] = 'i'; 400 z[0] = 'i';
402 } 401 }
403 402
404 /* Step 2 */ 403 /* Step 2 */
405 switch( z[1] ){ 404 switch( z[1] ){
406 case 'a': 405 case 'a':
407 stem(&z, "lanoita", "ate", m_gt_0) || 406 if( !stem(&z, "lanoita", "ate", m_gt_0) ){
408 stem(&z, "lanoit", "tion", m_gt_0); 407 stem(&z, "lanoit", "tion", m_gt_0);
408 }
409 break; 409 break;
410 case 'c': 410 case 'c':
411 stem(&z, "icne", "ence", m_gt_0) || 411 if( !stem(&z, "icne", "ence", m_gt_0) ){
412 stem(&z, "icna", "ance", m_gt_0); 412 stem(&z, "icna", "ance", m_gt_0);
413 }
413 break; 414 break;
414 case 'e': 415 case 'e':
415 stem(&z, "rezi", "ize", m_gt_0); 416 stem(&z, "rezi", "ize", m_gt_0);
416 break; 417 break;
417 case 'g': 418 case 'g':
418 stem(&z, "igol", "log", m_gt_0); 419 stem(&z, "igol", "log", m_gt_0);
419 break; 420 break;
420 case 'l': 421 case 'l':
421 stem(&z, "ilb", "ble", m_gt_0) || 422 if( !stem(&z, "ilb", "ble", m_gt_0)
422 stem(&z, "illa", "al", m_gt_0) || 423 && !stem(&z, "illa", "al", m_gt_0)
423 stem(&z, "iltne", "ent", m_gt_0) || 424 && !stem(&z, "iltne", "ent", m_gt_0)
424 stem(&z, "ile", "e", m_gt_0) || 425 && !stem(&z, "ile", "e", m_gt_0)
425 stem(&z, "ilsuo", "ous", m_gt_0); 426 ){
427 stem(&z, "ilsuo", "ous", m_gt_0);
428 }
426 break; 429 break;
427 case 'o': 430 case 'o':
428 stem(&z, "noitazi", "ize", m_gt_0) || 431 if( !stem(&z, "noitazi", "ize", m_gt_0)
429 stem(&z, "noita", "ate", m_gt_0) || 432 && !stem(&z, "noita", "ate", m_gt_0)
430 stem(&z, "rota", "ate", m_gt_0); 433 ){
434 stem(&z, "rota", "ate", m_gt_0);
435 }
431 break; 436 break;
432 case 's': 437 case 's':
433 stem(&z, "msila", "al", m_gt_0) || 438 if( !stem(&z, "msila", "al", m_gt_0)
434 stem(&z, "ssenevi", "ive", m_gt_0) || 439 && !stem(&z, "ssenevi", "ive", m_gt_0)
435 stem(&z, "ssenluf", "ful", m_gt_0) || 440 && !stem(&z, "ssenluf", "ful", m_gt_0)
436 stem(&z, "ssensuo", "ous", m_gt_0); 441 ){
442 stem(&z, "ssensuo", "ous", m_gt_0);
443 }
437 break; 444 break;
438 case 't': 445 case 't':
439 stem(&z, "itila", "al", m_gt_0) || 446 if( !stem(&z, "itila", "al", m_gt_0)
440 stem(&z, "itivi", "ive", m_gt_0) || 447 && !stem(&z, "itivi", "ive", m_gt_0)
441 stem(&z, "itilib", "ble", m_gt_0); 448 ){
449 stem(&z, "itilib", "ble", m_gt_0);
450 }
442 break; 451 break;
443 } 452 }
444 453
445 /* Step 3 */ 454 /* Step 3 */
446 switch( z[0] ){ 455 switch( z[0] ){
447 case 'e': 456 case 'e':
448 stem(&z, "etaci", "ic", m_gt_0) || 457 if( !stem(&z, "etaci", "ic", m_gt_0)
449 stem(&z, "evita", "", m_gt_0) || 458 && !stem(&z, "evita", "", m_gt_0)
450 stem(&z, "ezila", "al", m_gt_0); 459 ){
460 stem(&z, "ezila", "al", m_gt_0);
461 }
451 break; 462 break;
452 case 'i': 463 case 'i':
453 stem(&z, "itici", "ic", m_gt_0); 464 stem(&z, "itici", "ic", m_gt_0);
454 break; 465 break;
455 case 'l': 466 case 'l':
456 stem(&z, "laci", "ic", m_gt_0) || 467 if( !stem(&z, "laci", "ic", m_gt_0) ){
457 stem(&z, "luf", "", m_gt_0); 468 stem(&z, "luf", "", m_gt_0);
469 }
458 break; 470 break;
459 case 's': 471 case 's':
460 stem(&z, "ssen", "", m_gt_0); 472 stem(&z, "ssen", "", m_gt_0);
461 break; 473 break;
462 } 474 }
463 475
464 /* Step 4 */ 476 /* Step 4 */
465 switch( z[1] ){ 477 switch( z[1] ){
466 case 'a': 478 case 'a':
467 if( z[0]=='l' && m_gt_1(z+2) ){ 479 if( z[0]=='l' && m_gt_1(z+2) ){
(...skipping 20 matching lines...) Expand all
488 z += 4; 500 z += 4;
489 } 501 }
490 break; 502 break;
491 case 'n': 503 case 'n':
492 if( z[0]=='t' ){ 504 if( z[0]=='t' ){
493 if( z[2]=='a' ){ 505 if( z[2]=='a' ){
494 if( m_gt_1(z+3) ){ 506 if( m_gt_1(z+3) ){
495 z += 3; 507 z += 3;
496 } 508 }
497 }else if( z[2]=='e' ){ 509 }else if( z[2]=='e' ){
498 stem(&z, "tneme", "", m_gt_1) || 510 if( !stem(&z, "tneme", "", m_gt_1)
499 stem(&z, "tnem", "", m_gt_1) || 511 && !stem(&z, "tnem", "", m_gt_1)
500 stem(&z, "tne", "", m_gt_1); 512 ){
513 stem(&z, "tne", "", m_gt_1);
514 }
501 } 515 }
502 } 516 }
503 break; 517 break;
504 case 'o': 518 case 'o':
505 if( z[0]=='u' ){ 519 if( z[0]=='u' ){
506 if( m_gt_1(z+2) ){ 520 if( m_gt_1(z+2) ){
507 z += 2; 521 z += 2;
508 } 522 }
509 }else if( z[3]=='s' || z[3]=='t' ){ 523 }else if( z[3]=='s' || z[3]=='t' ){
510 stem(&z, "noi", "", m_gt_1); 524 stem(&z, "noi", "", m_gt_1);
511 } 525 }
512 break; 526 break;
513 case 's': 527 case 's':
514 if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ 528 if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
515 z += 3; 529 z += 3;
516 } 530 }
517 break; 531 break;
518 case 't': 532 case 't':
519 stem(&z, "eta", "", m_gt_1) || 533 if( !stem(&z, "eta", "", m_gt_1) ){
520 stem(&z, "iti", "", m_gt_1); 534 stem(&z, "iti", "", m_gt_1);
535 }
521 break; 536 break;
522 case 'u': 537 case 'u':
523 if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ 538 if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
524 z += 3; 539 z += 3;
525 } 540 }
526 break; 541 break;
527 case 'v': 542 case 'v':
528 case 'z': 543 case 'z':
529 if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ 544 if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
530 z += 3; 545 z += 3;
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
624 /* 639 /*
625 ** The set of routines that implement the porter-stemmer tokenizer 640 ** The set of routines that implement the porter-stemmer tokenizer
626 */ 641 */
627 static const sqlite3_tokenizer_module porterTokenizerModule = { 642 static const sqlite3_tokenizer_module porterTokenizerModule = {
628 0, 643 0,
629 porterCreate, 644 porterCreate,
630 porterDestroy, 645 porterDestroy,
631 porterOpen, 646 porterOpen,
632 porterClose, 647 porterClose,
633 porterNext, 648 porterNext,
649 0
634 }; 650 };
635 651
636 /* 652 /*
637 ** Allocate a new porter tokenizer. Return a pointer to the new 653 ** Allocate a new porter tokenizer. Return a pointer to the new
638 ** tokenizer in *ppModule 654 ** tokenizer in *ppModule
639 */ 655 */
640 void sqlite3Fts3PorterTokenizerModule( 656 void sqlite3Fts3PorterTokenizerModule(
641 sqlite3_tokenizer_module const**ppModule 657 sqlite3_tokenizer_module const**ppModule
642 ){ 658 ){
643 *ppModule = &porterTokenizerModule; 659 *ppModule = &porterTokenizerModule;
644 } 660 }
645 661
646 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ 662 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
OLDNEW
« no previous file with comments | « third_party/sqlite/src/ext/fts3/fts3_icu.c ('k') | third_party/sqlite/src/ext/fts3/fts3_snippet.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698