Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(59)

Side by Side Diff: third_party/sqlite/src/ext/misc/spellfix.c

Issue 2765553002: [sql] Import SQLite 3.17.0. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 ** 2012 April 10 2 ** 2012 April 10
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 ** * Omit T when followed by CH 179 ** * Omit T when followed by CH
180 ** * Omit W when followed by R 180 ** * Omit W when followed by R
181 ** * Omit D when followed by J or G 181 ** * Omit D when followed by J or G
182 ** * Omit K in KN or G in GN at the beginning of a word 182 ** * Omit K in KN or G in GN at the beginning of a word
183 ** 183 **
184 ** Space to hold the result is obtained from sqlite3_malloc() 184 ** Space to hold the result is obtained from sqlite3_malloc()
185 ** 185 **
186 ** Return NULL if memory allocation fails. 186 ** Return NULL if memory allocation fails.
187 */ 187 */
188 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){ 188 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
189 unsigned char *zOut = sqlite3_malloc( nIn + 1 ); 189 unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
190 int i; 190 int i;
191 int nOut = 0; 191 int nOut = 0;
192 char cPrev = 0x77; 192 char cPrev = 0x77;
193 char cPrevX = 0x77; 193 char cPrevX = 0x77;
194 const unsigned char *aClass = initClass; 194 const unsigned char *aClass = initClass;
195 195
196 if( zOut==0 ) return 0; 196 if( zOut==0 ) return 0;
197 if( nIn>2 ){ 197 if( nIn>2 ){
198 switch( zIn[0] ){ 198 switch( zIn[0] ){
199 case 'g': 199 case 'g':
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after
358 int xA, xB; /* Loop counters for zA[] and zB[] */ 358 int xA, xB; /* Loop counters for zA[] and zB[] */
359 char cA = 0, cB; /* Current character of zA and zB */ 359 char cA = 0, cB; /* Current character of zA and zB */
360 char cAprev, cBprev; /* Previous character of zA and zB */ 360 char cAprev, cBprev; /* Previous character of zA and zB */
361 char cAnext, cBnext; /* Next character in zA and zB */ 361 char cAnext, cBnext; /* Next character in zA and zB */
362 int d; /* North-west cost value */ 362 int d; /* North-west cost value */
363 int dc = 0; /* North-west character value */ 363 int dc = 0; /* North-west character value */
364 int res; /* Final result */ 364 int res; /* Final result */
365 int *m; /* The cost matrix */ 365 int *m; /* The cost matrix */
366 char *cx; /* Corresponding character values */ 366 char *cx; /* Corresponding character values */
367 int *toFree = 0; /* Malloced space */ 367 int *toFree = 0; /* Malloced space */
368 int nMatch = 0;
368 int mStack[60+15]; /* Stack space to use if not too much is needed */ 369 int mStack[60+15]; /* Stack space to use if not too much is needed */
369 int nMatch = 0;
370 370
371 /* Early out if either input is NULL */ 371 /* Early out if either input is NULL */
372 if( zA==0 || zB==0 ) return -1; 372 if( zA==0 || zB==0 ) return -1;
373 373
374 /* Skip any common prefix */ 374 /* Skip any common prefix */
375 while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; } 375 while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
376 if( pnMatch ) *pnMatch = nMatch; 376 if( pnMatch ) *pnMatch = nMatch;
377 if( zA[0]==0 && zB[0]==0 ) return 0; 377 if( zA[0]==0 && zB[0]==0 ) return 0;
378 378
379 #if 0 379 #if 0
380 printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' '); 380 printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
381 #endif 381 #endif
382 382
383 /* Verify input strings and measure their lengths */ 383 /* Verify input strings and measure their lengths */
384 for(nA=0; zA[nA]; nA++){ 384 for(nA=0; zA[nA]; nA++){
385 if( zA[nA]&0x80 ) return -2; 385 if( zA[nA]&0x80 ) return -2;
386 } 386 }
387 for(nB=0; zB[nB]; nB++){ 387 for(nB=0; zB[nB]; nB++){
388 if( zB[nB]&0x80 ) return -2; 388 if( zB[nB]&0x80 ) return -2;
389 } 389 }
390 390
391 /* Special processing if either string is empty */ 391 /* Special processing if either string is empty */
392 if( nA==0 ){ 392 if( nA==0 ){
393 cBprev = dc; 393 cBprev = (char)dc;
394 for(xB=res=0; (cB = zB[xB])!=0; xB++){ 394 for(xB=res=0; (cB = zB[xB])!=0; xB++){
395 res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV; 395 res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
396 cBprev = cB; 396 cBprev = cB;
397 } 397 }
398 return res; 398 return res;
399 } 399 }
400 if( nB==0 ){ 400 if( nB==0 ){
401 cAprev = dc; 401 cAprev = (char)dc;
402 for(xA=res=0; (cA = zA[xA])!=0; xA++){ 402 for(xA=res=0; (cA = zA[xA])!=0; xA++){
403 res += insertOrDeleteCost(cAprev, cA, zA[xA+1]); 403 res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
404 cAprev = cA; 404 cAprev = cA;
405 } 405 }
406 return res; 406 return res;
407 } 407 }
408 408
409 /* A is a prefix of B */ 409 /* A is a prefix of B */
410 if( zA[0]=='*' && zA[1]==0 ) return 0; 410 if( zA[0]=='*' && zA[1]==0 ) return 0;
411 411
412 /* Allocate and initialize the Wagner matrix */ 412 /* Allocate and initialize the Wagner matrix */
413 if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){ 413 if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
414 m = mStack; 414 m = mStack;
415 }else{ 415 }else{
416 m = toFree = sqlite3_malloc( (nB+1)*5*sizeof(m[0])/4 ); 416 m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
417 if( m==0 ) return -3; 417 if( m==0 ) return -3;
418 } 418 }
419 cx = (char*)&m[nB+1]; 419 cx = (char*)&m[nB+1];
420 420
421 /* Compute the Wagner edit distance */ 421 /* Compute the Wagner edit distance */
422 m[0] = 0; 422 m[0] = 0;
423 cx[0] = dc; 423 cx[0] = (char)dc;
424 cBprev = dc; 424 cBprev = (char)dc;
425 for(xB=1; xB<=nB; xB++){ 425 for(xB=1; xB<=nB; xB++){
426 cBnext = zB[xB]; 426 cBnext = zB[xB];
427 cB = zB[xB-1]; 427 cB = zB[xB-1];
428 cx[xB] = cB; 428 cx[xB] = cB;
429 m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext); 429 m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
430 cBprev = cB; 430 cBprev = cB;
431 } 431 }
432 cAprev = dc; 432 cAprev = (char)dc;
433 for(xA=1; xA<=nA; xA++){ 433 for(xA=1; xA<=nA; xA++){
434 int lastA = (xA==nA); 434 int lastA = (xA==nA);
435 cA = zA[xA-1]; 435 cA = zA[xA-1];
436 cAnext = zA[xA]; 436 cAnext = zA[xA];
437 if( cA=='*' && lastA ) break; 437 if( cA=='*' && lastA ) break;
438 d = m[0]; 438 d = m[0];
439 dc = cx[0]; 439 dc = cx[0];
440 m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext); 440 m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
441 cBprev = 0; 441 cBprev = 0;
442 for(xB=1; xB<=nB; xB++){ 442 for(xB=1; xB<=nB; xB++){
(...skipping 26 matching lines...) Expand all
469 printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c" 469 printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
470 " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n", 470 " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
471 xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB, 471 xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
472 insCost, delCost, subCost, totalCost, ncx?ncx:' '); 472 insCost, delCost, subCost, totalCost, ncx?ncx:' ');
473 #endif 473 #endif
474 474
475 /* Update the matrix */ 475 /* Update the matrix */
476 d = m[xB]; 476 d = m[xB];
477 dc = cx[xB]; 477 dc = cx[xB];
478 m[xB] = totalCost; 478 m[xB] = totalCost;
479 cx[xB] = ncx; 479 cx[xB] = (char)ncx;
480 cBprev = cB; 480 cBprev = cB;
481 } 481 }
482 cAprev = cA; 482 cAprev = cA;
483 } 483 }
484 484
485 /* Free the wagner matrix and return the result */ 485 /* Free the wagner matrix and return the result */
486 if( cA=='*' ){ 486 if( cA=='*' ){
487 res = m[1]; 487 res = m[1];
488 for(xB=1; xB<=nB; xB++){ 488 for(xB=1; xB<=nB; xB++){
489 if( m[xB]<res ){ 489 if( m[xB]<res ){
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
680 const char *zTo = (const char*)sqlite3_column_text(pStmt, 2); 680 const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
681 int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0; 681 int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
682 int iCost = sqlite3_column_int(pStmt, 3); 682 int iCost = sqlite3_column_int(pStmt, 3);
683 683
684 assert( zFrom!=0 || nFrom==0 ); 684 assert( zFrom!=0 || nFrom==0 );
685 assert( zTo!=0 || nTo==0 ); 685 assert( zTo!=0 || nTo==0 );
686 if( nFrom>100 || nTo>100 ) continue; 686 if( nFrom>100 || nTo>100 ) continue;
687 if( iCost<0 ) continue; 687 if( iCost<0 ) continue;
688 if( pLang==0 || iLang!=iLangPrev ){ 688 if( pLang==0 || iLang!=iLangPrev ){
689 EditDist3Lang *pNew; 689 EditDist3Lang *pNew;
690 pNew = sqlite3_realloc(p->a, (p->nLang+1)*sizeof(p->a[0])); 690 pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
691 if( pNew==0 ){ rc = SQLITE_NOMEM; break; } 691 if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
692 p->a = pNew; 692 p->a = pNew;
693 pLang = &p->a[p->nLang]; 693 pLang = &p->a[p->nLang];
694 p->nLang++; 694 p->nLang++;
695 pLang->iLang = iLang; 695 pLang->iLang = iLang;
696 pLang->iInsCost = 100; 696 pLang->iInsCost = 100;
697 pLang->iDelCost = 100; 697 pLang->iDelCost = 100;
698 pLang->iSubCost = 150; 698 pLang->iSubCost = 150;
699 pLang->pCost = 0; 699 pLang->pCost = 0;
700 iLangPrev = iLang; 700 iLangPrev = iLang;
701 } 701 }
702 if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){ 702 if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
703 pLang->iDelCost = iCost; 703 pLang->iDelCost = iCost;
704 }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){ 704 }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
705 pLang->iInsCost = iCost; 705 pLang->iInsCost = iCost;
706 }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){ 706 }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
707 pLang->iSubCost = iCost; 707 pLang->iSubCost = iCost;
708 }else{ 708 }else{
709 EditDist3Cost *pCost; 709 EditDist3Cost *pCost;
710 int nExtra = nFrom + nTo - 4; 710 int nExtra = nFrom + nTo - 4;
711 if( nExtra<0 ) nExtra = 0; 711 if( nExtra<0 ) nExtra = 0;
712 pCost = sqlite3_malloc( sizeof(*pCost) + nExtra ); 712 pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
713 if( pCost==0 ){ rc = SQLITE_NOMEM; break; } 713 if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
714 pCost->nFrom = nFrom; 714 pCost->nFrom = (u8)nFrom;
715 pCost->nTo = nTo; 715 pCost->nTo = (u8)nTo;
716 pCost->iCost = iCost; 716 pCost->iCost = (u16)iCost;
717 memcpy(pCost->a, zFrom, nFrom); 717 memcpy(pCost->a, zFrom, nFrom);
718 memcpy(pCost->a + nFrom, zTo, nTo); 718 memcpy(pCost->a + nFrom, zTo, nTo);
719 pCost->pNext = pLang->pCost; 719 pCost->pNext = pLang->pCost;
720 pLang->pCost = pCost; 720 pLang->pCost = pCost;
721 } 721 }
722 } 722 }
723 rc2 = sqlite3_finalize(pStmt); 723 rc2 = sqlite3_finalize(pStmt);
724 if( rc==SQLITE_OK ) rc = rc2; 724 if( rc==SQLITE_OK ) rc = rc2;
725 return rc; 725 return rc;
726 } 726 }
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
801 const EditDist3Lang *pLang, 801 const EditDist3Lang *pLang,
802 const char *z, 802 const char *z,
803 int n 803 int n
804 ){ 804 ){
805 EditDist3FromString *pStr; 805 EditDist3FromString *pStr;
806 EditDist3Cost *p; 806 EditDist3Cost *p;
807 int i; 807 int i;
808 808
809 if( z==0 ) return 0; 809 if( z==0 ) return 0;
810 if( n<0 ) n = (int)strlen(z); 810 if( n<0 ) n = (int)strlen(z);
811 pStr = sqlite3_malloc( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 ); 811 pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
812 if( pStr==0 ) return 0; 812 if( pStr==0 ) return 0;
813 pStr->a = (EditDist3From*)&pStr[1]; 813 pStr->a = (EditDist3From*)&pStr[1];
814 memset(pStr->a, 0, sizeof(pStr->a[0])*n); 814 memset(pStr->a, 0, sizeof(pStr->a[0])*n);
815 pStr->n = n; 815 pStr->n = n;
816 pStr->z = (char*)&pStr->a[n]; 816 pStr->z = (char*)&pStr->a[n];
817 memcpy(pStr->z, z, n+1); 817 memcpy(pStr->z, z, n+1);
818 if( n && z[n-1]=='*' ){ 818 if( n && z[n-1]=='*' ){
819 pStr->isPrefix = 1; 819 pStr->isPrefix = 1;
820 n--; 820 n--;
821 pStr->n--; 821 pStr->n--;
822 pStr->z[n] = 0; 822 pStr->z[n] = 0;
823 }else{ 823 }else{
824 pStr->isPrefix = 0; 824 pStr->isPrefix = 0;
825 } 825 }
826 826
827 for(i=0; i<n; i++){ 827 for(i=0; i<n; i++){
828 EditDist3From *pFrom = &pStr->a[i]; 828 EditDist3From *pFrom = &pStr->a[i];
829 memset(pFrom, 0, sizeof(*pFrom)); 829 memset(pFrom, 0, sizeof(*pFrom));
830 pFrom->nByte = utf8Len((unsigned char)z[i], n-i); 830 pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
831 for(p=pLang->pCost; p; p=p->pNext){ 831 for(p=pLang->pCost; p; p=p->pNext){
832 EditDist3Cost **apNew; 832 EditDist3Cost **apNew;
833 if( i+p->nFrom>n ) continue; 833 if( i+p->nFrom>n ) continue;
834 if( matchFrom(p, z+i, n-i)==0 ) continue; 834 if( matchFrom(p, z+i, n-i)==0 ) continue;
835 if( p->nTo==0 ){ 835 if( p->nTo==0 ){
836 apNew = sqlite3_realloc(pFrom->apDel, 836 apNew = sqlite3_realloc64(pFrom->apDel,
837 sizeof(*apNew)*(pFrom->nDel+1)); 837 sizeof(*apNew)*(pFrom->nDel+1));
838 if( apNew==0 ) break; 838 if( apNew==0 ) break;
839 pFrom->apDel = apNew; 839 pFrom->apDel = apNew;
840 apNew[pFrom->nDel++] = p; 840 apNew[pFrom->nDel++] = p;
841 }else{ 841 }else{
842 apNew = sqlite3_realloc(pFrom->apSubst, 842 apNew = sqlite3_realloc64(pFrom->apSubst,
843 sizeof(*apNew)*(pFrom->nSubst+1)); 843 sizeof(*apNew)*(pFrom->nSubst+1));
844 if( apNew==0 ) break; 844 if( apNew==0 ) break;
845 pFrom->apSubst = apNew; 845 pFrom->apSubst = apNew;
846 apNew[pFrom->nSubst++] = p; 846 apNew[pFrom->nSubst++] = p;
847 } 847 }
848 } 848 }
849 if( p ){ 849 if( p ){
850 editDist3FromStringDelete(pStr); 850 editDist3FromStringDelete(pStr);
851 pStr = 0; 851 pStr = 0;
852 break; 852 break;
(...skipping 15 matching lines...) Expand all
868 int j, 868 int j,
869 int iCost 869 int iCost
870 ){ 870 ){
871 assert( iCost>=0 ); 871 assert( iCost>=0 );
872 if( iCost<10000 ){ 872 if( iCost<10000 ){
873 unsigned int b = m[j] + iCost; 873 unsigned int b = m[j] + iCost;
874 if( b<m[i] ) m[i] = b; 874 if( b<m[i] ) m[i] = b;
875 } 875 }
876 } 876 }
877 877
878 /*
879 ** How much stack space (int bytes) to use for Wagner matrix in
880 ** editDist3Core(). If more space than this is required, the entire
881 ** matrix is taken from the heap. To reduce the load on the memory
882 ** allocator, make this value as large as practical for the
883 ** architecture in use.
884 */
885 #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
886 # define SQLITE_SPELLFIX_STACKALLOC_SZ (1024)
887 #endif
888
878 /* Compute the edit distance between two strings. 889 /* Compute the edit distance between two strings.
879 ** 890 **
880 ** If an error occurs, return a negative number which is the error code. 891 ** If an error occurs, return a negative number which is the error code.
881 ** 892 **
882 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters 893 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
883 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does 894 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
884 ** not contain the pattern for a prefix-search, then this is always the number 895 ** not contain the pattern for a prefix-search, then this is always the number
885 ** of characters in z2. If pFrom does contain a prefix search pattern, then 896 ** of characters in z2. If pFrom does contain a prefix search pattern, then
886 ** it is the number of characters in the prefix of z2 that was deemed to 897 ** it is the number of characters in the prefix of z2 that was deemed to
887 ** match pFrom. 898 ** match pFrom.
888 */ 899 */
889 static int editDist3Core( 900 static int editDist3Core(
890 EditDist3FromString *pFrom, /* The FROM string */ 901 EditDist3FromString *pFrom, /* The FROM string */
891 const char *z2, /* The TO string */ 902 const char *z2, /* The TO string */
892 int n2, /* Length of the TO string */ 903 int n2, /* Length of the TO string */
893 const EditDist3Lang *pLang, /* Edit weights for a particular language ID */ 904 const EditDist3Lang *pLang, /* Edit weights for a particular language ID */
894 int *pnMatch /* OUT: Characters in matched prefix */ 905 int *pnMatch /* OUT: Characters in matched prefix */
895 ){ 906 ){
896 int k, n; 907 int k, n;
897 int i1, b1; 908 int i1, b1;
898 int i2, b2; 909 int i2, b2;
899 EditDist3FromString f = *pFrom; 910 EditDist3FromString f = *pFrom;
900 EditDist3To *a2; 911 EditDist3To *a2;
901 unsigned int *m; 912 unsigned int *m;
913 unsigned int *pToFree;
902 int szRow; 914 int szRow;
903 EditDist3Cost *p; 915 EditDist3Cost *p;
904 int res; 916 int res;
917 sqlite3_uint64 nByte;
918 unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
905 919
906 /* allocate the Wagner matrix and the aTo[] array for the TO string */ 920 /* allocate the Wagner matrix and the aTo[] array for the TO string */
907 n = (f.n+1)*(n2+1); 921 n = (f.n+1)*(n2+1);
908 n = (n+1)&~1; 922 n = (n+1)&~1;
909 m = sqlite3_malloc( n*sizeof(m[0]) + sizeof(a2[0])*n2 ); 923 nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
910 if( m==0 ) return -1; /* Out of memory */ 924 if( nByte<=sizeof(stackSpace) ){
925 m = stackSpace;
926 pToFree = 0;
927 }else{
928 m = pToFree = sqlite3_malloc64( nByte );
929 if( m==0 ) return -1; /* Out of memory */
930 }
911 a2 = (EditDist3To*)&m[n]; 931 a2 = (EditDist3To*)&m[n];
912 memset(a2, 0, sizeof(a2[0])*n2); 932 memset(a2, 0, sizeof(a2[0])*n2);
913 933
914 /* Fill in the a1[] matrix for all characters of the TO string */ 934 /* Fill in the a1[] matrix for all characters of the TO string */
915 for(i2=0; i2<n2; i2++){ 935 for(i2=0; i2<n2; i2++){
916 a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2); 936 a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
917 for(p=pLang->pCost; p; p=p->pNext){ 937 for(p=pLang->pCost; p; p=p->pNext){
918 EditDist3Cost **apNew; 938 EditDist3Cost **apNew;
919 if( p->nFrom>0 ) continue; 939 if( p->nFrom>0 ) continue;
920 if( i2+p->nTo>n2 ) continue; 940 if( i2+p->nTo>n2 ) continue;
921 if( matchTo(p, z2+i2, n2-i2)==0 ) continue; 941 if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
922 a2[i2].nIns++; 942 a2[i2].nIns++;
923 apNew = sqlite3_realloc(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns); 943 apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
924 if( apNew==0 ){ 944 if( apNew==0 ){
925 res = -1; /* Out of memory */ 945 res = -1; /* Out of memory */
926 goto editDist3Abort; 946 goto editDist3Abort;
927 } 947 }
928 a2[i2].apIns = apNew; 948 a2[i2].apIns = apNew;
929 a2[i2].apIns[a2[i2].nIns-1] = p; 949 a2[i2].apIns[a2[i2].nIns-1] = p;
930 } 950 }
931 } 951 }
932 952
933 /* Prepare to compute the minimum edit distance */ 953 /* Prepare to compute the minimum edit distance */
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
1022 if( pnMatch ){ 1042 if( pnMatch ){
1023 int nExtra = 0; 1043 int nExtra = 0;
1024 for(k=0; k<n; k++){ 1044 for(k=0; k<n; k++){
1025 if( (z2[k] & 0xc0)==0x80 ) nExtra++; 1045 if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1026 } 1046 }
1027 *pnMatch = n - nExtra; 1047 *pnMatch = n - nExtra;
1028 } 1048 }
1029 1049
1030 editDist3Abort: 1050 editDist3Abort:
1031 for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns); 1051 for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1032 sqlite3_free(m); 1052 sqlite3_free(pToFree);
1033 return res; 1053 return res;
1034 } 1054 }
1035 1055
1036 /* 1056 /*
1037 ** Get an appropriate EditDist3Lang object. 1057 ** Get an appropriate EditDist3Lang object.
1038 */ 1058 */
1039 static const EditDist3Lang *editDist3FindLang( 1059 static const EditDist3Lang *editDist3FindLang(
1040 EditDist3Config *pConfig, 1060 EditDist3Config *pConfig,
1041 int iLang 1061 int iLang
1042 ){ 1062 ){
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
1091 sqlite3_result_int(context, dist); 1111 sqlite3_result_int(context, dist);
1092 } 1112 }
1093 } 1113 }
1094 } 1114 }
1095 1115
1096 /* 1116 /*
1097 ** Register the editDist3 function with SQLite 1117 ** Register the editDist3 function with SQLite
1098 */ 1118 */
1099 static int editDist3Install(sqlite3 *db){ 1119 static int editDist3Install(sqlite3 *db){
1100 int rc; 1120 int rc;
1101 EditDist3Config *pConfig = sqlite3_malloc( sizeof(*pConfig) ); 1121 EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1102 if( pConfig==0 ) return SQLITE_NOMEM; 1122 if( pConfig==0 ) return SQLITE_NOMEM;
1103 memset(pConfig, 0, sizeof(*pConfig)); 1123 memset(pConfig, 0, sizeof(*pConfig));
1104 rc = sqlite3_create_function_v2(db, "editdist3", 1124 rc = sqlite3_create_function_v2(db, "editdist3",
1105 2, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0); 1125 2, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0);
1106 if( rc==SQLITE_OK ){ 1126 if( rc==SQLITE_OK ){
1107 rc = sqlite3_create_function_v2(db, "editdist3", 1127 rc = sqlite3_create_function_v2(db, "editdist3",
1108 3, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0); 1128 3, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0);
1109 } 1129 }
1110 if( rc==SQLITE_OK ){ 1130 if( rc==SQLITE_OK ){
1111 rc = sqlite3_create_function_v2(db, "editdist3", 1131 rc = sqlite3_create_function_v2(db, "editdist3",
(...skipping 468 matching lines...) Expand 10 before | Expand all | Expand 10 after
1580 ** Convert the input string from UTF-8 into pure ASCII by converting 1600 ** Convert the input string from UTF-8 into pure ASCII by converting
1581 ** all non-ASCII characters to some combination of characters in the 1601 ** all non-ASCII characters to some combination of characters in the
1582 ** ASCII subset. 1602 ** ASCII subset.
1583 ** 1603 **
1584 ** The returned string might contain more characters than the input. 1604 ** The returned string might contain more characters than the input.
1585 ** 1605 **
1586 ** Space to hold the returned string comes from sqlite3_malloc() and 1606 ** Space to hold the returned string comes from sqlite3_malloc() and
1587 ** should be freed by the caller. 1607 ** should be freed by the caller.
1588 */ 1608 */
1589 static unsigned char *transliterate(const unsigned char *zIn, int nIn){ 1609 static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1590 unsigned char *zOut = sqlite3_malloc( nIn*4 + 1 ); 1610 unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1591 int c, sz, nOut; 1611 int c, sz, nOut;
1592 if( zOut==0 ) return 0; 1612 if( zOut==0 ) return 0;
1593 nOut = 0; 1613 nOut = 0;
1594 while( nIn>0 ){ 1614 while( nIn>0 ){
1595 c = utf8Read(zIn, nIn, &sz); 1615 c = utf8Read(zIn, nIn, &sz);
1596 zIn += sz; 1616 zIn += sz;
1597 nIn -= sz; 1617 nIn -= sz;
1598 if( c<=127 ){ 1618 if( c<=127 ){
1599 zOut[nOut++] = c; 1619 zOut[nOut++] = (unsigned char)c;
1600 }else{ 1620 }else{
1601 int xTop, xBtm, x; 1621 int xTop, xBtm, x;
1602 xTop = sizeof(translit)/sizeof(translit[0]) - 1; 1622 xTop = sizeof(translit)/sizeof(translit[0]) - 1;
1603 xBtm = 0; 1623 xBtm = 0;
1604 while( xTop>=xBtm ){ 1624 while( xTop>=xBtm ){
1605 x = (xTop + xBtm)/2; 1625 x = (xTop + xBtm)/2;
1606 if( translit[x].cFrom==c ){ 1626 if( translit[x].cFrom==c ){
1607 zOut[nOut++] = translit[x].cTo0; 1627 zOut[nOut++] = translit[x].cTo0;
1608 if( translit[x].cTo1 ){ 1628 if( translit[x].cTo1 ){
1609 zOut[nOut++] = translit[x].cTo1; 1629 zOut[nOut++] = translit[x].cTo1;
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
1707 static void scriptCodeSqlFunc( 1727 static void scriptCodeSqlFunc(
1708 sqlite3_context *context, 1728 sqlite3_context *context,
1709 int argc, 1729 int argc,
1710 sqlite3_value **argv 1730 sqlite3_value **argv
1711 ){ 1731 ){
1712 const unsigned char *zIn = sqlite3_value_text(argv[0]); 1732 const unsigned char *zIn = sqlite3_value_text(argv[0]);
1713 int nIn = sqlite3_value_bytes(argv[0]); 1733 int nIn = sqlite3_value_bytes(argv[0]);
1714 int c, sz; 1734 int c, sz;
1715 int scriptMask = 0; 1735 int scriptMask = 0;
1716 int res; 1736 int res;
1737 int seenDigit = 0;
1717 # define SCRIPT_LATIN 0x0001 1738 # define SCRIPT_LATIN 0x0001
1718 # define SCRIPT_CYRILLIC 0x0002 1739 # define SCRIPT_CYRILLIC 0x0002
1719 # define SCRIPT_GREEK 0x0004 1740 # define SCRIPT_GREEK 0x0004
1720 # define SCRIPT_HEBREW 0x0008 1741 # define SCRIPT_HEBREW 0x0008
1721 # define SCRIPT_ARABIC 0x0010 1742 # define SCRIPT_ARABIC 0x0010
1722 1743
1723 while( nIn>0 ){ 1744 while( nIn>0 ){
1724 c = utf8Read(zIn, nIn, &sz); 1745 c = utf8Read(zIn, nIn, &sz);
1725 zIn += sz; 1746 zIn += sz;
1726 nIn -= sz; 1747 nIn -= sz;
1727 if( c<0x02af && (c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT) ){ 1748 if( c<0x02af ){
1728 scriptMask |= SCRIPT_LATIN; 1749 if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1750 scriptMask |= SCRIPT_LATIN;
1751 }else if( c>='0' && c<='9' ){
1752 seenDigit = 1;
1753 }
1729 }else if( c>=0x0400 && c<=0x04ff ){ 1754 }else if( c>=0x0400 && c<=0x04ff ){
1730 scriptMask |= SCRIPT_CYRILLIC; 1755 scriptMask |= SCRIPT_CYRILLIC;
1731 }else if( c>=0x0386 && c<=0x03ce ){ 1756 }else if( c>=0x0386 && c<=0x03ce ){
1732 scriptMask |= SCRIPT_GREEK; 1757 scriptMask |= SCRIPT_GREEK;
1733 }else if( c>=0x0590 && c<=0x05ff ){ 1758 }else if( c>=0x0590 && c<=0x05ff ){
1734 scriptMask |= SCRIPT_HEBREW; 1759 scriptMask |= SCRIPT_HEBREW;
1735 }else if( c>=0x0600 && c<=0x06ff ){ 1760 }else if( c>=0x0600 && c<=0x06ff ){
1736 scriptMask |= SCRIPT_ARABIC; 1761 scriptMask |= SCRIPT_ARABIC;
1737 } 1762 }
1738 } 1763 }
1764 if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1739 switch( scriptMask ){ 1765 switch( scriptMask ){
1740 case 0: res = 999; break; 1766 case 0: res = 999; break;
1741 case SCRIPT_LATIN: res = 215; break; 1767 case SCRIPT_LATIN: res = 215; break;
1742 case SCRIPT_CYRILLIC: res = 220; break; 1768 case SCRIPT_CYRILLIC: res = 220; break;
1743 case SCRIPT_GREEK: res = 200; break; 1769 case SCRIPT_GREEK: res = 200; break;
1744 case SCRIPT_HEBREW: res = 125; break; 1770 case SCRIPT_HEBREW: res = 125; break;
1745 case SCRIPT_ARABIC: res = 160; break; 1771 case SCRIPT_ARABIC: res = 160; break;
1746 default: res = 998; break; 1772 default: res = 998; break;
1747 } 1773 }
1748 sqlite3_result_int(context, res); 1774 sqlite3_result_int(context, res);
1749 } 1775 }
1750 1776
1751 /* End transliterate 1777 /* End transliterate
1752 ****************************************************************************** 1778 ******************************************************************************
1753 ****************************************************************************** 1779 ******************************************************************************
1754 ** Begin spellfix1 virtual table. 1780 ** Begin spellfix1 virtual table.
1755 */ 1781 */
1756 1782
1757 /* Maximum length of a phonehash used for querying the shadow table */ 1783 /* Maximum length of a phonehash used for querying the shadow table */
1758 #define SPELLFIX_MX_HASH 8 1784 #define SPELLFIX_MX_HASH 32
1759 1785
1760 /* Maximum number of hash strings to examine per query */ 1786 /* Maximum number of hash strings to examine per query */
1761 #define SPELLFIX_MX_RUN 1 1787 #define SPELLFIX_MX_RUN 1
1762 1788
1763 typedef struct spellfix1_vtab spellfix1_vtab; 1789 typedef struct spellfix1_vtab spellfix1_vtab;
1764 typedef struct spellfix1_cursor spellfix1_cursor; 1790 typedef struct spellfix1_cursor spellfix1_cursor;
1765 1791
1766 /* Fuzzy-search virtual table object */ 1792 /* Fuzzy-search virtual table object */
1767 struct spellfix1_vtab { 1793 struct spellfix1_vtab {
1768 sqlite3_vtab base; /* Base class - must be first */ 1794 sqlite3_vtab base; /* Base class - must be first */
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
1903 ){ 1929 ){
1904 spellfix1_vtab *pNew = 0; 1930 spellfix1_vtab *pNew = 0;
1905 /* const char *zModule = argv[0]; // not used */ 1931 /* const char *zModule = argv[0]; // not used */
1906 const char *zDbName = argv[1]; 1932 const char *zDbName = argv[1];
1907 const char *zTableName = argv[2]; 1933 const char *zTableName = argv[2];
1908 int nDbName; 1934 int nDbName;
1909 int rc = SQLITE_OK; 1935 int rc = SQLITE_OK;
1910 int i; 1936 int i;
1911 1937
1912 nDbName = (int)strlen(zDbName); 1938 nDbName = (int)strlen(zDbName);
1913 pNew = sqlite3_malloc( sizeof(*pNew) + nDbName + 1); 1939 pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
1914 if( pNew==0 ){ 1940 if( pNew==0 ){
1915 rc = SQLITE_NOMEM; 1941 rc = SQLITE_NOMEM;
1916 }else{ 1942 }else{
1917 memset(pNew, 0, sizeof(*pNew)); 1943 memset(pNew, 0, sizeof(*pNew));
1918 pNew->zDbName = (char*)&pNew[1]; 1944 pNew->zDbName = (char*)&pNew[1];
1919 memcpy(pNew->zDbName, zDbName, nDbName+1); 1945 memcpy(pNew->zDbName, zDbName, nDbName+1);
1920 pNew->zTableName = sqlite3_mprintf("%s", zTableName); 1946 pNew->zTableName = sqlite3_mprintf("%s", zTableName);
1921 pNew->db = db; 1947 pNew->db = db;
1922 if( pNew->zTableName==0 ){ 1948 if( pNew->zTableName==0 ){
1923 rc = SQLITE_NOMEM; 1949 rc = SQLITE_NOMEM;
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
2017 pCur->pFullScan = 0; 2043 pCur->pFullScan = 0;
2018 } 2044 }
2019 } 2045 }
2020 2046
2021 /* 2047 /*
2022 ** Resize the cursor to hold up to N rows of content 2048 ** Resize the cursor to hold up to N rows of content
2023 */ 2049 */
2024 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){ 2050 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2025 struct spellfix1_row *aNew; 2051 struct spellfix1_row *aNew;
2026 assert( N>=pCur->nRow ); 2052 assert( N>=pCur->nRow );
2027 aNew = sqlite3_realloc(pCur->a, sizeof(pCur->a[0])*N); 2053 aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2028 if( aNew==0 && N>0 ){ 2054 if( aNew==0 && N>0 ){
2029 spellfix1ResetCursor(pCur); 2055 spellfix1ResetCursor(pCur);
2030 sqlite3_free(pCur->a); 2056 sqlite3_free(pCur->a);
2031 pCur->nAlloc = 0; 2057 pCur->nAlloc = 0;
2032 pCur->a = 0; 2058 pCur->a = 0;
2033 }else{ 2059 }else{
2034 pCur->nAlloc = N; 2060 pCur->nAlloc = N;
2035 pCur->a = aNew; 2061 pCur->a = aNew;
2036 } 2062 }
2037 } 2063 }
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
2176 } 2202 }
2177 return SQLITE_OK; 2203 return SQLITE_OK;
2178 } 2204 }
2179 2205
2180 /* 2206 /*
2181 ** Open a new fuzzy-search cursor. 2207 ** Open a new fuzzy-search cursor.
2182 */ 2208 */
2183 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ 2209 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2184 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 2210 spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2185 spellfix1_cursor *pCur; 2211 spellfix1_cursor *pCur;
2186 pCur = sqlite3_malloc( sizeof(*pCur) ); 2212 pCur = sqlite3_malloc64( sizeof(*pCur) );
2187 if( pCur==0 ) return SQLITE_NOMEM; 2213 if( pCur==0 ) return SQLITE_NOMEM;
2188 memset(pCur, 0, sizeof(*pCur)); 2214 memset(pCur, 0, sizeof(*pCur));
2189 pCur->pVTab = p; 2215 pCur->pVTab = p;
2190 *ppCursor = &pCur->base; 2216 *ppCursor = &pCur->base;
2191 return SQLITE_OK; 2217 return SQLITE_OK;
2192 } 2218 }
2193 2219
2194 /* 2220 /*
2195 ** Adjust a distance measurement by the words rank in order to show 2221 ** Adjust a distance measurement by the words rank in order to show
2196 ** preference to common words. 2222 ** preference to common words.
2197 */ 2223 */
2198 static int spellfix1Score(int iDistance, int iRank){ 2224 static int spellfix1Score(int iDistance, int iRank){
2199 int iLog2; 2225 int iLog2;
2200 for(iLog2=0; iRank>0; iLog2++, iRank>>=1){} 2226 for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
2201 return iDistance + 32 - iLog2; 2227 return iDistance + 32 - iLog2;
2202 } 2228 }
2203 2229
2204 /* 2230 /*
2205 ** Compare two spellfix1_row objects for sorting purposes in qsort() such 2231 ** Compare two spellfix1_row objects for sorting purposes in qsort() such
2206 ** that they sort in order of increasing distance. 2232 ** that they sort in order of increasing distance.
2207 */ 2233 */
2208 static int spellfix1RowCompare(const void *A, const void *B){ 2234 static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2209 const struct spellfix1_row *a = (const struct spellfix1_row*)A; 2235 const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2210 const struct spellfix1_row *b = (const struct spellfix1_row*)B; 2236 const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2211 return a->iScore - b->iScore; 2237 return a->iScore - b->iScore;
2212 } 2238 }
2213 2239
2214 /* 2240 /*
2215 ** A structure used to pass information from spellfix1FilterForMatch() 2241 ** A structure used to pass information from spellfix1FilterForMatch()
2216 ** into spellfix1RunQuery(). 2242 ** into spellfix1RunQuery().
2217 */ 2243 */
2218 typedef struct MatchQuery { 2244 typedef struct MatchQuery {
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
2390 int iLang = 0; /* Language code */ 2416 int iLang = 0; /* Language code */
2391 char *zSql; /* SQL of shadow table query */ 2417 char *zSql; /* SQL of shadow table query */
2392 sqlite3_stmt *pStmt = 0; /* Shadow table query */ 2418 sqlite3_stmt *pStmt = 0; /* Shadow table query */
2393 int rc; /* Result code */ 2419 int rc; /* Result code */
2394 int idx = 1; /* Next available filter parameter */ 2420 int idx = 1; /* Next available filter parameter */
2395 spellfix1_vtab *p = pCur->pVTab; /* The virtual table that owns pCur */ 2421 spellfix1_vtab *p = pCur->pVTab; /* The virtual table that owns pCur */
2396 MatchQuery x; /* For passing info to RunQuery() */ 2422 MatchQuery x; /* For passing info to RunQuery() */
2397 2423
2398 /* Load the cost table if we have not already done so */ 2424 /* Load the cost table if we have not already done so */
2399 if( p->zCostTable!=0 && p->pConfig3==0 ){ 2425 if( p->zCostTable!=0 && p->pConfig3==0 ){
2400 p->pConfig3 = sqlite3_malloc( sizeof(p->pConfig3[0]) ); 2426 p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2401 if( p->pConfig3==0 ) return SQLITE_NOMEM; 2427 if( p->pConfig3==0 ) return SQLITE_NOMEM;
2402 memset(p->pConfig3, 0, sizeof(p->pConfig3[0])); 2428 memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2403 rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable); 2429 rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2404 if( rc ) return rc; 2430 if( rc ) return rc;
2405 } 2431 }
2406 memset(&x, 0, sizeof(x)); 2432 memset(&x, 0, sizeof(x));
2407 x.iScope = 3; /* Default scope if none specified by "WHERE scope=N" */ 2433 x.iScope = 3; /* Default scope if none specified by "WHERE scope=N" */
2408 x.iMaxDist = -1; /* Maximum allowed edit distance */ 2434 x.iMaxDist = -1; /* Maximum allowed edit distance */
2409 2435
2410 if( idxNum&2 ){ 2436 if( idxNum&2 ){
(...skipping 499 matching lines...) Expand 10 before | Expand all | Expand 10 after
2910 sqlite3 *db, 2936 sqlite3 *db,
2911 char **pzErrMsg, 2937 char **pzErrMsg,
2912 const sqlite3_api_routines *pApi 2938 const sqlite3_api_routines *pApi
2913 ){ 2939 ){
2914 SQLITE_EXTENSION_INIT2(pApi); 2940 SQLITE_EXTENSION_INIT2(pApi);
2915 #ifndef SQLITE_OMIT_VIRTUALTABLE 2941 #ifndef SQLITE_OMIT_VIRTUALTABLE
2916 return spellfix1Register(db); 2942 return spellfix1Register(db);
2917 #endif 2943 #endif
2918 return SQLITE_OK; 2944 return SQLITE_OK;
2919 } 2945 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698