OLD | NEW |
| (Empty) |
1 /* | |
2 ** 2012 April 10 | |
3 ** | |
4 ** The author disclaims copyright to this source code. In place of | |
5 ** a legal notice, here is a blessing: | |
6 ** | |
7 ** May you do good and not evil. | |
8 ** May you find forgiveness for yourself and forgive others. | |
9 ** May you share freely, never taking more than you give. | |
10 ** | |
11 ************************************************************************* | |
12 ** | |
13 ** This module implements the spellfix1 VIRTUAL TABLE that can be used | |
14 ** to search a large vocabulary for close matches. See separate | |
15 ** documentation (http://www.sqlite.org/spellfix1.html) for details. | |
16 */ | |
17 #include "sqlite3ext.h" | |
18 SQLITE_EXTENSION_INIT1 | |
19 | |
20 #ifndef SQLITE_AMALGAMATION | |
21 # include <string.h> | |
22 # include <stdio.h> | |
23 # include <stdlib.h> | |
24 # include <assert.h> | |
25 # define ALWAYS(X) 1 | |
26 # define NEVER(X) 0 | |
27 typedef unsigned char u8; | |
28 typedef unsigned short u16; | |
29 #endif | |
30 #include <ctype.h> | |
31 | |
32 #ifndef SQLITE_OMIT_VIRTUALTABLE | |
33 | |
34 /* | |
35 ** Character classes for ASCII characters: | |
36 ** | |
37 ** 0 '' Silent letters: H W | |
38 ** 1 'A' Any vowel: A E I O U (Y) | |
39 ** 2 'B' A bilabeal stop or fricative: B F P V W | |
40 ** 3 'C' Other fricatives or back stops: C G J K Q S X Z | |
41 ** 4 'D' Alveolar stops: D T | |
42 ** 5 'H' Letter H at the beginning of a word | |
43 ** 6 'L' Glide: L | |
44 ** 7 'R' Semivowel: R | |
45 ** 8 'M' Nasals: M N | |
46 ** 9 'Y' Letter Y at the beginning of a word. | |
47 ** 10 '9' Digits: 0 1 2 3 4 5 6 7 8 9 | |
48 ** 11 ' ' White space | |
49 ** 12 '?' Other. | |
50 */ | |
51 #define CCLASS_SILENT 0 | |
52 #define CCLASS_VOWEL 1 | |
53 #define CCLASS_B 2 | |
54 #define CCLASS_C 3 | |
55 #define CCLASS_D 4 | |
56 #define CCLASS_H 5 | |
57 #define CCLASS_L 6 | |
58 #define CCLASS_R 7 | |
59 #define CCLASS_M 8 | |
60 #define CCLASS_Y 9 | |
61 #define CCLASS_DIGIT 10 | |
62 #define CCLASS_SPACE 11 | |
63 #define CCLASS_OTHER 12 | |
64 | |
65 /* | |
66 ** The following table gives the character class for non-initial ASCII | |
67 ** characters. | |
68 */ | |
69 static const unsigned char midClass[] = { | |
70 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
71 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
72 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
73 /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
74 /* */ CCLASS_SPACE, /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, | |
75 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
76 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
77 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
78 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
79 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
80 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_SPACE, | |
81 /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER, | |
82 /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER, | |
83 /* ' */ CCLASS_SILENT, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER, | |
84 /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER, | |
85 /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER, | |
86 /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT, | |
87 /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT, | |
88 /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT, | |
89 /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER, | |
90 /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER, | |
91 /* ? */ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL, | |
92 /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D, | |
93 /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C, | |
94 /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C, | |
95 /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M, | |
96 /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B, | |
97 /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C, | |
98 /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B, | |
99 /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_VOWEL, | |
100 /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER, | |
101 /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER, | |
102 /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B, | |
103 /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL, | |
104 /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT, | |
105 /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C, | |
106 /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M, | |
107 /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C, | |
108 /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D, | |
109 /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B, | |
110 /* x */ CCLASS_C, /* y */ CCLASS_VOWEL, /* z */ CCLASS_C, | |
111 /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER, | |
112 /* ~ */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
113 }; | |
114 /* | |
115 ** This tables gives the character class for ASCII characters that form the | |
116 ** initial character of a word. The only difference from midClass is with | |
117 ** the letters H, W, and Y. | |
118 */ | |
119 static const unsigned char initClass[] = { | |
120 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
121 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
122 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
123 /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
124 /* */ CCLASS_SPACE, /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, | |
125 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
126 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
127 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
128 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
129 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
130 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_SPACE, | |
131 /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER, | |
132 /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER, | |
133 /* ' */ CCLASS_OTHER, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER, | |
134 /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER, | |
135 /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER, | |
136 /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT, | |
137 /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT, | |
138 /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT, | |
139 /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER, | |
140 /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER, | |
141 /* ? */ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL, | |
142 /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D, | |
143 /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C, | |
144 /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C, | |
145 /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M, | |
146 /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B, | |
147 /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C, | |
148 /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B, | |
149 /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_Y, | |
150 /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER, | |
151 /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER, | |
152 /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B, | |
153 /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL, | |
154 /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT, | |
155 /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C, | |
156 /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M, | |
157 /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C, | |
158 /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D, | |
159 /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B, | |
160 /* x */ CCLASS_C, /* y */ CCLASS_Y, /* z */ CCLASS_C, | |
161 /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER, | |
162 /* ~ */ CCLASS_OTHER, /* */ CCLASS_OTHER, | |
163 }; | |
164 | |
165 /* | |
166 ** Mapping from the character class number (0-13) to a symbol for each | |
167 ** character class. Note that initClass[] can be used to map the class | |
168 ** symbol back into the class number. | |
169 */ | |
170 static const unsigned char className[] = ".ABCDHLRMY9 ?"; | |
171 | |
172 /* | |
173 ** Generate a "phonetic hash" from a string of ASCII characters | |
174 ** in zIn[0..nIn-1]. | |
175 ** | |
176 ** * Map characters by character class as defined above. | |
177 ** * Omit double-letters | |
178 ** * Omit vowels beside R and L | |
179 ** * Omit T when followed by CH | |
180 ** * Omit W when followed by R | |
181 ** * Omit D when followed by J or G | |
182 ** * Omit K in KN or G in GN at the beginning of a word | |
183 ** | |
184 ** Space to hold the result is obtained from sqlite3_malloc() | |
185 ** | |
186 ** Return NULL if memory allocation fails. | |
187 */ | |
188 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){ | |
189 unsigned char *zOut = sqlite3_malloc( nIn + 1 ); | |
190 int i; | |
191 int nOut = 0; | |
192 char cPrev = 0x77; | |
193 char cPrevX = 0x77; | |
194 const unsigned char *aClass = initClass; | |
195 | |
196 if( zOut==0 ) return 0; | |
197 if( nIn>2 ){ | |
198 switch( zIn[0] ){ | |
199 case 'g': | |
200 case 'k': { | |
201 if( zIn[1]=='n' ){ zIn++; nIn--; } | |
202 break; | |
203 } | |
204 } | |
205 } | |
206 for(i=0; i<nIn; i++){ | |
207 unsigned char c = zIn[i]; | |
208 if( i+1<nIn ){ | |
209 if( c=='w' && zIn[i+1]=='r' ) continue; | |
210 if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue; | |
211 if( i+2<nIn ){ | |
212 if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue; | |
213 } | |
214 } | |
215 c = aClass[c&0x7f]; | |
216 if( c==CCLASS_SPACE ) continue; | |
217 if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue; | |
218 aClass = midClass; | |
219 if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){ | |
220 continue; /* No vowels beside L or R */ | |
221 } | |
222 if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){ | |
223 nOut--; /* No vowels beside L or R */ | |
224 } | |
225 cPrev = c; | |
226 if( c==CCLASS_SILENT ) continue; | |
227 cPrevX = c; | |
228 c = className[c]; | |
229 assert( nOut>=0 ); | |
230 if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c; | |
231 } | |
232 zOut[nOut] = 0; | |
233 return zOut; | |
234 } | |
235 | |
236 /* | |
237 ** This is an SQL function wrapper around phoneticHash(). See | |
238 ** the description of phoneticHash() for additional information. | |
239 */ | |
240 static void phoneticHashSqlFunc( | |
241 sqlite3_context *context, | |
242 int argc, | |
243 sqlite3_value **argv | |
244 ){ | |
245 const unsigned char *zIn; | |
246 unsigned char *zOut; | |
247 | |
248 zIn = sqlite3_value_text(argv[0]); | |
249 if( zIn==0 ) return; | |
250 zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0])); | |
251 if( zOut==0 ){ | |
252 sqlite3_result_error_nomem(context); | |
253 }else{ | |
254 sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); | |
255 } | |
256 } | |
257 | |
258 /* | |
259 ** Return the character class number for a character given its | |
260 ** context. | |
261 */ | |
262 static char characterClass(char cPrev, char c){ | |
263 return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f]; | |
264 } | |
265 | |
266 /* | |
267 ** Return the cost of inserting or deleting character c immediately | |
268 ** following character cPrev. If cPrev==0, that means c is the first | |
269 ** character of the word. | |
270 */ | |
271 static int insertOrDeleteCost(char cPrev, char c, char cNext){ | |
272 char classC = characterClass(cPrev, c); | |
273 char classCprev; | |
274 | |
275 if( classC==CCLASS_SILENT ){ | |
276 /* Insert or delete "silent" characters such as H or W */ | |
277 return 1; | |
278 } | |
279 if( cPrev==c ){ | |
280 /* Repeated characters, or miss a repeat */ | |
281 return 10; | |
282 } | |
283 if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){ | |
284 return 20; /* Insert a vowel before or after 'r' */ | |
285 } | |
286 classCprev = characterClass(cPrev, cPrev); | |
287 if( classC==classCprev ){ | |
288 if( classC==CCLASS_VOWEL ){ | |
289 /* Remove or add a new vowel to a vowel cluster */ | |
290 return 15; | |
291 }else{ | |
292 /* Remove or add a consonant not in the same class */ | |
293 return 50; | |
294 } | |
295 } | |
296 | |
297 /* any other character insertion or deletion */ | |
298 return 100; | |
299 } | |
300 | |
301 /* | |
302 ** Divide the insertion cost by this factor when appending to the | |
303 ** end of the word. | |
304 */ | |
305 #define FINAL_INS_COST_DIV 4 | |
306 | |
307 /* | |
308 ** Return the cost of substituting cTo in place of cFrom assuming | |
309 ** the previous character is cPrev. If cPrev==0 then cTo is the first | |
310 ** character of the word. | |
311 */ | |
312 static int substituteCost(char cPrev, char cFrom, char cTo){ | |
313 char classFrom, classTo; | |
314 if( cFrom==cTo ){ | |
315 /* Exact match */ | |
316 return 0; | |
317 } | |
318 if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){ | |
319 /* differ only in case */ | |
320 return 0; | |
321 } | |
322 classFrom = characterClass(cPrev, cFrom); | |
323 classTo = characterClass(cPrev, cTo); | |
324 if( classFrom==classTo ){ | |
325 /* Same character class */ | |
326 return 40; | |
327 } | |
328 if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y | |
329 && classTo>=CCLASS_B && classTo<=CCLASS_Y ){ | |
330 /* Convert from one consonant to another, but in a different class */ | |
331 return 75; | |
332 } | |
333 /* Any other subsitution */ | |
334 return 100; | |
335 } | |
336 | |
337 /* | |
338 ** Given two strings zA and zB which are pure ASCII, return the cost | |
339 ** of transforming zA into zB. If zA ends with '*' assume that it is | |
340 ** a prefix of zB and give only minimal penalty for extra characters | |
341 ** on the end of zB. | |
342 ** | |
343 ** Smaller numbers mean a closer match. | |
344 ** | |
345 ** Negative values indicate an error: | |
346 ** -1 One of the inputs is NULL | |
347 ** -2 Non-ASCII characters on input | |
348 ** -3 Unable to allocate memory | |
349 ** | |
350 ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes | |
351 ** of zB that matched the pattern in zA. If zA does not end with a '*', | |
352 ** then this value is always the number of bytes in zB (i.e. strlen(zB)). | |
353 ** If zA does end in a '*', then it is the number of bytes in the prefix | |
354 ** of zB that was deemed to match zA. | |
355 */ | |
356 static int editdist1(const char *zA, const char *zB, int *pnMatch){ | |
357 int nA, nB; /* Number of characters in zA[] and zB[] */ | |
358 int xA, xB; /* Loop counters for zA[] and zB[] */ | |
359 char cA, cB; /* Current character of zA and zB */ | |
360 char cAprev, cBprev; /* Previous character of zA and zB */ | |
361 char cAnext, cBnext; /* Next character in zA and zB */ | |
362 int d; /* North-west cost value */ | |
363 int dc = 0; /* North-west character value */ | |
364 int res; /* Final result */ | |
365 int *m; /* The cost matrix */ | |
366 char *cx; /* Corresponding character values */ | |
367 int *toFree = 0; /* Malloced space */ | |
368 int mStack[60+15]; /* Stack space to use if not too much is needed */ | |
369 int nMatch = 0; | |
370 | |
371 /* Early out if either input is NULL */ | |
372 if( zA==0 || zB==0 ) return -1; | |
373 | |
374 /* Skip any common prefix */ | |
375 while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; } | |
376 if( pnMatch ) *pnMatch = nMatch; | |
377 if( zA[0]==0 && zB[0]==0 ) return 0; | |
378 | |
379 #if 0 | |
380 printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' '); | |
381 #endif | |
382 | |
383 /* Verify input strings and measure their lengths */ | |
384 for(nA=0; zA[nA]; nA++){ | |
385 if( zA[nA]&0x80 ) return -2; | |
386 } | |
387 for(nB=0; zB[nB]; nB++){ | |
388 if( zB[nB]&0x80 ) return -2; | |
389 } | |
390 | |
391 /* Special processing if either string is empty */ | |
392 if( nA==0 ){ | |
393 cBprev = dc; | |
394 for(xB=res=0; (cB = zB[xB])!=0; xB++){ | |
395 res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV; | |
396 cBprev = cB; | |
397 } | |
398 return res; | |
399 } | |
400 if( nB==0 ){ | |
401 cAprev = dc; | |
402 for(xA=res=0; (cA = zA[xA])!=0; xA++){ | |
403 res += insertOrDeleteCost(cAprev, cA, zA[xA+1]); | |
404 cAprev = cA; | |
405 } | |
406 return res; | |
407 } | |
408 | |
409 /* A is a prefix of B */ | |
410 if( zA[0]=='*' && zA[1]==0 ) return 0; | |
411 | |
412 /* Allocate and initialize the Wagner matrix */ | |
413 if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){ | |
414 m = mStack; | |
415 }else{ | |
416 m = toFree = sqlite3_malloc( (nB+1)*5*sizeof(m[0])/4 ); | |
417 if( m==0 ) return -3; | |
418 } | |
419 cx = (char*)&m[nB+1]; | |
420 | |
421 /* Compute the Wagner edit distance */ | |
422 m[0] = 0; | |
423 cx[0] = dc; | |
424 cBprev = dc; | |
425 for(xB=1; xB<=nB; xB++){ | |
426 cBnext = zB[xB]; | |
427 cB = zB[xB-1]; | |
428 cx[xB] = cB; | |
429 m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext); | |
430 cBprev = cB; | |
431 } | |
432 cAprev = dc; | |
433 for(xA=1; xA<=nA; xA++){ | |
434 int lastA = (xA==nA); | |
435 cA = zA[xA-1]; | |
436 cAnext = zA[xA]; | |
437 if( cA=='*' && lastA ) break; | |
438 d = m[0]; | |
439 dc = cx[0]; | |
440 m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext); | |
441 cBprev = 0; | |
442 for(xB=1; xB<=nB; xB++){ | |
443 int totalCost, insCost, delCost, subCost, ncx; | |
444 cB = zB[xB-1]; | |
445 cBnext = zB[xB]; | |
446 | |
447 /* Cost to insert cB */ | |
448 insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext); | |
449 if( lastA ) insCost /= FINAL_INS_COST_DIV; | |
450 | |
451 /* Cost to delete cA */ | |
452 delCost = insertOrDeleteCost(cx[xB], cA, cBnext); | |
453 | |
454 /* Cost to substitute cA->cB */ | |
455 subCost = substituteCost(cx[xB-1], cA, cB); | |
456 | |
457 /* Best cost */ | |
458 totalCost = insCost + m[xB-1]; | |
459 ncx = cB; | |
460 if( (delCost + m[xB])<totalCost ){ | |
461 totalCost = delCost + m[xB]; | |
462 ncx = cA; | |
463 } | |
464 if( (subCost + d)<totalCost ){ | |
465 totalCost = subCost + d; | |
466 } | |
467 | |
468 #if 0 | |
469 printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c" | |
470 " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n", | |
471 xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB, | |
472 insCost, delCost, subCost, totalCost, ncx?ncx:' '); | |
473 #endif | |
474 | |
475 /* Update the matrix */ | |
476 d = m[xB]; | |
477 dc = cx[xB]; | |
478 m[xB] = totalCost; | |
479 cx[xB] = ncx; | |
480 cBprev = cB; | |
481 } | |
482 cAprev = cA; | |
483 } | |
484 | |
485 /* Free the wagner matrix and return the result */ | |
486 if( cA=='*' ){ | |
487 res = m[1]; | |
488 for(xB=1; xB<=nB; xB++){ | |
489 if( m[xB]<res ){ | |
490 res = m[xB]; | |
491 if( pnMatch ) *pnMatch = xB+nMatch; | |
492 } | |
493 } | |
494 }else{ | |
495 res = m[nB]; | |
496 /* In the current implementation, pnMatch is always NULL if zA does | |
497 ** not end in "*" */ | |
498 assert( pnMatch==0 ); | |
499 } | |
500 sqlite3_free(toFree); | |
501 return res; | |
502 } | |
503 | |
504 /* | |
505 ** Function: editdist(A,B) | |
506 ** | |
507 ** Return the cost of transforming string A into string B. Both strings | |
508 ** must be pure ASCII text. If A ends with '*' then it is assumed to be | |
509 ** a prefix of B and extra characters on the end of B have minimal additional | |
510 ** cost. | |
511 */ | |
512 static void editdistSqlFunc( | |
513 sqlite3_context *context, | |
514 int argc, | |
515 sqlite3_value **argv | |
516 ){ | |
517 int res = editdist1( | |
518 (const char*)sqlite3_value_text(argv[0]), | |
519 (const char*)sqlite3_value_text(argv[1]), | |
520 0); | |
521 if( res<0 ){ | |
522 if( res==(-3) ){ | |
523 sqlite3_result_error_nomem(context); | |
524 }else if( res==(-2) ){ | |
525 sqlite3_result_error(context, "non-ASCII input to editdist()", -1); | |
526 }else{ | |
527 sqlite3_result_error(context, "NULL input to editdist()", -1); | |
528 } | |
529 }else{ | |
530 sqlite3_result_int(context, res); | |
531 } | |
532 } | |
533 | |
534 /* End of the fixed-cost edit distance implementation | |
535 ****************************************************************************** | |
536 ***************************************************************************** | |
537 ** Begin: Configurable cost unicode edit distance routines | |
538 */ | |
539 /* Forward declaration of structures */ | |
540 typedef struct EditDist3Cost EditDist3Cost; | |
541 typedef struct EditDist3Config EditDist3Config; | |
542 typedef struct EditDist3Point EditDist3Point; | |
543 typedef struct EditDist3From EditDist3From; | |
544 typedef struct EditDist3FromString EditDist3FromString; | |
545 typedef struct EditDist3To EditDist3To; | |
546 typedef struct EditDist3ToString EditDist3ToString; | |
547 typedef struct EditDist3Lang EditDist3Lang; | |
548 | |
549 | |
550 /* | |
551 ** An entry in the edit cost table | |
552 */ | |
553 struct EditDist3Cost { | |
554 EditDist3Cost *pNext; /* Next cost element */ | |
555 u8 nFrom; /* Number of bytes in aFrom */ | |
556 u8 nTo; /* Number of bytes in aTo */ | |
557 u16 iCost; /* Cost of this transformation */ | |
558 char a[4] ; /* FROM string followed by TO string */ | |
559 /* Additional TO and FROM string bytes appended as necessary */ | |
560 }; | |
561 | |
562 /* | |
563 ** Edit costs for a particular language ID | |
564 */ | |
565 struct EditDist3Lang { | |
566 int iLang; /* Language ID */ | |
567 int iInsCost; /* Default insertion cost */ | |
568 int iDelCost; /* Default deletion cost */ | |
569 int iSubCost; /* Default substitution cost */ | |
570 EditDist3Cost *pCost; /* Costs */ | |
571 }; | |
572 | |
573 | |
574 /* | |
575 ** The default EditDist3Lang object, with default costs. | |
576 */ | |
577 static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 }; | |
578 | |
579 /* | |
580 ** Complete configuration | |
581 */ | |
582 struct EditDist3Config { | |
583 int nLang; /* Number of language IDs. Size of a[] */ | |
584 EditDist3Lang *a; /* One for each distinct language ID */ | |
585 }; | |
586 | |
587 /* | |
588 ** Extra information about each character in the FROM string. | |
589 */ | |
590 struct EditDist3From { | |
591 int nSubst; /* Number of substitution cost entries */ | |
592 int nDel; /* Number of deletion cost entries */ | |
593 int nByte; /* Number of bytes in this character */ | |
594 EditDist3Cost **apSubst; /* Array of substitution costs for this element */ | |
595 EditDist3Cost **apDel; /* Array of deletion cost entries */ | |
596 }; | |
597 | |
598 /* | |
599 ** A precompiled FROM string. | |
600 * | |
601 ** In the common case we expect the FROM string to be reused multiple times. | |
602 ** In other words, the common case will be to measure the edit distance | |
603 ** from a single origin string to multiple target strings. | |
604 */ | |
605 struct EditDist3FromString { | |
606 char *z; /* The complete text of the FROM string */ | |
607 int n; /* Number of characters in the FROM string */ | |
608 int isPrefix; /* True if ends with '*' character */ | |
609 EditDist3From *a; /* Extra info about each char of the FROM string */ | |
610 }; | |
611 | |
612 /* | |
613 ** Extra information about each character in the TO string. | |
614 */ | |
615 struct EditDist3To { | |
616 int nIns; /* Number of insertion cost entries */ | |
617 int nByte; /* Number of bytes in this character */ | |
618 EditDist3Cost **apIns; /* Array of deletion cost entries */ | |
619 }; | |
620 | |
621 /* | |
622 ** A precompiled FROM string | |
623 */ | |
624 struct EditDist3ToString { | |
625 char *z; /* The complete text of the TO string */ | |
626 int n; /* Number of characters in the TO string */ | |
627 EditDist3To *a; /* Extra info about each char of the TO string */ | |
628 }; | |
629 | |
630 /* | |
631 ** Clear or delete an instance of the object that records all edit-distance | |
632 ** weights. | |
633 */ | |
634 static void editDist3ConfigClear(EditDist3Config *p){ | |
635 int i; | |
636 if( p==0 ) return; | |
637 for(i=0; i<p->nLang; i++){ | |
638 EditDist3Cost *pCost, *pNext; | |
639 pCost = p->a[i].pCost; | |
640 while( pCost ){ | |
641 pNext = pCost->pNext; | |
642 sqlite3_free(pCost); | |
643 pCost = pNext; | |
644 } | |
645 } | |
646 sqlite3_free(p->a); | |
647 memset(p, 0, sizeof(*p)); | |
648 } | |
649 static void editDist3ConfigDelete(void *pIn){ | |
650 EditDist3Config *p = (EditDist3Config*)pIn; | |
651 editDist3ConfigClear(p); | |
652 sqlite3_free(p); | |
653 } | |
654 | |
655 /* | |
656 ** Load all edit-distance weights from a table. | |
657 */ | |
658 static int editDist3ConfigLoad( | |
659 EditDist3Config *p, /* The edit distance configuration to load */ | |
660 sqlite3 *db, /* Load from this database */ | |
661 const char *zTable /* Name of the table from which to load */ | |
662 ){ | |
663 sqlite3_stmt *pStmt; | |
664 int rc, rc2; | |
665 char *zSql; | |
666 int iLangPrev = -9999; | |
667 EditDist3Lang *pLang = 0; | |
668 | |
669 zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost" | |
670 " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable); | |
671 if( zSql==0 ) return SQLITE_NOMEM; | |
672 rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); | |
673 sqlite3_free(zSql); | |
674 if( rc ) return rc; | |
675 editDist3ConfigClear(p); | |
676 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
677 int iLang = sqlite3_column_int(pStmt, 0); | |
678 const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1); | |
679 int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0; | |
680 const char *zTo = (const char*)sqlite3_column_text(pStmt, 2); | |
681 int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0; | |
682 int iCost = sqlite3_column_int(pStmt, 3); | |
683 | |
684 assert( zFrom!=0 || nFrom==0 ); | |
685 assert( zTo!=0 || nTo==0 ); | |
686 if( nFrom>100 || nTo>100 ) continue; | |
687 if( iCost<0 ) continue; | |
688 if( pLang==0 || iLang!=iLangPrev ){ | |
689 EditDist3Lang *pNew; | |
690 pNew = sqlite3_realloc(p->a, (p->nLang+1)*sizeof(p->a[0])); | |
691 if( pNew==0 ){ rc = SQLITE_NOMEM; break; } | |
692 p->a = pNew; | |
693 pLang = &p->a[p->nLang]; | |
694 p->nLang++; | |
695 pLang->iLang = iLang; | |
696 pLang->iInsCost = 100; | |
697 pLang->iDelCost = 100; | |
698 pLang->iSubCost = 150; | |
699 pLang->pCost = 0; | |
700 iLangPrev = iLang; | |
701 } | |
702 if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){ | |
703 pLang->iDelCost = iCost; | |
704 }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){ | |
705 pLang->iInsCost = iCost; | |
706 }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){ | |
707 pLang->iSubCost = iCost; | |
708 }else{ | |
709 EditDist3Cost *pCost; | |
710 int nExtra = nFrom + nTo - 4; | |
711 if( nExtra<0 ) nExtra = 0; | |
712 pCost = sqlite3_malloc( sizeof(*pCost) + nExtra ); | |
713 if( pCost==0 ){ rc = SQLITE_NOMEM; break; } | |
714 pCost->nFrom = nFrom; | |
715 pCost->nTo = nTo; | |
716 pCost->iCost = iCost; | |
717 memcpy(pCost->a, zFrom, nFrom); | |
718 memcpy(pCost->a + nFrom, zTo, nTo); | |
719 pCost->pNext = pLang->pCost; | |
720 pLang->pCost = pCost; | |
721 } | |
722 } | |
723 rc2 = sqlite3_finalize(pStmt); | |
724 if( rc==SQLITE_OK ) rc = rc2; | |
725 return rc; | |
726 } | |
727 | |
728 /* | |
729 ** Return the length (in bytes) of a utf-8 character. Or return a maximum | |
730 ** of N. | |
731 */ | |
732 static int utf8Len(unsigned char c, int N){ | |
733 int len = 1; | |
734 if( c>0x7f ){ | |
735 if( (c&0xe0)==0xc0 ){ | |
736 len = 2; | |
737 }else if( (c&0xf0)==0xe0 ){ | |
738 len = 3; | |
739 }else{ | |
740 len = 4; | |
741 } | |
742 } | |
743 if( len>N ) len = N; | |
744 return len; | |
745 } | |
746 | |
747 /* | |
748 ** Return TRUE (non-zero) if the To side of the given cost matches | |
749 ** the given string. | |
750 */ | |
751 static int matchTo(EditDist3Cost *p, const char *z, int n){ | |
752 if( p->nTo>n ) return 0; | |
753 if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0; | |
754 return 1; | |
755 } | |
756 | |
757 /* | |
758 ** Return TRUE (non-zero) if the From side of the given cost matches | |
759 ** the given string. | |
760 */ | |
761 static int matchFrom(EditDist3Cost *p, const char *z, int n){ | |
762 assert( p->nFrom<=n ); | |
763 if( strncmp(p->a, z, p->nFrom)!=0 ) return 0; | |
764 return 1; | |
765 } | |
766 | |
767 /* | |
768 ** Return TRUE (non-zero) of the next FROM character and the next TO | |
769 ** character are the same. | |
770 */ | |
771 static int matchFromTo( | |
772 EditDist3FromString *pStr, /* Left hand string */ | |
773 int n1, /* Index of comparison character on the left */ | |
774 const char *z2, /* Right-handl comparison character */ | |
775 int n2 /* Bytes remaining in z2[] */ | |
776 ){ | |
777 int b1 = pStr->a[n1].nByte; | |
778 if( b1>n2 ) return 0; | |
779 if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0; | |
780 return 1; | |
781 } | |
782 | |
783 /* | |
784 ** Delete an EditDist3FromString objecct | |
785 */ | |
786 static void editDist3FromStringDelete(EditDist3FromString *p){ | |
787 int i; | |
788 if( p ){ | |
789 for(i=0; i<p->n; i++){ | |
790 sqlite3_free(p->a[i].apDel); | |
791 sqlite3_free(p->a[i].apSubst); | |
792 } | |
793 sqlite3_free(p); | |
794 } | |
795 } | |
796 | |
797 /* | |
798 ** Create a EditDist3FromString object. | |
799 */ | |
800 static EditDist3FromString *editDist3FromStringNew( | |
801 const EditDist3Lang *pLang, | |
802 const char *z, | |
803 int n | |
804 ){ | |
805 EditDist3FromString *pStr; | |
806 EditDist3Cost *p; | |
807 int i; | |
808 | |
809 if( z==0 ) return 0; | |
810 if( n<0 ) n = (int)strlen(z); | |
811 pStr = sqlite3_malloc( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 ); | |
812 if( pStr==0 ) return 0; | |
813 pStr->a = (EditDist3From*)&pStr[1]; | |
814 memset(pStr->a, 0, sizeof(pStr->a[0])*n); | |
815 pStr->n = n; | |
816 pStr->z = (char*)&pStr->a[n]; | |
817 memcpy(pStr->z, z, n+1); | |
818 if( n && z[n-1]=='*' ){ | |
819 pStr->isPrefix = 1; | |
820 n--; | |
821 pStr->n--; | |
822 pStr->z[n] = 0; | |
823 }else{ | |
824 pStr->isPrefix = 0; | |
825 } | |
826 | |
827 for(i=0; i<n; i++){ | |
828 EditDist3From *pFrom = &pStr->a[i]; | |
829 memset(pFrom, 0, sizeof(*pFrom)); | |
830 pFrom->nByte = utf8Len((unsigned char)z[i], n-i); | |
831 for(p=pLang->pCost; p; p=p->pNext){ | |
832 EditDist3Cost **apNew; | |
833 if( i+p->nFrom>n ) continue; | |
834 if( matchFrom(p, z+i, n-i)==0 ) continue; | |
835 if( p->nTo==0 ){ | |
836 apNew = sqlite3_realloc(pFrom->apDel, | |
837 sizeof(*apNew)*(pFrom->nDel+1)); | |
838 if( apNew==0 ) break; | |
839 pFrom->apDel = apNew; | |
840 apNew[pFrom->nDel++] = p; | |
841 }else{ | |
842 apNew = sqlite3_realloc(pFrom->apSubst, | |
843 sizeof(*apNew)*(pFrom->nSubst+1)); | |
844 if( apNew==0 ) break; | |
845 pFrom->apSubst = apNew; | |
846 apNew[pFrom->nSubst++] = p; | |
847 } | |
848 } | |
849 if( p ){ | |
850 editDist3FromStringDelete(pStr); | |
851 pStr = 0; | |
852 break; | |
853 } | |
854 } | |
855 return pStr; | |
856 } | |
857 | |
858 /* | |
859 ** Update entry m[i] such that it is the minimum of its current value | |
860 ** and m[j]+iCost. | |
861 ** | |
862 ** If the iCost is 1,000,000 or greater, then consider the cost to be | |
863 ** infinite and skip the update. | |
864 */ | |
865 static void updateCost( | |
866 unsigned int *m, | |
867 int i, | |
868 int j, | |
869 int iCost | |
870 ){ | |
871 assert( iCost>=0 ); | |
872 if( iCost<10000 ){ | |
873 unsigned int b = m[j] + iCost; | |
874 if( b<m[i] ) m[i] = b; | |
875 } | |
876 } | |
877 | |
878 /* Compute the edit distance between two strings. | |
879 ** | |
880 ** If an error occurs, return a negative number which is the error code. | |
881 ** | |
882 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters | |
883 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does | |
884 ** not contain the pattern for a prefix-search, then this is always the number | |
885 ** of characters in z2. If pFrom does contain a prefix search pattern, then | |
886 ** it is the number of characters in the prefix of z2 that was deemed to | |
887 ** match pFrom. | |
888 */ | |
889 static int editDist3Core( | |
890 EditDist3FromString *pFrom, /* The FROM string */ | |
891 const char *z2, /* The TO string */ | |
892 int n2, /* Length of the TO string */ | |
893 const EditDist3Lang *pLang, /* Edit weights for a particular language ID */ | |
894 int *pnMatch /* OUT: Characters in matched prefix */ | |
895 ){ | |
896 int k, n; | |
897 int i1, b1; | |
898 int i2, b2; | |
899 EditDist3FromString f = *pFrom; | |
900 EditDist3To *a2; | |
901 unsigned int *m; | |
902 int szRow; | |
903 EditDist3Cost *p; | |
904 int res; | |
905 | |
906 /* allocate the Wagner matrix and the aTo[] array for the TO string */ | |
907 n = (f.n+1)*(n2+1); | |
908 n = (n+1)&~1; | |
909 m = sqlite3_malloc( n*sizeof(m[0]) + sizeof(a2[0])*n2 ); | |
910 if( m==0 ) return -1; /* Out of memory */ | |
911 a2 = (EditDist3To*)&m[n]; | |
912 memset(a2, 0, sizeof(a2[0])*n2); | |
913 | |
914 /* Fill in the a1[] matrix for all characters of the TO string */ | |
915 for(i2=0; i2<n2; i2++){ | |
916 a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2); | |
917 for(p=pLang->pCost; p; p=p->pNext){ | |
918 EditDist3Cost **apNew; | |
919 if( p->nFrom>0 ) continue; | |
920 if( i2+p->nTo>n2 ) continue; | |
921 if( matchTo(p, z2+i2, n2-i2)==0 ) continue; | |
922 a2[i2].nIns++; | |
923 apNew = sqlite3_realloc(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns); | |
924 if( apNew==0 ){ | |
925 res = -1; /* Out of memory */ | |
926 goto editDist3Abort; | |
927 } | |
928 a2[i2].apIns = apNew; | |
929 a2[i2].apIns[a2[i2].nIns-1] = p; | |
930 } | |
931 } | |
932 | |
933 /* Prepare to compute the minimum edit distance */ | |
934 szRow = f.n+1; | |
935 memset(m, 0x01, (n2+1)*szRow*sizeof(m[0])); | |
936 m[0] = 0; | |
937 | |
938 /* First fill in the top-row of the matrix with FROM deletion costs */ | |
939 for(i1=0; i1<f.n; i1 += b1){ | |
940 b1 = f.a[i1].nByte; | |
941 updateCost(m, i1+b1, i1, pLang->iDelCost); | |
942 for(k=0; k<f.a[i1].nDel; k++){ | |
943 p = f.a[i1].apDel[k]; | |
944 updateCost(m, i1+p->nFrom, i1, p->iCost); | |
945 } | |
946 } | |
947 | |
948 /* Fill in all subsequent rows, top-to-bottom, left-to-right */ | |
949 for(i2=0; i2<n2; i2 += b2){ | |
950 int rx; /* Starting index for current row */ | |
951 int rxp; /* Starting index for previous row */ | |
952 b2 = a2[i2].nByte; | |
953 rx = szRow*(i2+b2); | |
954 rxp = szRow*i2; | |
955 updateCost(m, rx, rxp, pLang->iInsCost); | |
956 for(k=0; k<a2[i2].nIns; k++){ | |
957 p = a2[i2].apIns[k]; | |
958 updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost); | |
959 } | |
960 for(i1=0; i1<f.n; i1+=b1){ | |
961 int cx; /* Index of current cell */ | |
962 int cxp; /* Index of cell immediately to the left */ | |
963 int cxd; /* Index of cell to the left and one row above */ | |
964 int cxu; /* Index of cell immediately above */ | |
965 b1 = f.a[i1].nByte; | |
966 cxp = rx + i1; | |
967 cx = cxp + b1; | |
968 cxd = rxp + i1; | |
969 cxu = cxd + b1; | |
970 updateCost(m, cx, cxp, pLang->iDelCost); | |
971 for(k=0; k<f.a[i1].nDel; k++){ | |
972 p = f.a[i1].apDel[k]; | |
973 updateCost(m, cxp+p->nFrom, cxp, p->iCost); | |
974 } | |
975 updateCost(m, cx, cxu, pLang->iInsCost); | |
976 if( matchFromTo(&f, i1, z2+i2, n2-i2) ){ | |
977 updateCost(m, cx, cxd, 0); | |
978 } | |
979 updateCost(m, cx, cxd, pLang->iSubCost); | |
980 for(k=0; k<f.a[i1].nSubst; k++){ | |
981 p = f.a[i1].apSubst[k]; | |
982 if( matchTo(p, z2+i2, n2-i2) ){ | |
983 updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost); | |
984 } | |
985 } | |
986 } | |
987 } | |
988 | |
989 #if 0 /* Enable for debugging */ | |
990 printf(" ^"); | |
991 for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff); | |
992 printf("\n ^:"); | |
993 for(i1=0; i1<szRow; i1++){ | |
994 int v = m[i1]; | |
995 if( v>9999 ) printf(" ****"); | |
996 else printf(" %4d", v); | |
997 } | |
998 printf("\n"); | |
999 for(i2=0; i2<n2; i2++){ | |
1000 printf("%c-%02x:", z2[i2], z2[i2]&0xff); | |
1001 for(i1=0; i1<szRow; i1++){ | |
1002 int v = m[(i2+1)*szRow+i1]; | |
1003 if( v>9999 ) printf(" ****"); | |
1004 else printf(" %4d", v); | |
1005 } | |
1006 printf("\n"); | |
1007 } | |
1008 #endif | |
1009 | |
1010 /* Free memory allocations and return the result */ | |
1011 res = (int)m[szRow*(n2+1)-1]; | |
1012 n = n2; | |
1013 if( f.isPrefix ){ | |
1014 for(i2=1; i2<=n2; i2++){ | |
1015 int b = m[szRow*i2-1]; | |
1016 if( b<=res ){ | |
1017 res = b; | |
1018 n = i2 - 1; | |
1019 } | |
1020 } | |
1021 } | |
1022 if( pnMatch ){ | |
1023 int nExtra = 0; | |
1024 for(k=0; k<n; k++){ | |
1025 if( (z2[k] & 0xc0)==0x80 ) nExtra++; | |
1026 } | |
1027 *pnMatch = n - nExtra; | |
1028 } | |
1029 | |
1030 editDist3Abort: | |
1031 for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns); | |
1032 sqlite3_free(m); | |
1033 return res; | |
1034 } | |
1035 | |
1036 /* | |
1037 ** Get an appropriate EditDist3Lang object. | |
1038 */ | |
1039 static const EditDist3Lang *editDist3FindLang( | |
1040 EditDist3Config *pConfig, | |
1041 int iLang | |
1042 ){ | |
1043 int i; | |
1044 for(i=0; i<pConfig->nLang; i++){ | |
1045 if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i]; | |
1046 } | |
1047 return &editDist3Lang; | |
1048 } | |
1049 | |
1050 /* | |
1051 ** Function: editdist3(A,B,iLang) | |
1052 ** editdist3(tablename) | |
1053 ** | |
1054 ** Return the cost of transforming string A into string B using edit | |
1055 ** weights for iLang. | |
1056 ** | |
1057 ** The second form loads edit weights into memory from a table. | |
1058 */ | |
1059 static void editDist3SqlFunc( | |
1060 sqlite3_context *context, | |
1061 int argc, | |
1062 sqlite3_value **argv | |
1063 ){ | |
1064 EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context); | |
1065 sqlite3 *db = sqlite3_context_db_handle(context); | |
1066 int rc; | |
1067 if( argc==1 ){ | |
1068 const char *zTable = (const char*)sqlite3_value_text(argv[0]); | |
1069 rc = editDist3ConfigLoad(pConfig, db, zTable); | |
1070 if( rc ) sqlite3_result_error_code(context, rc); | |
1071 }else{ | |
1072 const char *zA = (const char*)sqlite3_value_text(argv[0]); | |
1073 const char *zB = (const char*)sqlite3_value_text(argv[1]); | |
1074 int nA = sqlite3_value_bytes(argv[0]); | |
1075 int nB = sqlite3_value_bytes(argv[1]); | |
1076 int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0; | |
1077 const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang); | |
1078 EditDist3FromString *pFrom; | |
1079 int dist; | |
1080 | |
1081 pFrom = editDist3FromStringNew(pLang, zA, nA); | |
1082 if( pFrom==0 ){ | |
1083 sqlite3_result_error_nomem(context); | |
1084 return; | |
1085 } | |
1086 dist = editDist3Core(pFrom, zB, nB, pLang, 0); | |
1087 editDist3FromStringDelete(pFrom); | |
1088 if( dist==(-1) ){ | |
1089 sqlite3_result_error_nomem(context); | |
1090 }else{ | |
1091 sqlite3_result_int(context, dist); | |
1092 } | |
1093 } | |
1094 } | |
1095 | |
1096 /* | |
1097 ** Register the editDist3 function with SQLite | |
1098 */ | |
1099 static int editDist3Install(sqlite3 *db){ | |
1100 int rc; | |
1101 EditDist3Config *pConfig = sqlite3_malloc( sizeof(*pConfig) ); | |
1102 if( pConfig==0 ) return SQLITE_NOMEM; | |
1103 memset(pConfig, 0, sizeof(*pConfig)); | |
1104 rc = sqlite3_create_function_v2(db, "editdist3", | |
1105 2, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0); | |
1106 if( rc==SQLITE_OK ){ | |
1107 rc = sqlite3_create_function_v2(db, "editdist3", | |
1108 3, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0); | |
1109 } | |
1110 if( rc==SQLITE_OK ){ | |
1111 rc = sqlite3_create_function_v2(db, "editdist3", | |
1112 1, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, | |
1113 editDist3ConfigDelete); | |
1114 }else{ | |
1115 sqlite3_free(pConfig); | |
1116 } | |
1117 return rc; | |
1118 } | |
1119 /* End configurable cost unicode edit distance routines | |
1120 ****************************************************************************** | |
1121 ****************************************************************************** | |
1122 ** Begin transliterate unicode-to-ascii implementation | |
1123 */ | |
1124 | |
1125 #if !SQLITE_AMALGAMATION | |
1126 /* | |
1127 ** This lookup table is used to help decode the first byte of | |
1128 ** a multi-byte UTF8 character. | |
1129 */ | |
1130 static const unsigned char sqlite3Utf8Trans1[] = { | |
1131 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |
1132 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | |
1133 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
1134 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, | |
1135 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |
1136 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | |
1137 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |
1138 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, | |
1139 }; | |
1140 #endif | |
1141 | |
1142 /* | |
1143 ** Return the value of the first UTF-8 character in the string. | |
1144 */ | |
1145 static int utf8Read(const unsigned char *z, int n, int *pSize){ | |
1146 int c, i; | |
1147 | |
1148 /* All callers to this routine (in the current implementation) | |
1149 ** always have n>0. */ | |
1150 if( NEVER(n==0) ){ | |
1151 c = i = 0; | |
1152 }else{ | |
1153 c = z[0]; | |
1154 i = 1; | |
1155 if( c>=0xc0 ){ | |
1156 c = sqlite3Utf8Trans1[c-0xc0]; | |
1157 while( i<n && (z[i] & 0xc0)==0x80 ){ | |
1158 c = (c<<6) + (0x3f & z[i++]); | |
1159 } | |
1160 } | |
1161 } | |
1162 *pSize = i; | |
1163 return c; | |
1164 } | |
1165 | |
1166 /* | |
1167 ** Return the number of characters in the utf-8 string in the nIn byte | |
1168 ** buffer pointed to by zIn. | |
1169 */ | |
1170 static int utf8Charlen(const char *zIn, int nIn){ | |
1171 int i; | |
1172 int nChar = 0; | |
1173 for(i=0; i<nIn; nChar++){ | |
1174 int sz; | |
1175 utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz); | |
1176 i += sz; | |
1177 } | |
1178 return nChar; | |
1179 } | |
1180 | |
1181 /* | |
1182 ** Table of translations from unicode characters into ASCII. | |
1183 */ | |
1184 static const struct { | |
1185 unsigned short int cFrom; | |
1186 unsigned char cTo0, cTo1; | |
1187 } translit[] = { | |
1188 { 0x00A0, 0x20, 0x00 }, /* to */ | |
1189 { 0x00B5, 0x75, 0x00 }, /* µ to u */ | |
1190 { 0x00C0, 0x41, 0x00 }, /* À to A */ | |
1191 { 0x00C1, 0x41, 0x00 }, /* Á to A */ | |
1192 { 0x00C2, 0x41, 0x00 }, /* Â to A */ | |
1193 { 0x00C3, 0x41, 0x00 }, /* Ã to A */ | |
1194 { 0x00C4, 0x41, 0x65 }, /* Ä to Ae */ | |
1195 { 0x00C5, 0x41, 0x61 }, /* Å to Aa */ | |
1196 { 0x00C6, 0x41, 0x45 }, /* Æ to AE */ | |
1197 { 0x00C7, 0x43, 0x00 }, /* Ç to C */ | |
1198 { 0x00C8, 0x45, 0x00 }, /* È to E */ | |
1199 { 0x00C9, 0x45, 0x00 }, /* É to E */ | |
1200 { 0x00CA, 0x45, 0x00 }, /* Ê to E */ | |
1201 { 0x00CB, 0x45, 0x00 }, /* Ë to E */ | |
1202 { 0x00CC, 0x49, 0x00 }, /* Ì to I */ | |
1203 { 0x00CD, 0x49, 0x00 }, /* Í to I */ | |
1204 { 0x00CE, 0x49, 0x00 }, /* Î to I */ | |
1205 { 0x00CF, 0x49, 0x00 }, /* Ï to I */ | |
1206 { 0x00D0, 0x44, 0x00 }, /* Ð to D */ | |
1207 { 0x00D1, 0x4E, 0x00 }, /* Ñ to N */ | |
1208 { 0x00D2, 0x4F, 0x00 }, /* Ò to O */ | |
1209 { 0x00D3, 0x4F, 0x00 }, /* Ó to O */ | |
1210 { 0x00D4, 0x4F, 0x00 }, /* Ô to O */ | |
1211 { 0x00D5, 0x4F, 0x00 }, /* Õ to O */ | |
1212 { 0x00D6, 0x4F, 0x65 }, /* Ö to Oe */ | |
1213 { 0x00D7, 0x78, 0x00 }, /* × to x */ | |
1214 { 0x00D8, 0x4F, 0x00 }, /* Ø to O */ | |
1215 { 0x00D9, 0x55, 0x00 }, /* Ù to U */ | |
1216 { 0x00DA, 0x55, 0x00 }, /* Ú to U */ | |
1217 { 0x00DB, 0x55, 0x00 }, /* Û to U */ | |
1218 { 0x00DC, 0x55, 0x65 }, /* Ü to Ue */ | |
1219 { 0x00DD, 0x59, 0x00 }, /* Ý to Y */ | |
1220 { 0x00DE, 0x54, 0x68 }, /* Þ to Th */ | |
1221 { 0x00DF, 0x73, 0x73 }, /* ß to ss */ | |
1222 { 0x00E0, 0x61, 0x00 }, /* à to a */ | |
1223 { 0x00E1, 0x61, 0x00 }, /* á to a */ | |
1224 { 0x00E2, 0x61, 0x00 }, /* â to a */ | |
1225 { 0x00E3, 0x61, 0x00 }, /* ã to a */ | |
1226 { 0x00E4, 0x61, 0x65 }, /* ä to ae */ | |
1227 { 0x00E5, 0x61, 0x61 }, /* å to aa */ | |
1228 { 0x00E6, 0x61, 0x65 }, /* æ to ae */ | |
1229 { 0x00E7, 0x63, 0x00 }, /* ç to c */ | |
1230 { 0x00E8, 0x65, 0x00 }, /* è to e */ | |
1231 { 0x00E9, 0x65, 0x00 }, /* é to e */ | |
1232 { 0x00EA, 0x65, 0x00 }, /* ê to e */ | |
1233 { 0x00EB, 0x65, 0x00 }, /* ë to e */ | |
1234 { 0x00EC, 0x69, 0x00 }, /* ì to i */ | |
1235 { 0x00ED, 0x69, 0x00 }, /* í to i */ | |
1236 { 0x00EE, 0x69, 0x00 }, /* î to i */ | |
1237 { 0x00EF, 0x69, 0x00 }, /* ï to i */ | |
1238 { 0x00F0, 0x64, 0x00 }, /* ð to d */ | |
1239 { 0x00F1, 0x6E, 0x00 }, /* ñ to n */ | |
1240 { 0x00F2, 0x6F, 0x00 }, /* ò to o */ | |
1241 { 0x00F3, 0x6F, 0x00 }, /* ó to o */ | |
1242 { 0x00F4, 0x6F, 0x00 }, /* ô to o */ | |
1243 { 0x00F5, 0x6F, 0x00 }, /* õ to o */ | |
1244 { 0x00F6, 0x6F, 0x65 }, /* ö to oe */ | |
1245 { 0x00F7, 0x3A, 0x00 }, /* ÷ to : */ | |
1246 { 0x00F8, 0x6F, 0x00 }, /* ø to o */ | |
1247 { 0x00F9, 0x75, 0x00 }, /* ù to u */ | |
1248 { 0x00FA, 0x75, 0x00 }, /* ú to u */ | |
1249 { 0x00FB, 0x75, 0x00 }, /* û to u */ | |
1250 { 0x00FC, 0x75, 0x65 }, /* ü to ue */ | |
1251 { 0x00FD, 0x79, 0x00 }, /* ý to y */ | |
1252 { 0x00FE, 0x74, 0x68 }, /* þ to th */ | |
1253 { 0x00FF, 0x79, 0x00 }, /* ÿ to y */ | |
1254 { 0x0100, 0x41, 0x00 }, /* Ā to A */ | |
1255 { 0x0101, 0x61, 0x00 }, /* ā to a */ | |
1256 { 0x0102, 0x41, 0x00 }, /* Ă to A */ | |
1257 { 0x0103, 0x61, 0x00 }, /* ă to a */ | |
1258 { 0x0104, 0x41, 0x00 }, /* Ą to A */ | |
1259 { 0x0105, 0x61, 0x00 }, /* ą to a */ | |
1260 { 0x0106, 0x43, 0x00 }, /* Ć to C */ | |
1261 { 0x0107, 0x63, 0x00 }, /* ć to c */ | |
1262 { 0x0108, 0x43, 0x68 }, /* Ĉ to Ch */ | |
1263 { 0x0109, 0x63, 0x68 }, /* ĉ to ch */ | |
1264 { 0x010A, 0x43, 0x00 }, /* Ċ to C */ | |
1265 { 0x010B, 0x63, 0x00 }, /* ċ to c */ | |
1266 { 0x010C, 0x43, 0x00 }, /* Č to C */ | |
1267 { 0x010D, 0x63, 0x00 }, /* č to c */ | |
1268 { 0x010E, 0x44, 0x00 }, /* Ď to D */ | |
1269 { 0x010F, 0x64, 0x00 }, /* ď to d */ | |
1270 { 0x0110, 0x44, 0x00 }, /* Đ to D */ | |
1271 { 0x0111, 0x64, 0x00 }, /* đ to d */ | |
1272 { 0x0112, 0x45, 0x00 }, /* Ē to E */ | |
1273 { 0x0113, 0x65, 0x00 }, /* ē to e */ | |
1274 { 0x0114, 0x45, 0x00 }, /* Ĕ to E */ | |
1275 { 0x0115, 0x65, 0x00 }, /* ĕ to e */ | |
1276 { 0x0116, 0x45, 0x00 }, /* Ė to E */ | |
1277 { 0x0117, 0x65, 0x00 }, /* ė to e */ | |
1278 { 0x0118, 0x45, 0x00 }, /* Ę to E */ | |
1279 { 0x0119, 0x65, 0x00 }, /* ę to e */ | |
1280 { 0x011A, 0x45, 0x00 }, /* Ě to E */ | |
1281 { 0x011B, 0x65, 0x00 }, /* ě to e */ | |
1282 { 0x011C, 0x47, 0x68 }, /* Ĝ to Gh */ | |
1283 { 0x011D, 0x67, 0x68 }, /* ĝ to gh */ | |
1284 { 0x011E, 0x47, 0x00 }, /* Ğ to G */ | |
1285 { 0x011F, 0x67, 0x00 }, /* ğ to g */ | |
1286 { 0x0120, 0x47, 0x00 }, /* Ġ to G */ | |
1287 { 0x0121, 0x67, 0x00 }, /* ġ to g */ | |
1288 { 0x0122, 0x47, 0x00 }, /* Ģ to G */ | |
1289 { 0x0123, 0x67, 0x00 }, /* ģ to g */ | |
1290 { 0x0124, 0x48, 0x68 }, /* Ĥ to Hh */ | |
1291 { 0x0125, 0x68, 0x68 }, /* ĥ to hh */ | |
1292 { 0x0126, 0x48, 0x00 }, /* Ħ to H */ | |
1293 { 0x0127, 0x68, 0x00 }, /* ħ to h */ | |
1294 { 0x0128, 0x49, 0x00 }, /* Ĩ to I */ | |
1295 { 0x0129, 0x69, 0x00 }, /* ĩ to i */ | |
1296 { 0x012A, 0x49, 0x00 }, /* Ī to I */ | |
1297 { 0x012B, 0x69, 0x00 }, /* ī to i */ | |
1298 { 0x012C, 0x49, 0x00 }, /* Ĭ to I */ | |
1299 { 0x012D, 0x69, 0x00 }, /* ĭ to i */ | |
1300 { 0x012E, 0x49, 0x00 }, /* Į to I */ | |
1301 { 0x012F, 0x69, 0x00 }, /* į to i */ | |
1302 { 0x0130, 0x49, 0x00 }, /* İ to I */ | |
1303 { 0x0131, 0x69, 0x00 }, /* ı to i */ | |
1304 { 0x0132, 0x49, 0x4A }, /* IJ to IJ */ | |
1305 { 0x0133, 0x69, 0x6A }, /* ij to ij */ | |
1306 { 0x0134, 0x4A, 0x68 }, /* Ĵ to Jh */ | |
1307 { 0x0135, 0x6A, 0x68 }, /* ĵ to jh */ | |
1308 { 0x0136, 0x4B, 0x00 }, /* Ķ to K */ | |
1309 { 0x0137, 0x6B, 0x00 }, /* ķ to k */ | |
1310 { 0x0138, 0x6B, 0x00 }, /* ĸ to k */ | |
1311 { 0x0139, 0x4C, 0x00 }, /* Ĺ to L */ | |
1312 { 0x013A, 0x6C, 0x00 }, /* ĺ to l */ | |
1313 { 0x013B, 0x4C, 0x00 }, /* Ļ to L */ | |
1314 { 0x013C, 0x6C, 0x00 }, /* ļ to l */ | |
1315 { 0x013D, 0x4C, 0x00 }, /* Ľ to L */ | |
1316 { 0x013E, 0x6C, 0x00 }, /* ľ to l */ | |
1317 { 0x013F, 0x4C, 0x2E }, /* Ŀ to L. */ | |
1318 { 0x0140, 0x6C, 0x2E }, /* ŀ to l. */ | |
1319 { 0x0141, 0x4C, 0x00 }, /* Ł to L */ | |
1320 { 0x0142, 0x6C, 0x00 }, /* ł to l */ | |
1321 { 0x0143, 0x4E, 0x00 }, /* Ń to N */ | |
1322 { 0x0144, 0x6E, 0x00 }, /* ń to n */ | |
1323 { 0x0145, 0x4E, 0x00 }, /* Ņ to N */ | |
1324 { 0x0146, 0x6E, 0x00 }, /* ņ to n */ | |
1325 { 0x0147, 0x4E, 0x00 }, /* Ň to N */ | |
1326 { 0x0148, 0x6E, 0x00 }, /* ň to n */ | |
1327 { 0x0149, 0x27, 0x6E }, /* ʼn to 'n */ | |
1328 { 0x014A, 0x4E, 0x47 }, /* Ŋ to NG */ | |
1329 { 0x014B, 0x6E, 0x67 }, /* ŋ to ng */ | |
1330 { 0x014C, 0x4F, 0x00 }, /* Ō to O */ | |
1331 { 0x014D, 0x6F, 0x00 }, /* ō to o */ | |
1332 { 0x014E, 0x4F, 0x00 }, /* Ŏ to O */ | |
1333 { 0x014F, 0x6F, 0x00 }, /* ŏ to o */ | |
1334 { 0x0150, 0x4F, 0x00 }, /* Ő to O */ | |
1335 { 0x0151, 0x6F, 0x00 }, /* ő to o */ | |
1336 { 0x0152, 0x4F, 0x45 }, /* Œ to OE */ | |
1337 { 0x0153, 0x6F, 0x65 }, /* œ to oe */ | |
1338 { 0x0154, 0x52, 0x00 }, /* Ŕ to R */ | |
1339 { 0x0155, 0x72, 0x00 }, /* ŕ to r */ | |
1340 { 0x0156, 0x52, 0x00 }, /* Ŗ to R */ | |
1341 { 0x0157, 0x72, 0x00 }, /* ŗ to r */ | |
1342 { 0x0158, 0x52, 0x00 }, /* Ř to R */ | |
1343 { 0x0159, 0x72, 0x00 }, /* ř to r */ | |
1344 { 0x015A, 0x53, 0x00 }, /* Ś to S */ | |
1345 { 0x015B, 0x73, 0x00 }, /* ś to s */ | |
1346 { 0x015C, 0x53, 0x68 }, /* Ŝ to Sh */ | |
1347 { 0x015D, 0x73, 0x68 }, /* ŝ to sh */ | |
1348 { 0x015E, 0x53, 0x00 }, /* Ş to S */ | |
1349 { 0x015F, 0x73, 0x00 }, /* ş to s */ | |
1350 { 0x0160, 0x53, 0x00 }, /* Š to S */ | |
1351 { 0x0161, 0x73, 0x00 }, /* š to s */ | |
1352 { 0x0162, 0x54, 0x00 }, /* Ţ to T */ | |
1353 { 0x0163, 0x74, 0x00 }, /* ţ to t */ | |
1354 { 0x0164, 0x54, 0x00 }, /* Ť to T */ | |
1355 { 0x0165, 0x74, 0x00 }, /* ť to t */ | |
1356 { 0x0166, 0x54, 0x00 }, /* Ŧ to T */ | |
1357 { 0x0167, 0x74, 0x00 }, /* ŧ to t */ | |
1358 { 0x0168, 0x55, 0x00 }, /* Ũ to U */ | |
1359 { 0x0169, 0x75, 0x00 }, /* ũ to u */ | |
1360 { 0x016A, 0x55, 0x00 }, /* Ū to U */ | |
1361 { 0x016B, 0x75, 0x00 }, /* ū to u */ | |
1362 { 0x016C, 0x55, 0x00 }, /* Ŭ to U */ | |
1363 { 0x016D, 0x75, 0x00 }, /* ŭ to u */ | |
1364 { 0x016E, 0x55, 0x00 }, /* Ů to U */ | |
1365 { 0x016F, 0x75, 0x00 }, /* ů to u */ | |
1366 { 0x0170, 0x55, 0x00 }, /* Ű to U */ | |
1367 { 0x0171, 0x75, 0x00 }, /* ű to u */ | |
1368 { 0x0172, 0x55, 0x00 }, /* Ų to U */ | |
1369 { 0x0173, 0x75, 0x00 }, /* ų to u */ | |
1370 { 0x0174, 0x57, 0x00 }, /* Ŵ to W */ | |
1371 { 0x0175, 0x77, 0x00 }, /* ŵ to w */ | |
1372 { 0x0176, 0x59, 0x00 }, /* Ŷ to Y */ | |
1373 { 0x0177, 0x79, 0x00 }, /* ŷ to y */ | |
1374 { 0x0178, 0x59, 0x00 }, /* Ÿ to Y */ | |
1375 { 0x0179, 0x5A, 0x00 }, /* Ź to Z */ | |
1376 { 0x017A, 0x7A, 0x00 }, /* ź to z */ | |
1377 { 0x017B, 0x5A, 0x00 }, /* Ż to Z */ | |
1378 { 0x017C, 0x7A, 0x00 }, /* ż to z */ | |
1379 { 0x017D, 0x5A, 0x00 }, /* Ž to Z */ | |
1380 { 0x017E, 0x7A, 0x00 }, /* ž to z */ | |
1381 { 0x017F, 0x73, 0x00 }, /* ſ to s */ | |
1382 { 0x0192, 0x66, 0x00 }, /* ƒ to f */ | |
1383 { 0x0218, 0x53, 0x00 }, /* Ș to S */ | |
1384 { 0x0219, 0x73, 0x00 }, /* ș to s */ | |
1385 { 0x021A, 0x54, 0x00 }, /* Ț to T */ | |
1386 { 0x021B, 0x74, 0x00 }, /* ț to t */ | |
1387 { 0x0386, 0x41, 0x00 }, /* Ά to A */ | |
1388 { 0x0388, 0x45, 0x00 }, /* Έ to E */ | |
1389 { 0x0389, 0x49, 0x00 }, /* Ή to I */ | |
1390 { 0x038A, 0x49, 0x00 }, /* Ί to I */ | |
1391 { 0x038C, 0x4f, 0x00 }, /* Ό to O */ | |
1392 { 0x038E, 0x59, 0x00 }, /* Ύ to Y */ | |
1393 { 0x038F, 0x4f, 0x00 }, /* Ώ to O */ | |
1394 { 0x0390, 0x69, 0x00 }, /* ΐ to i */ | |
1395 { 0x0391, 0x41, 0x00 }, /* Α to A */ | |
1396 { 0x0392, 0x42, 0x00 }, /* Β to B */ | |
1397 { 0x0393, 0x47, 0x00 }, /* Γ to G */ | |
1398 { 0x0394, 0x44, 0x00 }, /* Δ to D */ | |
1399 { 0x0395, 0x45, 0x00 }, /* Ε to E */ | |
1400 { 0x0396, 0x5a, 0x00 }, /* Ζ to Z */ | |
1401 { 0x0397, 0x49, 0x00 }, /* Η to I */ | |
1402 { 0x0398, 0x54, 0x68 }, /* Θ to Th */ | |
1403 { 0x0399, 0x49, 0x00 }, /* Ι to I */ | |
1404 { 0x039A, 0x4b, 0x00 }, /* Κ to K */ | |
1405 { 0x039B, 0x4c, 0x00 }, /* Λ to L */ | |
1406 { 0x039C, 0x4d, 0x00 }, /* Μ to M */ | |
1407 { 0x039D, 0x4e, 0x00 }, /* Ν to N */ | |
1408 { 0x039E, 0x58, 0x00 }, /* Ξ to X */ | |
1409 { 0x039F, 0x4f, 0x00 }, /* Ο to O */ | |
1410 { 0x03A0, 0x50, 0x00 }, /* Π to P */ | |
1411 { 0x03A1, 0x52, 0x00 }, /* Ρ to R */ | |
1412 { 0x03A3, 0x53, 0x00 }, /* Σ to S */ | |
1413 { 0x03A4, 0x54, 0x00 }, /* Τ to T */ | |
1414 { 0x03A5, 0x59, 0x00 }, /* Υ to Y */ | |
1415 { 0x03A6, 0x46, 0x00 }, /* Φ to F */ | |
1416 { 0x03A7, 0x43, 0x68 }, /* Χ to Ch */ | |
1417 { 0x03A8, 0x50, 0x73 }, /* Ψ to Ps */ | |
1418 { 0x03A9, 0x4f, 0x00 }, /* Ω to O */ | |
1419 { 0x03AA, 0x49, 0x00 }, /* Ϊ to I */ | |
1420 { 0x03AB, 0x59, 0x00 }, /* Ϋ to Y */ | |
1421 { 0x03AC, 0x61, 0x00 }, /* ά to a */ | |
1422 { 0x03AD, 0x65, 0x00 }, /* έ to e */ | |
1423 { 0x03AE, 0x69, 0x00 }, /* ή to i */ | |
1424 { 0x03AF, 0x69, 0x00 }, /* ί to i */ | |
1425 { 0x03B1, 0x61, 0x00 }, /* α to a */ | |
1426 { 0x03B2, 0x62, 0x00 }, /* β to b */ | |
1427 { 0x03B3, 0x67, 0x00 }, /* γ to g */ | |
1428 { 0x03B4, 0x64, 0x00 }, /* δ to d */ | |
1429 { 0x03B5, 0x65, 0x00 }, /* ε to e */ | |
1430 { 0x03B6, 0x7a, 0x00 }, /* ζ to z */ | |
1431 { 0x03B7, 0x69, 0x00 }, /* η to i */ | |
1432 { 0x03B8, 0x74, 0x68 }, /* θ to th */ | |
1433 { 0x03B9, 0x69, 0x00 }, /* ι to i */ | |
1434 { 0x03BA, 0x6b, 0x00 }, /* κ to k */ | |
1435 { 0x03BB, 0x6c, 0x00 }, /* λ to l */ | |
1436 { 0x03BC, 0x6d, 0x00 }, /* μ to m */ | |
1437 { 0x03BD, 0x6e, 0x00 }, /* ν to n */ | |
1438 { 0x03BE, 0x78, 0x00 }, /* ξ to x */ | |
1439 { 0x03BF, 0x6f, 0x00 }, /* ο to o */ | |
1440 { 0x03C0, 0x70, 0x00 }, /* π to p */ | |
1441 { 0x03C1, 0x72, 0x00 }, /* ρ to r */ | |
1442 { 0x03C3, 0x73, 0x00 }, /* σ to s */ | |
1443 { 0x03C4, 0x74, 0x00 }, /* τ to t */ | |
1444 { 0x03C5, 0x79, 0x00 }, /* υ to y */ | |
1445 { 0x03C6, 0x66, 0x00 }, /* φ to f */ | |
1446 { 0x03C7, 0x63, 0x68 }, /* χ to ch */ | |
1447 { 0x03C8, 0x70, 0x73 }, /* ψ to ps */ | |
1448 { 0x03C9, 0x6f, 0x00 }, /* ω to o */ | |
1449 { 0x03CA, 0x69, 0x00 }, /* ϊ to i */ | |
1450 { 0x03CB, 0x79, 0x00 }, /* ϋ to y */ | |
1451 { 0x03CC, 0x6f, 0x00 }, /* ό to o */ | |
1452 { 0x03CD, 0x79, 0x00 }, /* ύ to y */ | |
1453 { 0x03CE, 0x69, 0x00 }, /* ώ to i */ | |
1454 { 0x0400, 0x45, 0x00 }, /* Ѐ to E */ | |
1455 { 0x0401, 0x45, 0x00 }, /* Ё to E */ | |
1456 { 0x0402, 0x44, 0x00 }, /* Ђ to D */ | |
1457 { 0x0403, 0x47, 0x00 }, /* Ѓ to G */ | |
1458 { 0x0404, 0x45, 0x00 }, /* Є to E */ | |
1459 { 0x0405, 0x5a, 0x00 }, /* Ѕ to Z */ | |
1460 { 0x0406, 0x49, 0x00 }, /* І to I */ | |
1461 { 0x0407, 0x49, 0x00 }, /* Ї to I */ | |
1462 { 0x0408, 0x4a, 0x00 }, /* Ј to J */ | |
1463 { 0x0409, 0x49, 0x00 }, /* Љ to I */ | |
1464 { 0x040A, 0x4e, 0x00 }, /* Њ to N */ | |
1465 { 0x040B, 0x44, 0x00 }, /* Ћ to D */ | |
1466 { 0x040C, 0x4b, 0x00 }, /* Ќ to K */ | |
1467 { 0x040D, 0x49, 0x00 }, /* Ѝ to I */ | |
1468 { 0x040E, 0x55, 0x00 }, /* Ў to U */ | |
1469 { 0x040F, 0x44, 0x00 }, /* Џ to D */ | |
1470 { 0x0410, 0x41, 0x00 }, /* А to A */ | |
1471 { 0x0411, 0x42, 0x00 }, /* Б to B */ | |
1472 { 0x0412, 0x56, 0x00 }, /* В to V */ | |
1473 { 0x0413, 0x47, 0x00 }, /* Г to G */ | |
1474 { 0x0414, 0x44, 0x00 }, /* Д to D */ | |
1475 { 0x0415, 0x45, 0x00 }, /* Е to E */ | |
1476 { 0x0416, 0x5a, 0x68 }, /* Ж to Zh */ | |
1477 { 0x0417, 0x5a, 0x00 }, /* З to Z */ | |
1478 { 0x0418, 0x49, 0x00 }, /* И to I */ | |
1479 { 0x0419, 0x49, 0x00 }, /* Й to I */ | |
1480 { 0x041A, 0x4b, 0x00 }, /* К to K */ | |
1481 { 0x041B, 0x4c, 0x00 }, /* Л to L */ | |
1482 { 0x041C, 0x4d, 0x00 }, /* М to M */ | |
1483 { 0x041D, 0x4e, 0x00 }, /* Н to N */ | |
1484 { 0x041E, 0x4f, 0x00 }, /* О to O */ | |
1485 { 0x041F, 0x50, 0x00 }, /* П to P */ | |
1486 { 0x0420, 0x52, 0x00 }, /* Р to R */ | |
1487 { 0x0421, 0x53, 0x00 }, /* С to S */ | |
1488 { 0x0422, 0x54, 0x00 }, /* Т to T */ | |
1489 { 0x0423, 0x55, 0x00 }, /* У to U */ | |
1490 { 0x0424, 0x46, 0x00 }, /* Ф to F */ | |
1491 { 0x0425, 0x4b, 0x68 }, /* Х to Kh */ | |
1492 { 0x0426, 0x54, 0x63 }, /* Ц to Tc */ | |
1493 { 0x0427, 0x43, 0x68 }, /* Ч to Ch */ | |
1494 { 0x0428, 0x53, 0x68 }, /* Ш to Sh */ | |
1495 { 0x0429, 0x53, 0x68 }, /* Щ to Shch */ | |
1496 { 0x042A, 0x61, 0x00 }, /* to A */ | |
1497 { 0x042B, 0x59, 0x00 }, /* Ы to Y */ | |
1498 { 0x042C, 0x59, 0x00 }, /* to Y */ | |
1499 { 0x042D, 0x45, 0x00 }, /* Э to E */ | |
1500 { 0x042E, 0x49, 0x75 }, /* Ю to Iu */ | |
1501 { 0x042F, 0x49, 0x61 }, /* Я to Ia */ | |
1502 { 0x0430, 0x61, 0x00 }, /* а to a */ | |
1503 { 0x0431, 0x62, 0x00 }, /* б to b */ | |
1504 { 0x0432, 0x76, 0x00 }, /* в to v */ | |
1505 { 0x0433, 0x67, 0x00 }, /* г to g */ | |
1506 { 0x0434, 0x64, 0x00 }, /* д to d */ | |
1507 { 0x0435, 0x65, 0x00 }, /* е to e */ | |
1508 { 0x0436, 0x7a, 0x68 }, /* ж to zh */ | |
1509 { 0x0437, 0x7a, 0x00 }, /* з to z */ | |
1510 { 0x0438, 0x69, 0x00 }, /* и to i */ | |
1511 { 0x0439, 0x69, 0x00 }, /* й to i */ | |
1512 { 0x043A, 0x6b, 0x00 }, /* к to k */ | |
1513 { 0x043B, 0x6c, 0x00 }, /* л to l */ | |
1514 { 0x043C, 0x6d, 0x00 }, /* м to m */ | |
1515 { 0x043D, 0x6e, 0x00 }, /* н to n */ | |
1516 { 0x043E, 0x6f, 0x00 }, /* о to o */ | |
1517 { 0x043F, 0x70, 0x00 }, /* п to p */ | |
1518 { 0x0440, 0x72, 0x00 }, /* р to r */ | |
1519 { 0x0441, 0x73, 0x00 }, /* с to s */ | |
1520 { 0x0442, 0x74, 0x00 }, /* т to t */ | |
1521 { 0x0443, 0x75, 0x00 }, /* у to u */ | |
1522 { 0x0444, 0x66, 0x00 }, /* ф to f */ | |
1523 { 0x0445, 0x6b, 0x68 }, /* х to kh */ | |
1524 { 0x0446, 0x74, 0x63 }, /* ц to tc */ | |
1525 { 0x0447, 0x63, 0x68 }, /* ч to ch */ | |
1526 { 0x0448, 0x73, 0x68 }, /* ш to sh */ | |
1527 { 0x0449, 0x73, 0x68 }, /* щ to shch */ | |
1528 { 0x044A, 0x61, 0x00 }, /* to a */ | |
1529 { 0x044B, 0x79, 0x00 }, /* ы to y */ | |
1530 { 0x044C, 0x79, 0x00 }, /* to y */ | |
1531 { 0x044D, 0x65, 0x00 }, /* э to e */ | |
1532 { 0x044E, 0x69, 0x75 }, /* ю to iu */ | |
1533 { 0x044F, 0x69, 0x61 }, /* я to ia */ | |
1534 { 0x0450, 0x65, 0x00 }, /* ѐ to e */ | |
1535 { 0x0451, 0x65, 0x00 }, /* ё to e */ | |
1536 { 0x0452, 0x64, 0x00 }, /* ђ to d */ | |
1537 { 0x0453, 0x67, 0x00 }, /* ѓ to g */ | |
1538 { 0x0454, 0x65, 0x00 }, /* є to e */ | |
1539 { 0x0455, 0x7a, 0x00 }, /* ѕ to z */ | |
1540 { 0x0456, 0x69, 0x00 }, /* і to i */ | |
1541 { 0x0457, 0x69, 0x00 }, /* ї to i */ | |
1542 { 0x0458, 0x6a, 0x00 }, /* ј to j */ | |
1543 { 0x0459, 0x69, 0x00 }, /* љ to i */ | |
1544 { 0x045A, 0x6e, 0x00 }, /* њ to n */ | |
1545 { 0x045B, 0x64, 0x00 }, /* ћ to d */ | |
1546 { 0x045C, 0x6b, 0x00 }, /* ќ to k */ | |
1547 { 0x045D, 0x69, 0x00 }, /* ѝ to i */ | |
1548 { 0x045E, 0x75, 0x00 }, /* ў to u */ | |
1549 { 0x045F, 0x64, 0x00 }, /* џ to d */ | |
1550 { 0x1E02, 0x42, 0x00 }, /* Ḃ to B */ | |
1551 { 0x1E03, 0x62, 0x00 }, /* ḃ to b */ | |
1552 { 0x1E0A, 0x44, 0x00 }, /* Ḋ to D */ | |
1553 { 0x1E0B, 0x64, 0x00 }, /* ḋ to d */ | |
1554 { 0x1E1E, 0x46, 0x00 }, /* Ḟ to F */ | |
1555 { 0x1E1F, 0x66, 0x00 }, /* ḟ to f */ | |
1556 { 0x1E40, 0x4D, 0x00 }, /* Ṁ to M */ | |
1557 { 0x1E41, 0x6D, 0x00 }, /* ṁ to m */ | |
1558 { 0x1E56, 0x50, 0x00 }, /* Ṗ to P */ | |
1559 { 0x1E57, 0x70, 0x00 }, /* ṗ to p */ | |
1560 { 0x1E60, 0x53, 0x00 }, /* Ṡ to S */ | |
1561 { 0x1E61, 0x73, 0x00 }, /* ṡ to s */ | |
1562 { 0x1E6A, 0x54, 0x00 }, /* Ṫ to T */ | |
1563 { 0x1E6B, 0x74, 0x00 }, /* ṫ to t */ | |
1564 { 0x1E80, 0x57, 0x00 }, /* Ẁ to W */ | |
1565 { 0x1E81, 0x77, 0x00 }, /* ẁ to w */ | |
1566 { 0x1E82, 0x57, 0x00 }, /* Ẃ to W */ | |
1567 { 0x1E83, 0x77, 0x00 }, /* ẃ to w */ | |
1568 { 0x1E84, 0x57, 0x00 }, /* Ẅ to W */ | |
1569 { 0x1E85, 0x77, 0x00 }, /* ẅ to w */ | |
1570 { 0x1EF2, 0x59, 0x00 }, /* Ỳ to Y */ | |
1571 { 0x1EF3, 0x79, 0x00 }, /* ỳ to y */ | |
1572 { 0xFB00, 0x66, 0x66 }, /* ff to ff */ | |
1573 { 0xFB01, 0x66, 0x69 }, /* fi to fi */ | |
1574 { 0xFB02, 0x66, 0x6C }, /* fl to fl */ | |
1575 { 0xFB05, 0x73, 0x74 }, /* ſt to st */ | |
1576 { 0xFB06, 0x73, 0x74 }, /* st to st */ | |
1577 }; | |
1578 | |
1579 /* | |
1580 ** Convert the input string from UTF-8 into pure ASCII by converting | |
1581 ** all non-ASCII characters to some combination of characters in the | |
1582 ** ASCII subset. | |
1583 ** | |
1584 ** The returned string might contain more characters than the input. | |
1585 ** | |
1586 ** Space to hold the returned string comes from sqlite3_malloc() and | |
1587 ** should be freed by the caller. | |
1588 */ | |
1589 static unsigned char *transliterate(const unsigned char *zIn, int nIn){ | |
1590 unsigned char *zOut = sqlite3_malloc( nIn*4 + 1 ); | |
1591 int c, sz, nOut; | |
1592 if( zOut==0 ) return 0; | |
1593 nOut = 0; | |
1594 while( nIn>0 ){ | |
1595 c = utf8Read(zIn, nIn, &sz); | |
1596 zIn += sz; | |
1597 nIn -= sz; | |
1598 if( c<=127 ){ | |
1599 zOut[nOut++] = c; | |
1600 }else{ | |
1601 int xTop, xBtm, x; | |
1602 xTop = sizeof(translit)/sizeof(translit[0]) - 1; | |
1603 xBtm = 0; | |
1604 while( xTop>=xBtm ){ | |
1605 x = (xTop + xBtm)/2; | |
1606 if( translit[x].cFrom==c ){ | |
1607 zOut[nOut++] = translit[x].cTo0; | |
1608 if( translit[x].cTo1 ){ | |
1609 zOut[nOut++] = translit[x].cTo1; | |
1610 /* Add an extra "ch" after the "sh" for Щ and щ */ | |
1611 if( c==0x0429 || c== 0x0449 ){ | |
1612 zOut[nOut++] = 'c'; | |
1613 zOut[nOut++] = 'h'; | |
1614 } | |
1615 } | |
1616 c = 0; | |
1617 break; | |
1618 }else if( translit[x].cFrom>c ){ | |
1619 xTop = x-1; | |
1620 }else{ | |
1621 xBtm = x+1; | |
1622 } | |
1623 } | |
1624 if( c ) zOut[nOut++] = '?'; | |
1625 } | |
1626 } | |
1627 zOut[nOut] = 0; | |
1628 return zOut; | |
1629 } | |
1630 | |
1631 /* | |
1632 ** Return the number of characters in the shortest prefix of the input | |
1633 ** string that transliterates to an ASCII string nTrans bytes or longer. | |
1634 ** Or, if the transliteration of the input string is less than nTrans | |
1635 ** bytes in size, return the number of characters in the input string. | |
1636 */ | |
1637 static int translen_to_charlen(const char *zIn, int nIn, int nTrans){ | |
1638 int i, c, sz, nOut; | |
1639 int nChar; | |
1640 | |
1641 i = nOut = 0; | |
1642 for(nChar=0; i<nIn && nOut<nTrans; nChar++){ | |
1643 c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz); | |
1644 i += sz; | |
1645 | |
1646 nOut++; | |
1647 if( c>=128 ){ | |
1648 int xTop, xBtm, x; | |
1649 xTop = sizeof(translit)/sizeof(translit[0]) - 1; | |
1650 xBtm = 0; | |
1651 while( xTop>=xBtm ){ | |
1652 x = (xTop + xBtm)/2; | |
1653 if( translit[x].cFrom==c ){ | |
1654 if( translit[x].cTo1 ) nOut++; | |
1655 if( c==0x0429 || c== 0x0449 ) nOut += 2; | |
1656 break; | |
1657 }else if( translit[x].cFrom>c ){ | |
1658 xTop = x-1; | |
1659 }else{ | |
1660 xBtm = x+1; | |
1661 } | |
1662 } | |
1663 } | |
1664 } | |
1665 | |
1666 return nChar; | |
1667 } | |
1668 | |
1669 | |
1670 /* | |
1671 ** spellfix1_translit(X) | |
1672 ** | |
1673 ** Convert a string that contains non-ASCII Roman characters into | |
1674 ** pure ASCII. | |
1675 */ | |
1676 static void transliterateSqlFunc( | |
1677 sqlite3_context *context, | |
1678 int argc, | |
1679 sqlite3_value **argv | |
1680 ){ | |
1681 const unsigned char *zIn = sqlite3_value_text(argv[0]); | |
1682 int nIn = sqlite3_value_bytes(argv[0]); | |
1683 unsigned char *zOut = transliterate(zIn, nIn); | |
1684 if( zOut==0 ){ | |
1685 sqlite3_result_error_nomem(context); | |
1686 }else{ | |
1687 sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); | |
1688 } | |
1689 } | |
1690 | |
1691 /* | |
1692 ** spellfix1_scriptcode(X) | |
1693 ** | |
1694 ** Try to determine the dominant script used by the word X and return | |
1695 ** its ISO 15924 numeric code. | |
1696 ** | |
1697 ** The current implementation only understands the following scripts: | |
1698 ** | |
1699 ** 215 (Latin) | |
1700 ** 220 (Cyrillic) | |
1701 ** 200 (Greek) | |
1702 ** | |
1703 ** This routine will return 998 if the input X contains characters from | |
1704 ** two or more of the above scripts or 999 if X contains no characters | |
1705 ** from any of the above scripts. | |
1706 */ | |
1707 static void scriptCodeSqlFunc( | |
1708 sqlite3_context *context, | |
1709 int argc, | |
1710 sqlite3_value **argv | |
1711 ){ | |
1712 const unsigned char *zIn = sqlite3_value_text(argv[0]); | |
1713 int nIn = sqlite3_value_bytes(argv[0]); | |
1714 int c, sz; | |
1715 int scriptMask = 0; | |
1716 int res; | |
1717 # define SCRIPT_LATIN 0x0001 | |
1718 # define SCRIPT_CYRILLIC 0x0002 | |
1719 # define SCRIPT_GREEK 0x0004 | |
1720 | |
1721 while( nIn>0 ){ | |
1722 c = utf8Read(zIn, nIn, &sz); | |
1723 zIn += sz; | |
1724 nIn -= sz; | |
1725 if( c<0x02af ){ | |
1726 scriptMask |= SCRIPT_LATIN; | |
1727 }else if( c>=0x0400 && c<=0x04ff ){ | |
1728 scriptMask |= SCRIPT_CYRILLIC; | |
1729 }else if( c>=0x0386 && c<=0x03ce ){ | |
1730 scriptMask |= SCRIPT_GREEK; | |
1731 } | |
1732 } | |
1733 switch( scriptMask ){ | |
1734 case 0: res = 999; break; | |
1735 case SCRIPT_LATIN: res = 215; break; | |
1736 case SCRIPT_CYRILLIC: res = 220; break; | |
1737 case SCRIPT_GREEK: res = 200; break; | |
1738 default: res = 998; break; | |
1739 } | |
1740 sqlite3_result_int(context, res); | |
1741 } | |
1742 | |
1743 /* End transliterate | |
1744 ****************************************************************************** | |
1745 ****************************************************************************** | |
1746 ** Begin spellfix1 virtual table. | |
1747 */ | |
1748 | |
1749 /* Maximum length of a phonehash used for querying the shadow table */ | |
1750 #define SPELLFIX_MX_HASH 8 | |
1751 | |
1752 /* Maximum number of hash strings to examine per query */ | |
1753 #define SPELLFIX_MX_RUN 1 | |
1754 | |
1755 typedef struct spellfix1_vtab spellfix1_vtab; | |
1756 typedef struct spellfix1_cursor spellfix1_cursor; | |
1757 | |
1758 /* Fuzzy-search virtual table object */ | |
1759 struct spellfix1_vtab { | |
1760 sqlite3_vtab base; /* Base class - must be first */ | |
1761 sqlite3 *db; /* Database connection */ | |
1762 char *zDbName; /* Name of database holding this table */ | |
1763 char *zTableName; /* Name of the virtual table */ | |
1764 char *zCostTable; /* Table holding edit-distance cost numbers */ | |
1765 EditDist3Config *pConfig3; /* Parsed edit distance costs */ | |
1766 }; | |
1767 | |
1768 /* Fuzzy-search cursor object */ | |
1769 struct spellfix1_cursor { | |
1770 sqlite3_vtab_cursor base; /* Base class - must be first */ | |
1771 spellfix1_vtab *pVTab; /* The table to which this cursor belongs */ | |
1772 char *zPattern; /* rhs of MATCH clause */ | |
1773 int nRow; /* Number of rows of content */ | |
1774 int nAlloc; /* Number of allocated rows */ | |
1775 int iRow; /* Current row of content */ | |
1776 int iLang; /* Value of the langid= constraint */ | |
1777 int iTop; /* Value of the top= constraint */ | |
1778 int iScope; /* Value of the scope= constraint */ | |
1779 int nSearch; /* Number of vocabulary items checked */ | |
1780 sqlite3_stmt *pFullScan; /* Shadow query for a full table scan */ | |
1781 struct spellfix1_row { /* For each row of content */ | |
1782 sqlite3_int64 iRowid; /* Rowid for this row */ | |
1783 char *zWord; /* Text for this row */ | |
1784 int iRank; /* Rank for this row */ | |
1785 int iDistance; /* Distance from pattern for this row */ | |
1786 int iScore; /* Score for sorting */ | |
1787 int iMatchlen; /* Value of matchlen column (or -1) */ | |
1788 char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */ | |
1789 } *a; | |
1790 }; | |
1791 | |
1792 /* | |
1793 ** Construct one or more SQL statements from the format string given | |
1794 ** and then evaluate those statements. The success code is written | |
1795 ** into *pRc. | |
1796 ** | |
1797 ** If *pRc is initially non-zero then this routine is a no-op. | |
1798 */ | |
1799 static void spellfix1DbExec( | |
1800 int *pRc, /* Success code */ | |
1801 sqlite3 *db, /* Database in which to run SQL */ | |
1802 const char *zFormat, /* Format string for SQL */ | |
1803 ... /* Arguments to the format string */ | |
1804 ){ | |
1805 va_list ap; | |
1806 char *zSql; | |
1807 if( *pRc ) return; | |
1808 va_start(ap, zFormat); | |
1809 zSql = sqlite3_vmprintf(zFormat, ap); | |
1810 va_end(ap); | |
1811 if( zSql==0 ){ | |
1812 *pRc = SQLITE_NOMEM; | |
1813 }else{ | |
1814 *pRc = sqlite3_exec(db, zSql, 0, 0, 0); | |
1815 sqlite3_free(zSql); | |
1816 } | |
1817 } | |
1818 | |
1819 /* | |
1820 ** xDisconnect/xDestroy method for the fuzzy-search module. | |
1821 */ | |
1822 static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){ | |
1823 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; | |
1824 int rc = SQLITE_OK; | |
1825 if( isDestroy ){ | |
1826 sqlite3 *db = p->db; | |
1827 spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"", | |
1828 p->zDbName, p->zTableName); | |
1829 } | |
1830 if( rc==SQLITE_OK ){ | |
1831 sqlite3_free(p->zTableName); | |
1832 editDist3ConfigDelete(p->pConfig3); | |
1833 sqlite3_free(p->zCostTable); | |
1834 sqlite3_free(p); | |
1835 } | |
1836 return rc; | |
1837 } | |
1838 static int spellfix1Disconnect(sqlite3_vtab *pVTab){ | |
1839 return spellfix1Uninit(0, pVTab); | |
1840 } | |
1841 static int spellfix1Destroy(sqlite3_vtab *pVTab){ | |
1842 return spellfix1Uninit(1, pVTab); | |
1843 } | |
1844 | |
1845 /* | |
1846 ** Make a copy of a string. Remove leading and trailing whitespace | |
1847 ** and dequote it. | |
1848 */ | |
1849 static char *spellfix1Dequote(const char *zIn){ | |
1850 char *zOut; | |
1851 int i, j; | |
1852 char c; | |
1853 while( isspace(zIn[0]) ) zIn++; | |
1854 zOut = sqlite3_mprintf("%s", zIn); | |
1855 if( zOut==0 ) return 0; | |
1856 i = (int)strlen(zOut); | |
1857 #if 0 /* The parser will never leave spaces at the end */ | |
1858 while( i>0 && isspace(zOut[i-1]) ){ i--; } | |
1859 #endif | |
1860 zOut[i] = 0; | |
1861 c = zOut[0]; | |
1862 if( c=='\'' || c=='"' ){ | |
1863 for(i=1, j=0; ALWAYS(zOut[i]); i++){ | |
1864 zOut[j++] = zOut[i]; | |
1865 if( zOut[i]==c ){ | |
1866 if( zOut[i+1]==c ){ | |
1867 i++; | |
1868 }else{ | |
1869 zOut[j-1] = 0; | |
1870 break; | |
1871 } | |
1872 } | |
1873 } | |
1874 } | |
1875 return zOut; | |
1876 } | |
1877 | |
1878 | |
1879 /* | |
1880 ** xConnect/xCreate method for the spellfix1 module. Arguments are: | |
1881 ** | |
1882 ** argv[0] -> module name ("spellfix1") | |
1883 ** argv[1] -> database name | |
1884 ** argv[2] -> table name | |
1885 ** argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter) | |
1886 */ | |
1887 static int spellfix1Init( | |
1888 int isCreate, | |
1889 sqlite3 *db, | |
1890 void *pAux, | |
1891 int argc, const char *const*argv, | |
1892 sqlite3_vtab **ppVTab, | |
1893 char **pzErr | |
1894 ){ | |
1895 spellfix1_vtab *pNew = 0; | |
1896 /* const char *zModule = argv[0]; // not used */ | |
1897 const char *zDbName = argv[1]; | |
1898 const char *zTableName = argv[2]; | |
1899 int nDbName; | |
1900 int rc = SQLITE_OK; | |
1901 int i; | |
1902 | |
1903 nDbName = (int)strlen(zDbName); | |
1904 pNew = sqlite3_malloc( sizeof(*pNew) + nDbName + 1); | |
1905 if( pNew==0 ){ | |
1906 rc = SQLITE_NOMEM; | |
1907 }else{ | |
1908 memset(pNew, 0, sizeof(*pNew)); | |
1909 pNew->zDbName = (char*)&pNew[1]; | |
1910 memcpy(pNew->zDbName, zDbName, nDbName+1); | |
1911 pNew->zTableName = sqlite3_mprintf("%s", zTableName); | |
1912 pNew->db = db; | |
1913 if( pNew->zTableName==0 ){ | |
1914 rc = SQLITE_NOMEM; | |
1915 }else{ | |
1916 rc = sqlite3_declare_vtab(db, | |
1917 "CREATE TABLE x(word,rank,distance,langid, " | |
1918 "score, matchlen, phonehash HIDDEN, " | |
1919 "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, " | |
1920 "soundslike HIDDEN, command HIDDEN)" | |
1921 ); | |
1922 #define SPELLFIX_COL_WORD 0 | |
1923 #define SPELLFIX_COL_RANK 1 | |
1924 #define SPELLFIX_COL_DISTANCE 2 | |
1925 #define SPELLFIX_COL_LANGID 3 | |
1926 #define SPELLFIX_COL_SCORE 4 | |
1927 #define SPELLFIX_COL_MATCHLEN 5 | |
1928 #define SPELLFIX_COL_PHONEHASH 6 | |
1929 #define SPELLFIX_COL_TOP 7 | |
1930 #define SPELLFIX_COL_SCOPE 8 | |
1931 #define SPELLFIX_COL_SRCHCNT 9 | |
1932 #define SPELLFIX_COL_SOUNDSLIKE 10 | |
1933 #define SPELLFIX_COL_COMMAND 11 | |
1934 } | |
1935 if( rc==SQLITE_OK && isCreate ){ | |
1936 spellfix1DbExec(&rc, db, | |
1937 "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n" | |
1938 " id INTEGER PRIMARY KEY,\n" | |
1939 " rank INT,\n" | |
1940 " langid INT,\n" | |
1941 " word TEXT,\n" | |
1942 " k1 TEXT,\n" | |
1943 " k2 TEXT\n" | |
1944 ");\n", | |
1945 zDbName, zTableName | |
1946 ); | |
1947 spellfix1DbExec(&rc, db, | |
1948 "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" " | |
1949 "ON \"%w_vocab\"(langid,k2);", | |
1950 zDbName, zTableName, zTableName | |
1951 ); | |
1952 } | |
1953 for(i=3; rc==SQLITE_OK && i<argc; i++){ | |
1954 if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){ | |
1955 pNew->zCostTable = spellfix1Dequote(&argv[i][16]); | |
1956 if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM; | |
1957 continue; | |
1958 } | |
1959 *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]); | |
1960 rc = SQLITE_ERROR; | |
1961 } | |
1962 } | |
1963 | |
1964 if( rc && pNew ){ | |
1965 *ppVTab = 0; | |
1966 spellfix1Uninit(0, &pNew->base); | |
1967 }else{ | |
1968 *ppVTab = (sqlite3_vtab *)pNew; | |
1969 } | |
1970 return rc; | |
1971 } | |
1972 | |
1973 /* | |
1974 ** The xConnect and xCreate methods | |
1975 */ | |
1976 static int spellfix1Connect( | |
1977 sqlite3 *db, | |
1978 void *pAux, | |
1979 int argc, const char *const*argv, | |
1980 sqlite3_vtab **ppVTab, | |
1981 char **pzErr | |
1982 ){ | |
1983 return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr); | |
1984 } | |
1985 static int spellfix1Create( | |
1986 sqlite3 *db, | |
1987 void *pAux, | |
1988 int argc, const char *const*argv, | |
1989 sqlite3_vtab **ppVTab, | |
1990 char **pzErr | |
1991 ){ | |
1992 return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr); | |
1993 } | |
1994 | |
1995 /* | |
1996 ** Clear all of the content from a cursor. | |
1997 */ | |
1998 static void spellfix1ResetCursor(spellfix1_cursor *pCur){ | |
1999 int i; | |
2000 for(i=0; i<pCur->nRow; i++){ | |
2001 sqlite3_free(pCur->a[i].zWord); | |
2002 } | |
2003 pCur->nRow = 0; | |
2004 pCur->iRow = 0; | |
2005 pCur->nSearch = 0; | |
2006 if( pCur->pFullScan ){ | |
2007 sqlite3_finalize(pCur->pFullScan); | |
2008 pCur->pFullScan = 0; | |
2009 } | |
2010 } | |
2011 | |
2012 /* | |
2013 ** Resize the cursor to hold up to N rows of content | |
2014 */ | |
2015 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){ | |
2016 struct spellfix1_row *aNew; | |
2017 assert( N>=pCur->nRow ); | |
2018 aNew = sqlite3_realloc(pCur->a, sizeof(pCur->a[0])*N); | |
2019 if( aNew==0 && N>0 ){ | |
2020 spellfix1ResetCursor(pCur); | |
2021 sqlite3_free(pCur->a); | |
2022 pCur->nAlloc = 0; | |
2023 pCur->a = 0; | |
2024 }else{ | |
2025 pCur->nAlloc = N; | |
2026 pCur->a = aNew; | |
2027 } | |
2028 } | |
2029 | |
2030 | |
2031 /* | |
2032 ** Close a fuzzy-search cursor. | |
2033 */ | |
2034 static int spellfix1Close(sqlite3_vtab_cursor *cur){ | |
2035 spellfix1_cursor *pCur = (spellfix1_cursor *)cur; | |
2036 spellfix1ResetCursor(pCur); | |
2037 spellfix1ResizeCursor(pCur, 0); | |
2038 sqlite3_free(pCur->zPattern); | |
2039 sqlite3_free(pCur); | |
2040 return SQLITE_OK; | |
2041 } | |
2042 | |
2043 /* | |
2044 ** Search for terms of these forms: | |
2045 ** | |
2046 ** (A) word MATCH $str | |
2047 ** (B) langid == $langid | |
2048 ** (C) top = $top | |
2049 ** (D) scope = $scope | |
2050 ** (E) distance < $distance | |
2051 ** (F) distance <= $distance | |
2052 ** (G) rowid = $rowid | |
2053 ** | |
2054 ** The plan number is a bit mask formed with these bits: | |
2055 ** | |
2056 ** 0x01 (A) is found | |
2057 ** 0x02 (B) is found | |
2058 ** 0x04 (C) is found | |
2059 ** 0x08 (D) is found | |
2060 ** 0x10 (E) is found | |
2061 ** 0x20 (F) is found | |
2062 ** 0x40 (G) is found | |
2063 ** | |
2064 ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid | |
2065 ** if specified and in that order. | |
2066 */ | |
2067 static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ | |
2068 int iPlan = 0; | |
2069 int iLangTerm = -1; | |
2070 int iTopTerm = -1; | |
2071 int iScopeTerm = -1; | |
2072 int iDistTerm = -1; | |
2073 int iRowidTerm = -1; | |
2074 int i; | |
2075 const struct sqlite3_index_constraint *pConstraint; | |
2076 pConstraint = pIdxInfo->aConstraint; | |
2077 for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ | |
2078 if( pConstraint->usable==0 ) continue; | |
2079 | |
2080 /* Terms of the form: word MATCH $str */ | |
2081 if( (iPlan & 1)==0 | |
2082 && pConstraint->iColumn==SPELLFIX_COL_WORD | |
2083 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH | |
2084 ){ | |
2085 iPlan |= 1; | |
2086 pIdxInfo->aConstraintUsage[i].argvIndex = 1; | |
2087 pIdxInfo->aConstraintUsage[i].omit = 1; | |
2088 } | |
2089 | |
2090 /* Terms of the form: langid = $langid */ | |
2091 if( (iPlan & 2)==0 | |
2092 && pConstraint->iColumn==SPELLFIX_COL_LANGID | |
2093 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ | |
2094 ){ | |
2095 iPlan |= 2; | |
2096 iLangTerm = i; | |
2097 } | |
2098 | |
2099 /* Terms of the form: top = $top */ | |
2100 if( (iPlan & 4)==0 | |
2101 && pConstraint->iColumn==SPELLFIX_COL_TOP | |
2102 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ | |
2103 ){ | |
2104 iPlan |= 4; | |
2105 iTopTerm = i; | |
2106 } | |
2107 | |
2108 /* Terms of the form: scope = $scope */ | |
2109 if( (iPlan & 8)==0 | |
2110 && pConstraint->iColumn==SPELLFIX_COL_SCOPE | |
2111 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ | |
2112 ){ | |
2113 iPlan |= 8; | |
2114 iScopeTerm = i; | |
2115 } | |
2116 | |
2117 /* Terms of the form: distance < $dist or distance <= $dist */ | |
2118 if( (iPlan & (16|32))==0 | |
2119 && pConstraint->iColumn==SPELLFIX_COL_DISTANCE | |
2120 && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT | |
2121 || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) | |
2122 ){ | |
2123 iPlan |= pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ? 16 : 32; | |
2124 iDistTerm = i; | |
2125 } | |
2126 | |
2127 /* Terms of the form: distance < $dist or distance <= $dist */ | |
2128 if( (iPlan & 64)==0 | |
2129 && pConstraint->iColumn<0 | |
2130 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ | |
2131 ){ | |
2132 iPlan |= 64; | |
2133 iRowidTerm = i; | |
2134 } | |
2135 } | |
2136 if( iPlan&1 ){ | |
2137 int idx = 2; | |
2138 pIdxInfo->idxNum = iPlan; | |
2139 if( pIdxInfo->nOrderBy==1 | |
2140 && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE | |
2141 && pIdxInfo->aOrderBy[0].desc==0 | |
2142 ){ | |
2143 pIdxInfo->orderByConsumed = 1; /* Default order by iScore */ | |
2144 } | |
2145 if( iPlan&2 ){ | |
2146 pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++; | |
2147 pIdxInfo->aConstraintUsage[iLangTerm].omit = 1; | |
2148 } | |
2149 if( iPlan&4 ){ | |
2150 pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++; | |
2151 pIdxInfo->aConstraintUsage[iTopTerm].omit = 1; | |
2152 } | |
2153 if( iPlan&8 ){ | |
2154 pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++; | |
2155 pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1; | |
2156 } | |
2157 if( iPlan&(16|32) ){ | |
2158 pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++; | |
2159 pIdxInfo->aConstraintUsage[iDistTerm].omit = 1; | |
2160 } | |
2161 pIdxInfo->estimatedCost = 1e5; | |
2162 }else if( (iPlan & 64) ){ | |
2163 pIdxInfo->idxNum = 64; | |
2164 pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1; | |
2165 pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1; | |
2166 pIdxInfo->estimatedCost = 5; | |
2167 }else{ | |
2168 pIdxInfo->idxNum = 0; | |
2169 pIdxInfo->estimatedCost = 1e50; | |
2170 } | |
2171 return SQLITE_OK; | |
2172 } | |
2173 | |
2174 /* | |
2175 ** Open a new fuzzy-search cursor. | |
2176 */ | |
2177 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ | |
2178 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; | |
2179 spellfix1_cursor *pCur; | |
2180 pCur = sqlite3_malloc( sizeof(*pCur) ); | |
2181 if( pCur==0 ) return SQLITE_NOMEM; | |
2182 memset(pCur, 0, sizeof(*pCur)); | |
2183 pCur->pVTab = p; | |
2184 *ppCursor = &pCur->base; | |
2185 return SQLITE_OK; | |
2186 } | |
2187 | |
2188 /* | |
2189 ** Adjust a distance measurement by the words rank in order to show | |
2190 ** preference to common words. | |
2191 */ | |
2192 static int spellfix1Score(int iDistance, int iRank){ | |
2193 int iLog2; | |
2194 for(iLog2=0; iRank>0; iLog2++, iRank>>=1){} | |
2195 return iDistance + 32 - iLog2; | |
2196 } | |
2197 | |
2198 /* | |
2199 ** Compare two spellfix1_row objects for sorting purposes in qsort() such | |
2200 ** that they sort in order of increasing distance. | |
2201 */ | |
2202 static int spellfix1RowCompare(const void *A, const void *B){ | |
2203 const struct spellfix1_row *a = (const struct spellfix1_row*)A; | |
2204 const struct spellfix1_row *b = (const struct spellfix1_row*)B; | |
2205 return a->iScore - b->iScore; | |
2206 } | |
2207 | |
2208 /* | |
2209 ** A structure used to pass information from spellfix1FilterForMatch() | |
2210 ** into spellfix1RunQuery(). | |
2211 */ | |
2212 typedef struct MatchQuery { | |
2213 spellfix1_cursor *pCur; /* The cursor being queried */ | |
2214 sqlite3_stmt *pStmt; /* shadow table query statment */ | |
2215 char zHash[SPELLFIX_MX_HASH]; /* The current phonehash for zPattern */ | |
2216 const char *zPattern; /* Transliterated input string */ | |
2217 int nPattern; /* Length of zPattern */ | |
2218 EditDist3FromString *pMatchStr3; /* Original unicode string */ | |
2219 EditDist3Config *pConfig3; /* Edit-distance cost coefficients */ | |
2220 const EditDist3Lang *pLang; /* The selected language coefficients */ | |
2221 int iLang; /* The language id */ | |
2222 int iScope; /* Default scope */ | |
2223 int iMaxDist; /* Maximum allowed edit distance, or -1 */ | |
2224 int rc; /* Error code */ | |
2225 int nRun; /* Number of prior runs for the same zPattern */ | |
2226 char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH]; /* Prior hashes */ | |
2227 } MatchQuery; | |
2228 | |
2229 /* | |
2230 ** Run a query looking for the best matches against zPattern using | |
2231 ** zHash as the character class seed hash. | |
2232 */ | |
2233 static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){ | |
2234 const char *zK1; | |
2235 const char *zWord; | |
2236 int iDist; | |
2237 int iRank; | |
2238 int iScore; | |
2239 int iWorst = 0; | |
2240 int idx; | |
2241 int idxWorst = -1; | |
2242 int i; | |
2243 int iScope = p->iScope; | |
2244 spellfix1_cursor *pCur = p->pCur; | |
2245 sqlite3_stmt *pStmt = p->pStmt; | |
2246 char zHash1[SPELLFIX_MX_HASH]; | |
2247 char zHash2[SPELLFIX_MX_HASH]; | |
2248 char *zClass; | |
2249 int nClass; | |
2250 int rc; | |
2251 | |
2252 if( pCur->a==0 || p->rc ) return; /* Prior memory allocation failure */ | |
2253 zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery); | |
2254 if( zClass==0 ){ | |
2255 p->rc = SQLITE_NOMEM; | |
2256 return; | |
2257 } | |
2258 nClass = (int)strlen(zClass); | |
2259 if( nClass>SPELLFIX_MX_HASH-2 ){ | |
2260 nClass = SPELLFIX_MX_HASH-2; | |
2261 zClass[nClass] = 0; | |
2262 } | |
2263 if( nClass<=iScope ){ | |
2264 if( nClass>2 ){ | |
2265 iScope = nClass-1; | |
2266 }else{ | |
2267 iScope = nClass; | |
2268 } | |
2269 } | |
2270 memcpy(zHash1, zClass, iScope); | |
2271 sqlite3_free(zClass); | |
2272 zHash1[iScope] = 0; | |
2273 memcpy(zHash2, zHash1, iScope); | |
2274 zHash2[iScope] = 'Z'; | |
2275 zHash2[iScope+1] = 0; | |
2276 #if SPELLFIX_MX_RUN>1 | |
2277 for(i=0; i<p->nRun; i++){ | |
2278 if( strcmp(p->azPrior[i], zHash1)==0 ) return; | |
2279 } | |
2280 #endif | |
2281 assert( p->nRun<SPELLFIX_MX_RUN ); | |
2282 memcpy(p->azPrior[p->nRun++], zHash1, iScope+1); | |
2283 if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM | |
2284 || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM | |
2285 ){ | |
2286 p->rc = SQLITE_NOMEM; | |
2287 return; | |
2288 } | |
2289 #if SPELLFIX_MX_RUN>1 | |
2290 for(i=0; i<pCur->nRow; i++){ | |
2291 if( pCur->a[i].iScore>iWorst ){ | |
2292 iWorst = pCur->a[i].iScore; | |
2293 idxWorst = i; | |
2294 } | |
2295 } | |
2296 #endif | |
2297 while( sqlite3_step(pStmt)==SQLITE_ROW ){ | |
2298 int iMatchlen = -1; | |
2299 iRank = sqlite3_column_int(pStmt, 2); | |
2300 if( p->pMatchStr3 ){ | |
2301 int nWord = sqlite3_column_bytes(pStmt, 1); | |
2302 zWord = (const char*)sqlite3_column_text(pStmt, 1); | |
2303 iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen); | |
2304 }else{ | |
2305 zK1 = (const char*)sqlite3_column_text(pStmt, 3); | |
2306 if( zK1==0 ) continue; | |
2307 iDist = editdist1(p->zPattern, zK1, 0); | |
2308 } | |
2309 if( iDist<0 ){ | |
2310 p->rc = SQLITE_NOMEM; | |
2311 break; | |
2312 } | |
2313 pCur->nSearch++; | |
2314 iScore = spellfix1Score(iDist,iRank); | |
2315 if( p->iMaxDist>=0 ){ | |
2316 if( iDist>p->iMaxDist ) continue; | |
2317 if( pCur->nRow>=pCur->nAlloc-1 ){ | |
2318 spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10); | |
2319 if( pCur->a==0 ) break; | |
2320 } | |
2321 idx = pCur->nRow; | |
2322 }else if( pCur->nRow<pCur->nAlloc ){ | |
2323 idx = pCur->nRow; | |
2324 }else if( iScore<iWorst ){ | |
2325 idx = idxWorst; | |
2326 sqlite3_free(pCur->a[idx].zWord); | |
2327 }else{ | |
2328 continue; | |
2329 } | |
2330 pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1)); | |
2331 if( pCur->a[idx].zWord==0 ){ | |
2332 p->rc = SQLITE_NOMEM; | |
2333 break; | |
2334 } | |
2335 pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0); | |
2336 pCur->a[idx].iRank = iRank; | |
2337 pCur->a[idx].iDistance = iDist; | |
2338 pCur->a[idx].iScore = iScore; | |
2339 pCur->a[idx].iMatchlen = iMatchlen; | |
2340 memcpy(pCur->a[idx].zHash, zHash1, iScope+1); | |
2341 if( pCur->nRow<pCur->nAlloc ) pCur->nRow++; | |
2342 if( pCur->nRow==pCur->nAlloc ){ | |
2343 iWorst = pCur->a[0].iScore; | |
2344 idxWorst = 0; | |
2345 for(i=1; i<pCur->nRow; i++){ | |
2346 iScore = pCur->a[i].iScore; | |
2347 if( iWorst<iScore ){ | |
2348 iWorst = iScore; | |
2349 idxWorst = i; | |
2350 } | |
2351 } | |
2352 } | |
2353 } | |
2354 rc = sqlite3_reset(pStmt); | |
2355 if( rc ) p->rc = rc; | |
2356 } | |
2357 | |
2358 /* | |
2359 ** This version of the xFilter method work if the MATCH term is present | |
2360 ** and we are doing a scan. | |
2361 */ | |
2362 static int spellfix1FilterForMatch( | |
2363 spellfix1_cursor *pCur, | |
2364 int idxNum, | |
2365 int argc, | |
2366 sqlite3_value **argv | |
2367 ){ | |
2368 const unsigned char *zMatchThis; /* RHS of the MATCH operator */ | |
2369 EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */ | |
2370 char *zPattern; /* Transliteration of zMatchThis */ | |
2371 int nPattern; /* Length of zPattern */ | |
2372 int iLimit = 20; /* Max number of rows of output */ | |
2373 int iScope = 3; /* Use this many characters of zClass */ | |
2374 int iLang = 0; /* Language code */ | |
2375 char *zSql; /* SQL of shadow table query */ | |
2376 sqlite3_stmt *pStmt = 0; /* Shadow table query */ | |
2377 int rc; /* Result code */ | |
2378 int idx = 1; /* Next available filter parameter */ | |
2379 spellfix1_vtab *p = pCur->pVTab; /* The virtual table that owns pCur */ | |
2380 MatchQuery x; /* For passing info to RunQuery() */ | |
2381 | |
2382 /* Load the cost table if we have not already done so */ | |
2383 if( p->zCostTable!=0 && p->pConfig3==0 ){ | |
2384 p->pConfig3 = sqlite3_malloc( sizeof(p->pConfig3[0]) ); | |
2385 if( p->pConfig3==0 ) return SQLITE_NOMEM; | |
2386 memset(p->pConfig3, 0, sizeof(p->pConfig3[0])); | |
2387 rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable); | |
2388 if( rc ) return rc; | |
2389 } | |
2390 memset(&x, 0, sizeof(x)); | |
2391 x.iScope = 3; /* Default scope if none specified by "WHERE scope=N" */ | |
2392 x.iMaxDist = -1; /* Maximum allowed edit distance */ | |
2393 | |
2394 if( idxNum&2 ){ | |
2395 iLang = sqlite3_value_int(argv[idx++]); | |
2396 } | |
2397 if( idxNum&4 ){ | |
2398 iLimit = sqlite3_value_int(argv[idx++]); | |
2399 if( iLimit<1 ) iLimit = 1; | |
2400 } | |
2401 if( idxNum&8 ){ | |
2402 x.iScope = sqlite3_value_int(argv[idx++]); | |
2403 if( x.iScope<1 ) x.iScope = 1; | |
2404 if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2; | |
2405 } | |
2406 if( idxNum&(16|32) ){ | |
2407 x.iMaxDist = sqlite3_value_int(argv[idx++]); | |
2408 if( idxNum&16 ) x.iMaxDist--; | |
2409 if( x.iMaxDist<0 ) x.iMaxDist = 0; | |
2410 } | |
2411 spellfix1ResetCursor(pCur); | |
2412 spellfix1ResizeCursor(pCur, iLimit); | |
2413 zMatchThis = sqlite3_value_text(argv[0]); | |
2414 if( zMatchThis==0 ) return SQLITE_OK; | |
2415 if( p->pConfig3 ){ | |
2416 x.pLang = editDist3FindLang(p->pConfig3, iLang); | |
2417 pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1); | |
2418 if( pMatchStr3==0 ){ | |
2419 x.rc = SQLITE_NOMEM; | |
2420 goto filter_exit; | |
2421 } | |
2422 }else{ | |
2423 x.pLang = 0; | |
2424 } | |
2425 zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0])); | |
2426 sqlite3_free(pCur->zPattern); | |
2427 pCur->zPattern = zPattern; | |
2428 if( zPattern==0 ){ | |
2429 x.rc = SQLITE_NOMEM; | |
2430 goto filter_exit; | |
2431 } | |
2432 nPattern = (int)strlen(zPattern); | |
2433 if( zPattern[nPattern-1]=='*' ) nPattern--; | |
2434 zSql = sqlite3_mprintf( | |
2435 "SELECT id, word, rank, k1" | |
2436 " FROM \"%w\".\"%w_vocab\"" | |
2437 " WHERE langid=%d AND k2>=?1 AND k2<?2", | |
2438 p->zDbName, p->zTableName, iLang | |
2439 ); | |
2440 if( zSql==0 ){ | |
2441 x.rc = SQLITE_NOMEM; | |
2442 pStmt = 0; | |
2443 goto filter_exit; | |
2444 } | |
2445 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); | |
2446 sqlite3_free(zSql); | |
2447 pCur->iLang = iLang; | |
2448 x.pCur = pCur; | |
2449 x.pStmt = pStmt; | |
2450 x.zPattern = zPattern; | |
2451 x.nPattern = nPattern; | |
2452 x.pMatchStr3 = pMatchStr3; | |
2453 x.iLang = iLang; | |
2454 x.rc = rc; | |
2455 x.pConfig3 = p->pConfig3; | |
2456 if( x.rc==SQLITE_OK ){ | |
2457 spellfix1RunQuery(&x, zPattern, nPattern); | |
2458 } | |
2459 | |
2460 if( pCur->a ){ | |
2461 qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare); | |
2462 pCur->iTop = iLimit; | |
2463 pCur->iScope = iScope; | |
2464 }else{ | |
2465 x.rc = SQLITE_NOMEM; | |
2466 } | |
2467 | |
2468 filter_exit: | |
2469 sqlite3_finalize(pStmt); | |
2470 editDist3FromStringDelete(pMatchStr3); | |
2471 return x.rc; | |
2472 } | |
2473 | |
2474 /* | |
2475 ** This version of xFilter handles a full-table scan case | |
2476 */ | |
2477 static int spellfix1FilterForFullScan( | |
2478 spellfix1_cursor *pCur, | |
2479 int idxNum, | |
2480 int argc, | |
2481 sqlite3_value **argv | |
2482 ){ | |
2483 int rc = SQLITE_OK; | |
2484 char *zSql; | |
2485 spellfix1_vtab *pVTab = pCur->pVTab; | |
2486 spellfix1ResetCursor(pCur); | |
2487 assert( idxNum==0 || idxNum==64 ); | |
2488 zSql = sqlite3_mprintf( | |
2489 "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s", | |
2490 pVTab->zDbName, pVTab->zTableName, | |
2491 ((idxNum & 64) ? " WHERE rowid=?" : "") | |
2492 ); | |
2493 if( zSql==0 ) return SQLITE_NOMEM; | |
2494 rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0); | |
2495 sqlite3_free(zSql); | |
2496 if( rc==SQLITE_OK && (idxNum & 64) ){ | |
2497 assert( argc==1 ); | |
2498 rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]); | |
2499 } | |
2500 pCur->nRow = pCur->iRow = 0; | |
2501 if( rc==SQLITE_OK ){ | |
2502 rc = sqlite3_step(pCur->pFullScan); | |
2503 if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; } | |
2504 if( rc==SQLITE_DONE ){ rc = SQLITE_OK; } | |
2505 }else{ | |
2506 pCur->iRow = 0; | |
2507 } | |
2508 return rc; | |
2509 } | |
2510 | |
2511 | |
2512 /* | |
2513 ** Called to "rewind" a cursor back to the beginning so that | |
2514 ** it starts its output over again. Always called at least once | |
2515 ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call. | |
2516 */ | |
2517 static int spellfix1Filter( | |
2518 sqlite3_vtab_cursor *cur, | |
2519 int idxNum, const char *idxStr, | |
2520 int argc, sqlite3_value **argv | |
2521 ){ | |
2522 spellfix1_cursor *pCur = (spellfix1_cursor *)cur; | |
2523 int rc; | |
2524 if( idxNum & 1 ){ | |
2525 rc = spellfix1FilterForMatch(pCur, idxNum, argc, argv); | |
2526 }else{ | |
2527 rc = spellfix1FilterForFullScan(pCur, idxNum, argc, argv); | |
2528 } | |
2529 return rc; | |
2530 } | |
2531 | |
2532 | |
2533 /* | |
2534 ** Advance a cursor to its next row of output | |
2535 */ | |
2536 static int spellfix1Next(sqlite3_vtab_cursor *cur){ | |
2537 spellfix1_cursor *pCur = (spellfix1_cursor *)cur; | |
2538 int rc = SQLITE_OK; | |
2539 if( pCur->iRow < pCur->nRow ){ | |
2540 if( pCur->pFullScan ){ | |
2541 rc = sqlite3_step(pCur->pFullScan); | |
2542 if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow; | |
2543 if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK; | |
2544 }else{ | |
2545 pCur->iRow++; | |
2546 } | |
2547 } | |
2548 return rc; | |
2549 } | |
2550 | |
2551 /* | |
2552 ** Return TRUE if we are at the end-of-file | |
2553 */ | |
2554 static int spellfix1Eof(sqlite3_vtab_cursor *cur){ | |
2555 spellfix1_cursor *pCur = (spellfix1_cursor *)cur; | |
2556 return pCur->iRow>=pCur->nRow; | |
2557 } | |
2558 | |
2559 /* | |
2560 ** Return columns from the current row. | |
2561 */ | |
2562 static int spellfix1Column( | |
2563 sqlite3_vtab_cursor *cur, | |
2564 sqlite3_context *ctx, | |
2565 int i | |
2566 ){ | |
2567 spellfix1_cursor *pCur = (spellfix1_cursor*)cur; | |
2568 if( pCur->pFullScan ){ | |
2569 if( i<=SPELLFIX_COL_LANGID ){ | |
2570 sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i)); | |
2571 }else{ | |
2572 sqlite3_result_null(ctx); | |
2573 } | |
2574 return SQLITE_OK; | |
2575 } | |
2576 switch( i ){ | |
2577 case SPELLFIX_COL_WORD: { | |
2578 sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC); | |
2579 break; | |
2580 } | |
2581 case SPELLFIX_COL_RANK: { | |
2582 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank); | |
2583 break; | |
2584 } | |
2585 case SPELLFIX_COL_DISTANCE: { | |
2586 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance); | |
2587 break; | |
2588 } | |
2589 case SPELLFIX_COL_LANGID: { | |
2590 sqlite3_result_int(ctx, pCur->iLang); | |
2591 break; | |
2592 } | |
2593 case SPELLFIX_COL_SCORE: { | |
2594 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore); | |
2595 break; | |
2596 } | |
2597 case SPELLFIX_COL_MATCHLEN: { | |
2598 int iMatchlen = pCur->a[pCur->iRow].iMatchlen; | |
2599 if( iMatchlen<0 ){ | |
2600 int nPattern = (int)strlen(pCur->zPattern); | |
2601 char *zWord = pCur->a[pCur->iRow].zWord; | |
2602 int nWord = (int)strlen(zWord); | |
2603 | |
2604 if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){ | |
2605 char *zTranslit; | |
2606 int res; | |
2607 zTranslit = (char *)transliterate((unsigned char *)zWord, nWord); | |
2608 if( !zTranslit ) return SQLITE_NOMEM; | |
2609 res = editdist1(pCur->zPattern, zTranslit, &iMatchlen); | |
2610 sqlite3_free(zTranslit); | |
2611 if( res<0 ) return SQLITE_NOMEM; | |
2612 iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen); | |
2613 }else{ | |
2614 iMatchlen = utf8Charlen(zWord, nWord); | |
2615 } | |
2616 } | |
2617 | |
2618 sqlite3_result_int(ctx, iMatchlen); | |
2619 break; | |
2620 } | |
2621 case SPELLFIX_COL_PHONEHASH: { | |
2622 sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC); | |
2623 break; | |
2624 } | |
2625 case SPELLFIX_COL_TOP: { | |
2626 sqlite3_result_int(ctx, pCur->iTop); | |
2627 break; | |
2628 } | |
2629 case SPELLFIX_COL_SCOPE: { | |
2630 sqlite3_result_int(ctx, pCur->iScope); | |
2631 break; | |
2632 } | |
2633 case SPELLFIX_COL_SRCHCNT: { | |
2634 sqlite3_result_int(ctx, pCur->nSearch); | |
2635 break; | |
2636 } | |
2637 default: { | |
2638 sqlite3_result_null(ctx); | |
2639 break; | |
2640 } | |
2641 } | |
2642 return SQLITE_OK; | |
2643 } | |
2644 | |
2645 /* | |
2646 ** The rowid. | |
2647 */ | |
2648 static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ | |
2649 spellfix1_cursor *pCur = (spellfix1_cursor*)cur; | |
2650 if( pCur->pFullScan ){ | |
2651 *pRowid = sqlite3_column_int64(pCur->pFullScan, 4); | |
2652 }else{ | |
2653 *pRowid = pCur->a[pCur->iRow].iRowid; | |
2654 } | |
2655 return SQLITE_OK; | |
2656 } | |
2657 | |
2658 /* | |
2659 ** The xUpdate() method. | |
2660 */ | |
2661 static int spellfix1Update( | |
2662 sqlite3_vtab *pVTab, | |
2663 int argc, | |
2664 sqlite3_value **argv, | |
2665 sqlite_int64 *pRowid | |
2666 ){ | |
2667 int rc = SQLITE_OK; | |
2668 sqlite3_int64 rowid, newRowid; | |
2669 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; | |
2670 sqlite3 *db = p->db; | |
2671 | |
2672 if( argc==1 ){ | |
2673 /* A delete operation on the rowid given by argv[0] */ | |
2674 rowid = *pRowid = sqlite3_value_int64(argv[0]); | |
2675 spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" " | |
2676 " WHERE id=%lld", | |
2677 p->zDbName, p->zTableName, rowid); | |
2678 }else{ | |
2679 const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]); | |
2680 int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]); | |
2681 int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]); | |
2682 int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]); | |
2683 const unsigned char *zSoundslike = | |
2684 sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]); | |
2685 int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]); | |
2686 char *zK1, *zK2; | |
2687 int i; | |
2688 char c; | |
2689 | |
2690 if( zWord==0 ){ | |
2691 /* Inserts of the form: INSERT INTO table(command) VALUES('xyzzy'); | |
2692 ** cause zWord to be NULL, so we look at the "command" column to see | |
2693 ** what special actions to take */ | |
2694 const char *zCmd = | |
2695 (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]); | |
2696 if( zCmd==0 ){ | |
2697 pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word", | |
2698 p->zTableName); | |
2699 return SQLITE_CONSTRAINT_NOTNULL; | |
2700 } | |
2701 if( strcmp(zCmd,"reset")==0 ){ | |
2702 /* Reset the edit cost table (if there is one). */ | |
2703 editDist3ConfigDelete(p->pConfig3); | |
2704 p->pConfig3 = 0; | |
2705 return SQLITE_OK; | |
2706 } | |
2707 if( strncmp(zCmd,"edit_cost_table=",16)==0 ){ | |
2708 editDist3ConfigDelete(p->pConfig3); | |
2709 p->pConfig3 = 0; | |
2710 sqlite3_free(p->zCostTable); | |
2711 p->zCostTable = spellfix1Dequote(zCmd+16); | |
2712 if( p->zCostTable==0 ) return SQLITE_NOMEM; | |
2713 if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){ | |
2714 sqlite3_free(p->zCostTable); | |
2715 p->zCostTable = 0; | |
2716 } | |
2717 return SQLITE_OK; | |
2718 } | |
2719 pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"", | |
2720 p->zTableName, zCmd); | |
2721 return SQLITE_ERROR; | |
2722 } | |
2723 if( iRank<1 ) iRank = 1; | |
2724 if( zSoundslike ){ | |
2725 zK1 = (char*)transliterate(zSoundslike, nSoundslike); | |
2726 }else{ | |
2727 zK1 = (char*)transliterate(zWord, nWord); | |
2728 } | |
2729 if( zK1==0 ) return SQLITE_NOMEM; | |
2730 for(i=0; (c = zK1[i])!=0; i++){ | |
2731 if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A'; | |
2732 } | |
2733 zK2 = (char*)phoneticHash((const unsigned char*)zK1, i); | |
2734 if( zK2==0 ){ | |
2735 sqlite3_free(zK1); | |
2736 return SQLITE_NOMEM; | |
2737 } | |
2738 if( sqlite3_value_type(argv[0])==SQLITE_NULL ){ | |
2739 if( sqlite3_value_type(argv[1])==SQLITE_NULL ){ | |
2740 spellfix1DbExec(&rc, db, | |
2741 "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) " | |
2742 "VALUES(%d,%d,%Q,%Q,%Q)", | |
2743 p->zDbName, p->zTableName, | |
2744 iRank, iLang, zWord, zK1, zK2 | |
2745 ); | |
2746 }else{ | |
2747 newRowid = sqlite3_value_int64(argv[1]); | |
2748 spellfix1DbExec(&rc, db, | |
2749 "INSERT INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) " | |
2750 "VALUES(%lld,%d,%d,%Q,%Q,%Q)", | |
2751 p->zDbName, p->zTableName, | |
2752 newRowid, iRank, iLang, zWord, zK1, zK2 | |
2753 ); | |
2754 } | |
2755 *pRowid = sqlite3_last_insert_rowid(db); | |
2756 }else{ | |
2757 rowid = sqlite3_value_int64(argv[0]); | |
2758 newRowid = *pRowid = sqlite3_value_int64(argv[1]); | |
2759 spellfix1DbExec(&rc, db, | |
2760 "UPDATE \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d," | |
2761 " word=%Q, k1=%Q, k2=%Q WHERE id=%lld", | |
2762 p->zDbName, p->zTableName, newRowid, iRank, iLang, | |
2763 zWord, zK1, zK2, rowid | |
2764 ); | |
2765 } | |
2766 sqlite3_free(zK1); | |
2767 sqlite3_free(zK2); | |
2768 } | |
2769 return rc; | |
2770 } | |
2771 | |
2772 /* | |
2773 ** Rename the spellfix1 table. | |
2774 */ | |
2775 static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){ | |
2776 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; | |
2777 sqlite3 *db = p->db; | |
2778 int rc = SQLITE_OK; | |
2779 char *zNewName = sqlite3_mprintf("%s", zNew); | |
2780 if( zNewName==0 ){ | |
2781 return SQLITE_NOMEM; | |
2782 } | |
2783 spellfix1DbExec(&rc, db, | |
2784 "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"", | |
2785 p->zDbName, p->zTableName, zNewName | |
2786 ); | |
2787 if( rc==SQLITE_OK ){ | |
2788 sqlite3_free(p->zTableName); | |
2789 p->zTableName = zNewName; | |
2790 }else{ | |
2791 sqlite3_free(zNewName); | |
2792 } | |
2793 return rc; | |
2794 } | |
2795 | |
2796 | |
2797 /* | |
2798 ** A virtual table module that provides fuzzy search. | |
2799 */ | |
2800 static sqlite3_module spellfix1Module = { | |
2801 0, /* iVersion */ | |
2802 spellfix1Create, /* xCreate - handle CREATE VIRTUAL TABLE */ | |
2803 spellfix1Connect, /* xConnect - reconnected to an existing table */ | |
2804 spellfix1BestIndex, /* xBestIndex - figure out how to do a query */ | |
2805 spellfix1Disconnect, /* xDisconnect - close a connection */ | |
2806 spellfix1Destroy, /* xDestroy - handle DROP TABLE */ | |
2807 spellfix1Open, /* xOpen - open a cursor */ | |
2808 spellfix1Close, /* xClose - close a cursor */ | |
2809 spellfix1Filter, /* xFilter - configure scan constraints */ | |
2810 spellfix1Next, /* xNext - advance a cursor */ | |
2811 spellfix1Eof, /* xEof - check for end of scan */ | |
2812 spellfix1Column, /* xColumn - read data */ | |
2813 spellfix1Rowid, /* xRowid - read data */ | |
2814 spellfix1Update, /* xUpdate */ | |
2815 0, /* xBegin */ | |
2816 0, /* xSync */ | |
2817 0, /* xCommit */ | |
2818 0, /* xRollback */ | |
2819 0, /* xFindMethod */ | |
2820 spellfix1Rename, /* xRename */ | |
2821 }; | |
2822 | |
2823 /* | |
2824 ** Register the various functions and the virtual table. | |
2825 */ | |
2826 static int spellfix1Register(sqlite3 *db){ | |
2827 int rc = SQLITE_OK; | |
2828 int i; | |
2829 rc = sqlite3_create_function(db, "spellfix1_translit", 1, SQLITE_UTF8, 0, | |
2830 transliterateSqlFunc, 0, 0); | |
2831 if( rc==SQLITE_OK ){ | |
2832 rc = sqlite3_create_function(db, "spellfix1_editdist", 2, SQLITE_UTF8, 0, | |
2833 editdistSqlFunc, 0, 0); | |
2834 } | |
2835 if( rc==SQLITE_OK ){ | |
2836 rc = sqlite3_create_function(db, "spellfix1_phonehash", 1, SQLITE_UTF8, 0, | |
2837 phoneticHashSqlFunc, 0, 0); | |
2838 } | |
2839 if( rc==SQLITE_OK ){ | |
2840 rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1, SQLITE_UTF8, 0, | |
2841 scriptCodeSqlFunc, 0, 0); | |
2842 } | |
2843 if( rc==SQLITE_OK ){ | |
2844 rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0); | |
2845 } | |
2846 if( rc==SQLITE_OK ){ | |
2847 rc = editDist3Install(db); | |
2848 } | |
2849 | |
2850 /* Verify sanity of the translit[] table */ | |
2851 for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){ | |
2852 assert( translit[i].cFrom<translit[i+1].cFrom ); | |
2853 } | |
2854 | |
2855 return rc; | |
2856 } | |
2857 | |
2858 #endif /* SQLITE_OMIT_VIRTUALTABLE */ | |
2859 | |
2860 /* | |
2861 ** Extension load function. | |
2862 */ | |
2863 #ifdef _WIN32 | |
2864 __declspec(dllexport) | |
2865 #endif | |
2866 int sqlite3_spellfix_init( | |
2867 sqlite3 *db, | |
2868 char **pzErrMsg, | |
2869 const sqlite3_api_routines *pApi | |
2870 ){ | |
2871 SQLITE_EXTENSION_INIT2(pApi); | |
2872 #ifndef SQLITE_OMIT_VIRTUALTABLE | |
2873 return spellfix1Register(db); | |
2874 #endif | |
2875 return SQLITE_OK; | |
2876 } | |
OLD | NEW |