OLD | NEW |
1 /* | 1 /* |
2 ** 2012 May 25 | 2 ** 2012 May 25 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
11 ****************************************************************************** | 11 ****************************************************************************** |
12 */ | 12 */ |
13 | 13 |
14 /* | 14 /* |
15 ** DO NOT EDIT THIS MACHINE GENERATED FILE. | 15 ** DO NOT EDIT THIS MACHINE GENERATED FILE. |
16 */ | 16 */ |
17 | 17 |
18 #ifndef SQLITE_DISABLE_FTS3_UNICODE | |
19 #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) | |
20 | 18 |
21 #include <assert.h> | 19 #include <assert.h> |
22 | 20 |
23 /* | 21 /* |
24 ** Return true if the argument corresponds to a unicode codepoint | 22 ** Return true if the argument corresponds to a unicode codepoint |
25 ** classified as either a letter or a number. Otherwise false. | 23 ** classified as either a letter or a number. Otherwise false. |
26 ** | 24 ** |
27 ** The results are undefined if the value passed to this function | 25 ** The results are undefined if the value passed to this function |
28 ** is less than zero. | 26 ** is less than zero. |
29 */ | 27 */ |
30 int sqlite3FtsUnicodeIsalnum(int c){ | 28 int sqlite3Fts5UnicodeIsalnum(int c){ |
31 /* Each unsigned integer in the following array corresponds to a contiguous | 29 /* Each unsigned integer in the following array corresponds to a contiguous |
30 ** range of unicode codepoints that are neither letters nor numbers (i.e. | 30 ** range of unicode codepoints that are neither letters nor numbers (i.e. |
33 ** codepoints for which this function should return 0). | 31 ** codepoints for which this function should return 0). |
34 ** | 32 ** |
35 ** The most significant 22 bits in each 32-bit value contain the first | 33 ** The most significant 22 bits in each 32-bit value contain the first |
36 ** codepoint in the range. The least significant 10 bits are used to store | 34 ** codepoint in the range. The least significant 10 bits are used to store |
37 ** the size of the range (always at least 1). In other words, the value | 35 ** the size of the range (always at least 1). In other words, the value |
38 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint | 36 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint |
39 ** C. It is not possible to represent a range larger than 1023 codepoints | 37 ** C. It is not possible to represent a range larger than 1023 codepoints |
40 ** using this format. | 38 ** using this format. |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
152 | 150 |
153 | 151 |
154 /* | 152 /* |
155 ** If the argument is a codepoint corresponding to a lowercase letter | 153 ** If the argument is a codepoint corresponding to a lowercase letter |
156 ** in the ASCII range with a diacritic added, return the codepoint | 154 ** in the ASCII range with a diacritic added, return the codepoint |
157 ** of the ASCII letter only. For example, if passed 235 - "LATIN | 155 ** of the ASCII letter only. For example, if passed 235 - "LATIN |
158 ** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER | 156 ** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER |
159 ** E"). The results of passing a codepoint that corresponds to an | 157 ** E"). The results of passing a codepoint that corresponds to an |
160 ** uppercase letter are undefined. | 158 ** uppercase letter are undefined. |
161 */ | 159 */ |
162 static int remove_diacritic(int c){ | 160 static int fts5_remove_diacritic(int c){ |
163 unsigned short aDia[] = { | 161 unsigned short aDia[] = { |
164 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, | 162 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, |
165 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, | 163 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, |
166 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, | 164 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, |
167 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, | 165 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, |
168 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, | 166 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, |
169 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, | 167 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, |
170 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, | 168 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, |
171 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, | 169 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, |
172 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, | 170 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, |
(...skipping 29 matching lines...) Expand all Loading... |
202 } | 200 } |
203 assert( key>=aDia[iRes] ); | 201 assert( key>=aDia[iRes] ); |
204 return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); | 202 return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); |
205 } | 203 } |
206 | 204 |
207 | 205 |
208 /* | 206 /* |
209 ** Return true if the argument interpreted as a unicode codepoint | 207 ** Return true if the argument interpreted as a unicode codepoint |
210 ** is a diacritical modifier character. | 208 ** is a diacritical modifier character. |
211 */ | 209 */ |
212 int sqlite3FtsUnicodeIsdiacritic(int c){ | 210 int sqlite3Fts5UnicodeIsdiacritic(int c){ |
213 unsigned int mask0 = 0x08029FDF; | 211 unsigned int mask0 = 0x08029FDF; |
214 unsigned int mask1 = 0x000361F8; | 212 unsigned int mask1 = 0x000361F8; |
215 if( c<768 || c>817 ) return 0; | 213 if( c<768 || c>817 ) return 0; |
216 return (c < 768+32) ? | 214 return (c < 768+32) ? |
217 (mask0 & (1 << (c-768))) : | 215 (mask0 & (1 << (c-768))) : |
218 (mask1 & (1 << (c-768-32))); | 216 (mask1 & (1 << (c-768-32))); |
219 } | 217 } |
220 | 218 |
221 | 219 |
222 /* | 220 /* |
223 ** Interpret the argument as a unicode codepoint. If the codepoint | 221 ** Interpret the argument as a unicode codepoint. If the codepoint |
224 ** is an upper case character that has a lower case equivalent, | 222 ** is an upper case character that has a lower case equivalent, |
225 ** return the codepoint corresponding to the lower case version. | 223 ** return the codepoint corresponding to the lower case version. |
226 ** Otherwise, return a copy of the argument. | 224 ** Otherwise, return a copy of the argument. |
227 ** | 225 ** |
228 ** The results are undefined if the value passed to this function | 226 ** The results are undefined if the value passed to this function |
229 ** is less than zero. | 227 ** is less than zero. |
230 */ | 228 */ |
231 int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ | 229 int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){ |
232 /* Each entry in the following array defines a rule for folding a range | 230 /* Each entry in the following array defines a rule for folding a range |
233 ** of codepoints to lower case. The rule applies to a range of nRange | 231 ** of codepoints to lower case. The rule applies to a range of nRange |
234 ** codepoints starting at codepoint iCode. | 232 ** codepoints starting at codepoint iCode. |
235 ** | 233 ** |
236 ** If the least significant bit in flags is clear, then the rule applies | 234 ** If the least significant bit in flags is clear, then the rule applies |
237 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and | 235 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and |
238 ** need to be folded). Or, if it is set, then the rule only applies to | 236 ** need to be folded). Or, if it is set, then the rule only applies to |
239 ** every second codepoint in the range, starting with codepoint iCode. | 237 ** every second codepoint in the range, starting with codepoint iCode. |
240 ** | 238 ** |
241 ** The 7 most significant bits in flags are an index into the aiOff[] | 239 ** The 7 most significant bits in flags are an index into the aiOff[] |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
315 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, | 313 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, |
316 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, | 314 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, |
317 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, | 315 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, |
318 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, | 316 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, |
319 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, | 317 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, |
320 65514, 65521, 65527, 65528, 65529, | 318 65514, 65521, 65527, 65528, 65529, |
321 }; | 319 }; |
322 | 320 |
323 int ret = c; | 321 int ret = c; |
324 | 322 |
325 assert( c>=0 ); | |
326 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); | 323 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); |
327 | 324 |
328 if( c<128 ){ | 325 if( c<128 ){ |
329 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); | 326 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); |
330 }else if( c<65536 ){ | 327 }else if( c<65536 ){ |
| 328 const struct TableEntry *p; |
331 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; | 329 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; |
332 int iLo = 0; | 330 int iLo = 0; |
333 int iRes = -1; | 331 int iRes = -1; |
334 | 332 |
| 333 assert( c>aEntry[0].iCode ); |
335 while( iHi>=iLo ){ | 334 while( iHi>=iLo ){ |
336 int iTest = (iHi + iLo) / 2; | 335 int iTest = (iHi + iLo) / 2; |
337 int cmp = (c - aEntry[iTest].iCode); | 336 int cmp = (c - aEntry[iTest].iCode); |
338 if( cmp>=0 ){ | 337 if( cmp>=0 ){ |
339 iRes = iTest; | 338 iRes = iTest; |
340 iLo = iTest+1; | 339 iLo = iTest+1; |
341 }else{ | 340 }else{ |
342 iHi = iTest-1; | 341 iHi = iTest-1; |
343 } | 342 } |
344 } | 343 } |
345 assert( iRes<0 || c>=aEntry[iRes].iCode ); | |
346 | 344 |
347 if( iRes>=0 ){ | 345 assert( iRes>=0 && c>=aEntry[iRes].iCode ); |
348 const struct TableEntry *p = &aEntry[iRes]; | 346 p = &aEntry[iRes]; |
349 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ | 347 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ |
350 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; | 348 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; |
351 assert( ret>0 ); | 349 assert( ret>0 ); |
352 } | |
353 } | 350 } |
354 | 351 |
355 if( bRemoveDiacritic ) ret = remove_diacritic(ret); | 352 if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret); |
356 } | 353 } |
357 | 354 |
358 else if( c>=66560 && c<66600 ){ | 355 else if( c>=66560 && c<66600 ){ |
359 ret = c + 40; | 356 ret = c + 40; |
360 } | 357 } |
361 | 358 |
362 return ret; | 359 return ret; |
363 } | 360 } |
364 #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ | |
365 #endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ | |
OLD | NEW |