Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(377)

Side by Side Diff: third_party/sqlite/sqlite-src-3100200/ext/fts5/fts5_unicode2.c

Issue 1610543003: [sql] Import reference version of SQLite 3.10.2. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 ** 2012 May 25 2 ** 2012 May 25
3 ** 3 **
4 ** The author disclaims copyright to this source code. In place of 4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing: 5 ** a legal notice, here is a blessing:
6 ** 6 **
7 ** May you do good and not evil. 7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others. 8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give. 9 ** May you share freely, never taking more than you give.
10 ** 10 **
11 ****************************************************************************** 11 ******************************************************************************
12 */ 12 */
13 13
14 /* 14 /*
15 ** DO NOT EDIT THIS MACHINE GENERATED FILE. 15 ** DO NOT EDIT THIS MACHINE GENERATED FILE.
16 */ 16 */
17 17
18 #ifndef SQLITE_DISABLE_FTS3_UNICODE
19 #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
20 18
21 #include <assert.h> 19 #include <assert.h>
22 20
23 /* 21 /*
24 ** Return true if the argument corresponds to a unicode codepoint 22 ** Return true if the argument corresponds to a unicode codepoint
25 ** classified as either a letter or a number. Otherwise false. 23 ** classified as either a letter or a number. Otherwise false.
26 ** 24 **
27 ** The results are undefined if the value passed to this function 25 ** The results are undefined if the value passed to this function
28 ** is less than zero. 26 ** is less than zero.
29 */ 27 */
30 int sqlite3FtsUnicodeIsalnum(int c){ 28 int sqlite3Fts5UnicodeIsalnum(int c){
31 /* Each unsigned integer in the following array corresponds to a contiguous 29 /* Each unsigned integer in the following array corresponds to a contiguous
32 ** range of unicode codepoints that are neither letters nor numbers (i.e. 30 ** range of unicode codepoints that are neither letters nor numbers (i.e.
33 ** codepoints for which this function should return 0). 31 ** codepoints for which this function should return 0).
34 ** 32 **
35 ** The most significant 22 bits in each 32-bit value contain the first 33 ** The most significant 22 bits in each 32-bit value contain the first
36 ** codepoint in the range. The least significant 10 bits are used to store 34 ** codepoint in the range. The least significant 10 bits are used to store
37 ** the size of the range (always at least 1). In other words, the value 35 ** the size of the range (always at least 1). In other words, the value
38 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint 36 ** ((C<<22) + N) represents a range of N codepoints starting with codepoint
39 ** C. It is not possible to represent a range larger than 1023 codepoints 37 ** C. It is not possible to represent a range larger than 1023 codepoints
40 ** using this format. 38 ** using this format.
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
152 150
153 151
154 /* 152 /*
155 ** If the argument is a codepoint corresponding to a lowercase letter 153 ** If the argument is a codepoint corresponding to a lowercase letter
156 ** in the ASCII range with a diacritic added, return the codepoint 154 ** in the ASCII range with a diacritic added, return the codepoint
157 ** of the ASCII letter only. For example, if passed 235 - "LATIN 155 ** of the ASCII letter only. For example, if passed 235 - "LATIN
158 ** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER 156 ** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
159 ** E"). The results of passing a codepoint that corresponds to an 157 ** E"). The results of passing a codepoint that corresponds to an
160 ** uppercase letter are undefined. 158 ** uppercase letter are undefined.
161 */ 159 */
162 static int remove_diacritic(int c){ 160 static int fts5_remove_diacritic(int c){
163 unsigned short aDia[] = { 161 unsigned short aDia[] = {
164 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, 162 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
165 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, 163 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
166 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, 164 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
167 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, 165 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
168 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, 166 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928,
169 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, 167 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234,
170 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, 168 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504,
171 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, 169 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529,
172 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, 170 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
(...skipping 29 matching lines...) Expand all
202 } 200 }
203 assert( key>=aDia[iRes] ); 201 assert( key>=aDia[iRes] );
204 return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); 202 return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
205 } 203 }
206 204
207 205
208 /* 206 /*
209 ** Return true if the argument interpreted as a unicode codepoint 207 ** Return true if the argument interpreted as a unicode codepoint
210 ** is a diacritical modifier character. 208 ** is a diacritical modifier character.
211 */ 209 */
212 int sqlite3FtsUnicodeIsdiacritic(int c){ 210 int sqlite3Fts5UnicodeIsdiacritic(int c){
213 unsigned int mask0 = 0x08029FDF; 211 unsigned int mask0 = 0x08029FDF;
214 unsigned int mask1 = 0x000361F8; 212 unsigned int mask1 = 0x000361F8;
215 if( c<768 || c>817 ) return 0; 213 if( c<768 || c>817 ) return 0;
216 return (c < 768+32) ? 214 return (c < 768+32) ?
217 (mask0 & (1 << (c-768))) : 215 (mask0 & (1 << (c-768))) :
218 (mask1 & (1 << (c-768-32))); 216 (mask1 & (1 << (c-768-32)));
219 } 217 }
220 218
221 219
222 /* 220 /*
223 ** Interpret the argument as a unicode codepoint. If the codepoint 221 ** Interpret the argument as a unicode codepoint. If the codepoint
224 ** is an upper case character that has a lower case equivalent, 222 ** is an upper case character that has a lower case equivalent,
225 ** return the codepoint corresponding to the lower case version. 223 ** return the codepoint corresponding to the lower case version.
226 ** Otherwise, return a copy of the argument. 224 ** Otherwise, return a copy of the argument.
227 ** 225 **
228 ** The results are undefined if the value passed to this function 226 ** The results are undefined if the value passed to this function
229 ** is less than zero. 227 ** is less than zero.
230 */ 228 */
231 int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ 229 int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
232 /* Each entry in the following array defines a rule for folding a range 230 /* Each entry in the following array defines a rule for folding a range
233 ** of codepoints to lower case. The rule applies to a range of nRange 231 ** of codepoints to lower case. The rule applies to a range of nRange
234 ** codepoints starting at codepoint iCode. 232 ** codepoints starting at codepoint iCode.
235 ** 233 **
236 ** If the least significant bit in flags is clear, then the rule applies 234 ** If the least significant bit in flags is clear, then the rule applies
237 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and 235 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
238 ** need to be folded). Or, if it is set, then the rule only applies to 236 ** need to be folded). Or, if it is set, then the rule only applies to
239 ** every second codepoint in the range, starting with codepoint iCode. 237 ** every second codepoint in the range, starting with codepoint iCode.
240 ** 238 **
241 ** The 7 most significant bits in flags are an index into the aiOff[] 239 ** The 7 most significant bits in flags are an index into the aiOff[]
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, 313 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
316 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 314 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
317 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 315 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
318 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 316 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
319 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 317 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
320 65514, 65521, 65527, 65528, 65529, 318 65514, 65521, 65527, 65528, 65529,
321 }; 319 };
322 320
323 int ret = c; 321 int ret = c;
324 322
325 assert( c>=0 );
326 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); 323 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
327 324
328 if( c<128 ){ 325 if( c<128 ){
329 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); 326 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
330 }else if( c<65536 ){ 327 }else if( c<65536 ){
328 const struct TableEntry *p;
331 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; 329 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
332 int iLo = 0; 330 int iLo = 0;
333 int iRes = -1; 331 int iRes = -1;
334 332
333 assert( c>aEntry[0].iCode );
335 while( iHi>=iLo ){ 334 while( iHi>=iLo ){
336 int iTest = (iHi + iLo) / 2; 335 int iTest = (iHi + iLo) / 2;
337 int cmp = (c - aEntry[iTest].iCode); 336 int cmp = (c - aEntry[iTest].iCode);
338 if( cmp>=0 ){ 337 if( cmp>=0 ){
339 iRes = iTest; 338 iRes = iTest;
340 iLo = iTest+1; 339 iLo = iTest+1;
341 }else{ 340 }else{
342 iHi = iTest-1; 341 iHi = iTest-1;
343 } 342 }
344 } 343 }
345 assert( iRes<0 || c>=aEntry[iRes].iCode );
346 344
347 if( iRes>=0 ){ 345 assert( iRes>=0 && c>=aEntry[iRes].iCode );
348 const struct TableEntry *p = &aEntry[iRes]; 346 p = &aEntry[iRes];
349 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ 347 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
350 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; 348 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
351 assert( ret>0 ); 349 assert( ret>0 );
352 }
353 } 350 }
354 351
355 if( bRemoveDiacritic ) ret = remove_diacritic(ret); 352 if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);
356 } 353 }
357 354
358 else if( c>=66560 && c<66600 ){ 355 else if( c>=66560 && c<66600 ){
359 ret = c + 40; 356 ret = c + 40;
360 } 357 }
361 358
362 return ret; 359 return ret;
363 } 360 }
364 #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
365 #endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698