Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 ** 2007 May 6 | 2 ** 2007 May 6 |
| 3 ** | 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
| 6 ** | 6 ** |
| 7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
| 10 ** | 10 ** |
| (...skipping 43 matching lines...) | |
| 54 #endif | 54 #endif |
| 55 | 55 |
| 56 /* | 56 /* |
| 57 ** Version of sqlite3_free() that is always a function, never a macro. | 57 ** Version of sqlite3_free() that is always a function, never a macro. |
| 58 */ | 58 */ |
| 59 static void xFree(void *p){ | 59 static void xFree(void *p){ |
| 60 sqlite3_free(p); | 60 sqlite3_free(p); |
| 61 } | 61 } |
| 62 | 62 |
| 63 /* | 63 /* |
| 64 ** This lookup table is used to help decode the first byte of | |
| 65 ** a multi-byte UTF8 character. It is copied here from SQLite source | |
| 66 ** code file utf8.c. | |
| 67 */ | |
| 68 static const unsigned char icuUtf8Trans1[] = { | |
| 69 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |
| 70 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | |
| 71 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
| 72 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, | |
| 73 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |
| 74 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | |
| 75 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | |
| 76 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, | |
|
jungshik at Google
2016/03/11 21:37:08
The last line, which covers lead bytes 0xf8 ~ 0xff, should be dropped. I [... comment truncated in this view]
| |
| 77 }; | |
| 78 | |
| 79 #define SQLITE_ICU_READ_UTF8(zIn, c) \ | |
| 80 c = *(zIn++); \ | |
| 81 if( c>=0xc0 ){ \ | |
|
jungshik at Google
2016/03/11 21:37:08
For the same reason, |c| has to be bounded by 0xf8 (c < 0xf8) before it is used to index icuUtf8Trans1.
| |
| 82 c = icuUtf8Trans1[c-0xc0]; \ | |
| 83 while( (*zIn & 0xc0)==0x80 ){ \ | |
|
jungshik at Google
2016/03/11 21:37:08
Moreover, a check has to be in place for cases whe[... comment truncated in this view]
| |
| 84 c = (c<<6) + (0x3f & *(zIn++)); \ | |
| 85 } \ | |
| 86 } | |
| 87 | |
| 88 #define SQLITE_ICU_SKIP_UTF8(zIn) \ | |
| 89 assert( *zIn ); \ | |
| 90 if( *(zIn++)>=0xc0 ){ \ | |
|
jungshik at Google
2016/03/11 21:37:08
Same comment as for the previous |if( c>=0xc0 )| check.
| |
| 91 while( (*zIn & 0xc0)==0x80 ){zIn++;} \ | |
| 92 } | |
| 93 | |
| 94 | |
| 95 /* | |
| 64 ** Compare two UTF-8 strings for equality where the first string is | 96 ** Compare two UTF-8 strings for equality where the first string is |
| 65 ** a "LIKE" expression. Return true (1) if they are the same and | 97 ** a "LIKE" expression. Return true (1) if they are the same and |
| 66 ** false (0) if they are different. | 98 ** false (0) if they are different. |
| 67 */ | 99 */ |
| 68 static int icuLikeCompare( | 100 static int icuLikeCompare( |
| 69 const uint8_t *zPattern, /* LIKE pattern */ | 101 const uint8_t *zPattern, /* LIKE pattern */ |
| 70 const uint8_t *zString, /* The UTF-8 string to compare against */ | 102 const uint8_t *zString, /* The UTF-8 string to compare against */ |
| 71 const UChar32 uEsc /* The escape character */ | 103 const UChar32 uEsc /* The escape character */ |
| 72 ){ | 104 ){ |
| 73 static const int MATCH_ONE = (UChar32)'_'; | 105 static const int MATCH_ONE = (UChar32)'_'; |
| 74 static const int MATCH_ALL = (UChar32)'%'; | 106 static const int MATCH_ALL = (UChar32)'%'; |
| 75 | 107 |
| 76 int iPattern = 0; /* Current byte index in zPattern */ | |
| 77 int iString = 0; /* Current byte index in zString */ | |
| 78 | |
| 79 int prevEscape = 0; /* True if the previous character was uEsc */ | 108 int prevEscape = 0; /* True if the previous character was uEsc */ |
| 80 | 109 |
| 81 while( zPattern[iPattern]!=0 ){ | 110 while( 1 ){ |
| 82 | 111 |
| 83 /* Read (and consume) the next character from the input pattern. */ | 112 /* Read (and consume) the next character from the input pattern. */ |
| 84 UChar32 uPattern; | 113 UChar32 uPattern; |
| 85 U8_NEXT_OR_FFFD(zPattern, iPattern, -1, uPattern); | 114 SQLITE_ICU_READ_UTF8(zPattern, uPattern); |
| 115 if( uPattern==0 ) break; | |
| 86 | 116 |
| 87 /* There are now 4 possibilities: | 117 /* There are now 4 possibilities: |
| 88 ** | 118 ** |
| 89 ** 1. uPattern is an unescaped match-all character "%", | 119 ** 1. uPattern is an unescaped match-all character "%", |
| 90 ** 2. uPattern is an unescaped match-one character "_", | 120 ** 2. uPattern is an unescaped match-one character "_", |
| 91 ** 3. uPattern is an unescaped escape character, or | 121 ** 3. uPattern is an unescaped escape character, or |
| 92 ** 4. uPattern is to be handled as an ordinary character | 122 ** 4. uPattern is to be handled as an ordinary character |
| 93 */ | 123 */ |
| 94 if( !prevEscape && uPattern==MATCH_ALL ){ | 124 if( !prevEscape && uPattern==MATCH_ALL ){ |
| 95 /* Case 1. */ | 125 /* Case 1. */ |
| 96 uint8_t c; | 126 uint8_t c; |
| 97 | 127 |
| 98 /* Skip any MATCH_ALL or MATCH_ONE characters that follow a | 128 /* Skip any MATCH_ALL or MATCH_ONE characters that follow a |
| 99 ** MATCH_ALL. For each MATCH_ONE, skip one character in the | 129 ** MATCH_ALL. For each MATCH_ONE, skip one character in the |
| 100 ** test string. | 130 ** test string. |
| 101 */ | 131 */ |
| 102 while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){ | 132 while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){ |
| 103 if( c==MATCH_ONE ){ | 133 if( c==MATCH_ONE ){ |
| 104 if( zString[iString]==0 ) return 0; | 134 if( *zString==0 ) return 0; |
| 105 U8_FWD_1(zString, iString, -1); | 135 SQLITE_ICU_SKIP_UTF8(zString); |
| 106 } | 136 } |
| 107 iPattern++; | 137 zPattern++; |
| 108 } | 138 } |
| 109 | 139 |
| 110 if( zPattern[iPattern]==0 ) return 1; | 140 if( *zPattern==0 ) return 1; |
| 111 | 141 |
| 112 while( zString[iString] ){ | 142 while( *zString ){ |
| 113 if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){ | 143 if( icuLikeCompare(zPattern, zString, uEsc) ){ |
| 114 return 1; | 144 return 1; |
| 115 } | 145 } |
| 116 U8_FWD_1(zString, iString, -1); | 146 SQLITE_ICU_SKIP_UTF8(zString); |
| 117 } | 147 } |
| 118 return 0; | 148 return 0; |
| 119 | 149 |
| 120 }else if( !prevEscape && uPattern==MATCH_ONE ){ | 150 }else if( !prevEscape && uPattern==MATCH_ONE ){ |
| 121 /* Case 2. */ | 151 /* Case 2. */ |
| 122 if( zString[iString]==0 ) return 0; | 152 if( *zString==0 ) return 0; |
| 123 U8_FWD_1(zString, iString, -1); | 153 SQLITE_ICU_SKIP_UTF8(zString); |
| 124 | 154 |
| 125 }else if( !prevEscape && uPattern==uEsc){ | 155 }else if( !prevEscape && uPattern==uEsc){ |
| 126 /* Case 3. */ | 156 /* Case 3. */ |
| 127 prevEscape = 1; | 157 prevEscape = 1; |
| 128 | 158 |
| 129 }else{ | 159 }else{ |
| 130 /* Case 4. */ | 160 /* Case 4. */ |
| 131 UChar32 uString; | 161 UChar32 uString; |
| 132 U8_NEXT_OR_FFFD(zString, iString, -1, uString); | 162 SQLITE_ICU_READ_UTF8(zString, uString); |
| 133 uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); | 163 uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); |
| 134 uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); | 164 uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); |
| 135 if( uString!=uPattern ){ | 165 if( uString!=uPattern ){ |
| 136 return 0; | 166 return 0; |
| 137 } | 167 } |
| 138 prevEscape = 0; | 168 prevEscape = 0; |
| 139 } | 169 } |
| 140 } | 170 } |
| 141 | 171 |
| 142 return zString[iString]==0; | 172 return *zString==0; |
| 143 } | 173 } |
| 144 | 174 |
| 145 /* | 175 /* |
| 146 ** Implementation of the like() SQL function. This function implements | 176 ** Implementation of the like() SQL function. This function implements |
| 147 ** the built-in LIKE operator. The first argument to the function is the | 177 ** the built-in LIKE operator. The first argument to the function is the |
| 148 ** pattern and the second argument is the string. So, the SQL statements: | 178 ** pattern and the second argument is the string. So, the SQL statements: |
| 149 ** | 179 ** |
| 150 ** A LIKE B | 180 ** A LIKE B |
| 151 ** | 181 ** |
| 152 ** is implemented as like(B, A). If there is an escape character E, | 182 ** is implemented as like(B, A). If there is an escape character E, |
| (...skipping 361 matching lines...) | |
| 514 sqlite3 *db, | 544 sqlite3 *db, |
| 515 char **pzErrMsg, | 545 char **pzErrMsg, |
| 516 const sqlite3_api_routines *pApi | 546 const sqlite3_api_routines *pApi |
| 517 ){ | 547 ){ |
| 518 SQLITE_EXTENSION_INIT2(pApi) | 548 SQLITE_EXTENSION_INIT2(pApi) |
| 519 return sqlite3IcuInit(db); | 549 return sqlite3IcuInit(db); |
| 520 } | 550 } |
| 521 #endif | 551 #endif |
| 522 | 552 |
| 523 #endif | 553 #endif |
| OLD | NEW |