| OLD | NEW |
| 1 /* | 1 /* |
| 2 ** 2006 September 30 | 2 ** 2006 September 30 |
| 3 ** | 3 ** |
| 4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
| 5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
| 6 ** | 6 ** |
| 7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
| 8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
| 9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
| 10 ** | 10 ** |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 121 */ | 121 */ |
| 122 static int porterClose(sqlite3_tokenizer_cursor *pCursor){ | 122 static int porterClose(sqlite3_tokenizer_cursor *pCursor){ |
| 123 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; | 123 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; |
| 124 sqlite3_free(c->zToken); | 124 sqlite3_free(c->zToken); |
| 125 sqlite3_free(c); | 125 sqlite3_free(c); |
| 126 return SQLITE_OK; | 126 return SQLITE_OK; |
| 127 } | 127 } |
| 128 /* | 128 /* |
| 129 ** Vowel or consonant | 129 ** Vowel or consonant |
| 130 */ | 130 */ |
| 131 static const char vOrCType[] = { | 131 static const char cType[] = { |
| 132 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, | 132 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, |
| 133 1, 1, 1, 2, 1 | 133 1, 1, 1, 2, 1 |
| 134 }; | 134 }; |
| 135 | 135 |
| 136 /* | 136 /* |
| 137 ** isConsonant() and isVowel() determine if the first character in | 137 ** isConsonant() and isVowel() determine if the first character in |
| 138 ** the string they point to is a consonant or a vowel, according | 138 ** the string they point to is a consonant or a vowel, according |
| 139 ** to Porter rules. | 139 ** to Porter rules. |
| 140 ** | 140 ** |
| 141 ** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'. | 141 ** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'. |
| 142 ** 'Y' is a consonant unless it follows another consonant, | 142 ** 'Y' is a consonant unless it follows another consonant, |
| 143 ** in which case it is a vowel. | 143 ** in which case it is a vowel. |
| 144 ** | 144 ** |
| 145 ** In these routines, the letters are in reverse order. So the 'y' rule | 145 ** In these routines, the letters are in reverse order. So the 'y' rule |
| 146 ** is that 'y' is a consonant unless it is followed by another | 146 ** is that 'y' is a consonant unless it is followed by another |
| 147 ** consonant. | 147 ** consonant. |
| 148 */ | 148 */ |
| 149 static int isVowel(const char*); | 149 static int isVowel(const char*); |
| 150 static int isConsonant(const char *z){ | 150 static int isConsonant(const char *z){ |
| 151 int j; | 151 int j; |
| 152 char x = *z; | 152 char x = *z; |
| 153 if( x==0 ) return 0; | 153 if( x==0 ) return 0; |
| 154 assert( x>='a' && x<='z' ); | 154 assert( x>='a' && x<='z' ); |
| 155 j = vOrCType[x-'a']; | 155 j = cType[x-'a']; |
| 156 if( j<2 ) return j; | 156 if( j<2 ) return j; |
| 157 return z[1]==0 || isVowel(z + 1); | 157 return z[1]==0 || isVowel(z + 1); |
| 158 } | 158 } |
| 159 static int isVowel(const char *z){ | 159 static int isVowel(const char *z){ |
| 160 int j; | 160 int j; |
| 161 char x = *z; | 161 char x = *z; |
| 162 if( x==0 ) return 0; | 162 if( x==0 ) return 0; |
| 163 assert( x>='a' && x<='z' ); | 163 assert( x>='a' && x<='z' ); |
| 164 j = vOrCType[x-'a']; | 164 j = cType[x-'a']; |
| 165 if( j<2 ) return 1-j; | 165 if( j<2 ) return 1-j; |
| 166 return isConsonant(z + 1); | 166 return isConsonant(z + 1); |
| 167 } | 167 } |
| 168 | 168 |
| 169 /* | 169 /* |
| 170 ** Let any sequence of one or more vowels be represented by V and let | 170 ** Let any sequence of one or more vowels be represented by V and let |
| 171 ** C be a sequence of one or more consonants. Then every word can be | 171 ** C be a sequence of one or more consonants. Then every word can be |
| 172 ** represented as: | 172 ** represented as: |
| 173 ** | 173 ** |
| 174 ** [C] (VC){m} [V] | 174 ** [C] (VC){m} [V] |
| (...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 653 ** Allocate a new porter tokenizer. Return a pointer to the new | 653 ** Allocate a new porter tokenizer. Return a pointer to the new |
| 654 ** tokenizer in *ppModule | 654 ** tokenizer in *ppModule |
| 655 */ | 655 */ |
| 656 void sqlite3Fts3PorterTokenizerModule( | 656 void sqlite3Fts3PorterTokenizerModule( |
| 657 sqlite3_tokenizer_module const**ppModule | 657 sqlite3_tokenizer_module const**ppModule |
| 658 ){ | 658 ){ |
| 659 *ppModule = &porterTokenizerModule; | 659 *ppModule = &porterTokenizerModule; |
| 660 } | 660 } |
| 661 | 661 |
| 662 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | 662 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
| OLD | NEW |