OLD | NEW |
1 /* | 1 /* |
2 ** 2006 September 30 | 2 ** 2006 September 30 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
121 */ | 121 */ |
122 static int porterClose(sqlite3_tokenizer_cursor *pCursor){ | 122 static int porterClose(sqlite3_tokenizer_cursor *pCursor){ |
123 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; | 123 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; |
124 sqlite3_free(c->zToken); | 124 sqlite3_free(c->zToken); |
125 sqlite3_free(c); | 125 sqlite3_free(c); |
126 return SQLITE_OK; | 126 return SQLITE_OK; |
127 } | 127 } |
128 /* | 128 /* |
129 ** Vowel or consonant | 129 ** Vowel or consonant |
130 */ | 130 */ |
131 static const char vOrCType[] = { | 131 static const char cType[] = { |
132 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, | 132 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, |
133 1, 1, 1, 2, 1 | 133 1, 1, 1, 2, 1 |
134 }; | 134 }; |
135 | 135 |
136 /* | 136 /* |
137 ** isConsonant() and isVowel() determine if the first character in | 137 ** isConsonant() and isVowel() determine if the first character in |
138 ** the string they point to is a consonant or a vowel, according | 138 ** the string they point to is a consonant or a vowel, according |
139 ** to Porter rules. | 139 ** to Porter rules. |
140 ** | 140 ** |
141 ** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'. | 141 ** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'. |
142 ** 'Y' is a consonant unless it follows another consonant, | 142 ** 'Y' is a consonant unless it follows another consonant, |
143 ** in which case it is a vowel. | 143 ** in which case it is a vowel. |
144 ** | 144 ** |
145 ** In these routines, the letters are in reverse order. So the 'y' rule | 145 ** In these routines, the letters are in reverse order. So the 'y' rule |
146 ** is that 'y' is a consonant unless it is followed by another | 146 ** is that 'y' is a consonant unless it is followed by another |
147 ** consonant. | 147 ** consonant. |
148 */ | 148 */ |
149 static int isVowel(const char*); | 149 static int isVowel(const char*); |
150 static int isConsonant(const char *z){ | 150 static int isConsonant(const char *z){ |
151 int j; | 151 int j; |
152 char x = *z; | 152 char x = *z; |
153 if( x==0 ) return 0; | 153 if( x==0 ) return 0; |
154 assert( x>='a' && x<='z' ); | 154 assert( x>='a' && x<='z' ); |
155 j = vOrCType[x-'a']; | 155 j = cType[x-'a']; |
156 if( j<2 ) return j; | 156 if( j<2 ) return j; |
157 return z[1]==0 || isVowel(z + 1); | 157 return z[1]==0 || isVowel(z + 1); |
158 } | 158 } |
159 static int isVowel(const char *z){ | 159 static int isVowel(const char *z){ |
160 int j; | 160 int j; |
161 char x = *z; | 161 char x = *z; |
162 if( x==0 ) return 0; | 162 if( x==0 ) return 0; |
163 assert( x>='a' && x<='z' ); | 163 assert( x>='a' && x<='z' ); |
164 j = vOrCType[x-'a']; | 164 j = cType[x-'a']; |
165 if( j<2 ) return 1-j; | 165 if( j<2 ) return 1-j; |
166 return isConsonant(z + 1); | 166 return isConsonant(z + 1); |
167 } | 167 } |
168 | 168 |
169 /* | 169 /* |
170 ** Let any sequence of one or more vowels be represented by V and let | 170 ** Let any sequence of one or more vowels be represented by V and let |
171 ** C be a sequence of one or more consonants. Then every word can be | 171 ** C be a sequence of one or more consonants. Then every word can be |
172 ** represented as: | 172 ** represented as: |
173 ** | 173 ** |
174 ** [C] (VC){m} [V] | 174 ** [C] (VC){m} [V] |
(...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
653 ** Allocate a new porter tokenizer. Return a pointer to the new | 653 ** Allocate a new porter tokenizer. Return a pointer to the new |
654 ** tokenizer in *ppModule | 654 ** tokenizer in *ppModule |
655 */ | 655 */ |
656 void sqlite3Fts3PorterTokenizerModule( | 656 void sqlite3Fts3PorterTokenizerModule( |
657 sqlite3_tokenizer_module const**ppModule | 657 sqlite3_tokenizer_module const**ppModule |
658 ){ | 658 ){ |
659 *ppModule = &porterTokenizerModule; | 659 *ppModule = &porterTokenizerModule; |
660 } | 660 } |
661 | 661 |
662 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | 662 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
OLD | NEW |