OLD | NEW |
1 /* | 1 /* |
2 ** 2006 September 30 | 2 ** 2006 September 30 |
3 ** | 3 ** |
4 ** The author disclaims copyright to this source code. In place of | 4 ** The author disclaims copyright to this source code. In place of |
5 ** a legal notice, here is a blessing: | 5 ** a legal notice, here is a blessing: |
6 ** | 6 ** |
7 ** May you do good and not evil. | 7 ** May you do good and not evil. |
8 ** May you find forgiveness for yourself and forgive others. | 8 ** May you find forgiveness for yourself and forgive others. |
9 ** May you share freely, never taking more than you give. | 9 ** May you share freely, never taking more than you give. |
10 ** | 10 ** |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
122 */ | 122 */ |
123 static int porterClose(sqlite3_tokenizer_cursor *pCursor){ | 123 static int porterClose(sqlite3_tokenizer_cursor *pCursor){ |
124 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; | 124 porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; |
125 sqlite3_free(c->zToken); | 125 sqlite3_free(c->zToken); |
126 sqlite3_free(c); | 126 sqlite3_free(c); |
127 return SQLITE_OK; | 127 return SQLITE_OK; |
128 } | 128 } |
129 /* | 129 /* |
130 ** Vowel or consonant | 130 ** Vowel or consonant |
131 */ | 131 */ |
132 static const char vOrCType[] = { | 132 static const char cType[] = { |
133 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, | 133 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, |
134 1, 1, 1, 2, 1 | 134 1, 1, 1, 2, 1 |
135 }; | 135 }; |
136 | 136 |
137 /* | 137 /* |
138 ** isConsonant() and isVowel() determine if the first character in | 138 ** isConsonant() and isVowel() determine if the first character in |
139 ** the string they point to is a consonant or a vowel, according | 139 ** the string they point to is a consonant or a vowel, according |
140 ** to Porter rules. | 140 ** to Porter rules. |
141 ** | 141 ** |
142 ** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'. | 142 ** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'. |
143 ** 'Y' is a consonant unless it follows another consonant, | 143 ** 'Y' is a consonant unless it follows another consonant, |
144 ** in which case it is a vowel. | 144 ** in which case it is a vowel. |
145 ** | 145 ** |
146 ** In these routines, the letters are in reverse order. So the 'y' rule | 146 ** In these routines, the letters are in reverse order. So the 'y' rule |
147 ** is that 'y' is a consonant unless it is followed by another | 147 ** is that 'y' is a consonant unless it is followed by another |
148 ** consonant. | 148 ** consonant. |
149 */ | 149 */ |
150 static int isVowel(const char*); | 150 static int isVowel(const char*); |
151 static int isConsonant(const char *z){ | 151 static int isConsonant(const char *z){ |
152 int j; | 152 int j; |
153 char x = *z; | 153 char x = *z; |
154 if( x==0 ) return 0; | 154 if( x==0 ) return 0; |
155 assert( x>='a' && x<='z' ); | 155 assert( x>='a' && x<='z' ); |
156 j = vOrCType[x-'a']; | 156 j = cType[x-'a']; |
157 if( j<2 ) return j; | 157 if( j<2 ) return j; |
158 return z[1]==0 || isVowel(z + 1); | 158 return z[1]==0 || isVowel(z + 1); |
159 } | 159 } |
160 static int isVowel(const char *z){ | 160 static int isVowel(const char *z){ |
161 int j; | 161 int j; |
162 char x = *z; | 162 char x = *z; |
163 if( x==0 ) return 0; | 163 if( x==0 ) return 0; |
164 assert( x>='a' && x<='z' ); | 164 assert( x>='a' && x<='z' ); |
165 j = vOrCType[x-'a']; | 165 j = cType[x-'a']; |
166 if( j<2 ) return 1-j; | 166 if( j<2 ) return 1-j; |
167 return isConsonant(z + 1); | 167 return isConsonant(z + 1); |
168 } | 168 } |
169 | 169 |
170 /* | 170 /* |
171 ** Let any sequence of one or more vowels be represented by V and let | 171 ** Let any sequence of one or more vowels be represented by V and let |
172 ** C be a sequence of one or more consonants. Then every word can be | 172 ** C be a sequence of one or more consonants. Then every word can be |
173 ** represented as: | 173 ** represented as: |
174 ** | 174 ** |
175 ** [C] (VC){m} [V] | 175 ** [C] (VC){m} [V] |
(...skipping 461 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
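637 ** Return a pointer to the porter tokenizer module in *ppModule. Nothing | 637 ** Return a pointer to the porter tokenizer module in *ppModule. Nothing |
638 ** is allocated; the pointer is the address of porterTokenizerModule. | 638 ** is allocated; the pointer is the address of porterTokenizerModule. |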
639 */ | 639 */ |
640 void sqlite3Fts3PorterTokenizerModule( | 640 void sqlite3Fts3PorterTokenizerModule( |
641 sqlite3_tokenizer_module const**ppModule | 641 sqlite3_tokenizer_module const**ppModule |
642 ){ | 642 ){ |
643 *ppModule = &porterTokenizerModule; | 643 *ppModule = &porterTokenizerModule; |
644 } | 644 } |
645 | 645 |
646 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ | 646 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |
OLD | NEW |