third_party/hunspell_new/src/hunspell/phonet.cxx - Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell.

Side by Side Diff: third_party/hunspell_new/src/hunspell/phonet.cxx

Issue 1135173004: Rename third_party/hunspell_new back to third_party/hunspell. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /* phonetic.c - generic replacement algorithms for phonetic transformation

2 Copyright (C) 2000 Bjoern Jacke

3

4 This library is free software; you can redistribute it and/or

5 modify it under the terms of the GNU Lesser General Public

6 License version 2.1 as published by the Free Software Foundation;

7

8 This library is distributed in the hope that it will be useful,

9 but WITHOUT ANY WARRANTY; without even the implied warranty of

10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

11 Lesser General Public License for more details.

12

13 You should have received a copy of the GNU Lesser General Public

14 License along with this library; If not, see

15 <http://www.gnu.org/licenses/>.

16

17 Changelog:

18

19 2000-01-05 Bjoern Jacke <bjoern at j3e.de>

20 Initial Release inspired by the article about phonetic

21 transformations out of c't 25/1999

22

23 2007-07-26 Bjoern Jacke <bjoern at j3e.de>

24 Released under MPL/GPL/LGPL tri-license for Hunspell

25

26 2007-08-23 Laszlo Nemeth <nemeth at OOo>

27 Porting from Aspell to Hunspell using C-like structs

28 */

29

30 #include <stdlib.h>

31 #include <string.h>

32 #include <stdio.h>

33 #include <ctype.h>

34

35 #include "csutil.hxx"

36 #include "phonet.hxx"

37

38 void init_phonet_hash(phonetable & parms)

39 {

40 int i, k;

41

42 for (i = 0; i < HASHSIZE; i++) {

43 parms.hash[i] = -1;

44 }

45

46 for (i = 0; parms.rules[i][0] != '\0'; i += 2) {

47 / set hash value /

48 k = (unsigned char) parms.rules[i][0];

49

50 if (parms.hash[k] < 0) {

51 parms.hash[k] = i;

52 }

53 }

54 }

55

// Like strcpy, but safe when the two strings overlap -- provided that
// dest < src (the copy runs front to back, so every byte is read before
// it can be overwritten).  The result is always NUL-terminated.
static inline void strmove(char * dest, const char * src) {
  while (*src)
    *dest++ = *src++;
  *dest = '\0';
}

63

// Alphabetic test used by the rule matcher.  Bytes below 128 are
// classified by isalpha(); every byte >= 128 is treated as a letter
// (returns 1).  For the low range the raw isalpha() result is returned,
// so callers must test for zero / non-zero only.
static int myisalpha(char ch) {
  const unsigned char byte = (unsigned char) ch;

  if (byte < 128) return isalpha(byte);
  return 1;
}

68

69 /* phonetic transcription algorithm */

70 /* see: http://aspell.net/man-html/Phonetic-Code.html */

71 /* convert string to uppercase before this call */

72 int phonet (const char * inword, char * target,

73 int len,

74 phonetable & parms)

75 {

76 / Do phonetic transformation. /

77 / "len" = length of "inword" incl. '\0'. /

78

79 / result: >= 0: length of "target" /

80 / otherwise: error /

81

82 int i,j,k=0,n,p,z;

83 int k0,n0,p0=-333,z0;

84 char c, c0;

85 const char * s;

86 typedef unsigned char uchar;

87 char word[MAXPHONETUTF8LEN + 1];

88 if (len == -1) len = strlen(inword);

89 if (len > MAXPHONETUTF8LEN) return 0;

90 strcpy(word, inword);

91

92 / check word /

93 i = j = z = 0;

94 while ((c = word[i]) != '\0') {

95 n = parms.hash[(uchar) c];

96 z0 = 0;

97

98 if (n >= 0) {

99 / check all rules for the same letter /

100 while (parms.rules[n][0] == c) {

101

102 / check whole string /

103 k = 1; / number of found letters /

104 p = 5; / default priority /

105 s = parms.rules[n];

106 s++; /** important for (see below) "(s-1)" */

107

108 while (s != '\0' && word[i+k] == s

109 && !isdigit ((unsigned char) s) && strchr ("(-<^$", s) == NULL) {

110 k++;

111 s++;

112 }

113 if (*s == '(') {

114 / check letters in "(..)" /

115 if (myisalpha(word[i+k]) // ...could be implied?

116 && strchr(s+1, word[i+k]) != NULL) {

117 k++;

118 while (*s != ')')

119 s++;

120 s++;

121 }

122 }

123 p0 = (int) *s;

124 k0 = k;

125 while (*s == '-' && k > 1) {

126 k--;

127 s++;

128 }

129 if (*s == '<')

130 s++;

131 if (isdigit ((unsigned char) *s)) {

132 / determine priority /

133 p = *s - '0';

134 s++;

135 }

136 if (s == '^' && (s+1) == '^')

137 s++;

138

139 if (*s == '\0'

140 \|\| (*s == '^'

141 && (i == 0 \|\| ! myisalpha(word[i-1]))

142 && (*(s+1) != '$'

143 \|\| (! myisalpha(word[i+k0]) )))

144 \|\| (*s == '$' && i > 0

145 && myisalpha(word[i-1])

146 && (! myisalpha(word[i+k0]) )))

147 {

148 / search for followup rules, if: /

149 / parms.followup and k > 1 and NO '-' in searchstring /

150 c0 = word[i+k-1];

151 n0 = parms.hash[(uchar) c0];

152

153 // if (parms.followup && k > 1 && n0 >= 0

154 if (k > 1 && n0 >= 0

155 && p0 != (int) '-' && word[i+k] != '\0') {

156 / test follow-up rule for "word[i+k]" /

157 while (parms.rules[n0][0] == c0) {

158

159 / check whole string /

160 k0 = k;

161 p0 = 5;

162 s = parms.rules[n0];

163 s++;

164 while (s != '\0' && word[i+k0] == s

165 && ! isdigit((unsigned char) s) && strchr("(-<^$",s) == NULL) {

166 k0++;

167 s++;

168 }

169 if (*s == '(') {

170 / check letters /

171 if (myisalpha(word[i+k0])

172 && strchr (s+1, word[i+k0]) != NULL) {

173 k0++;

174 while (s != ')' && s != '\0')

175 s++;

176 if (*s == ')')

177 s++;

178 }

179 }

180 while (*s == '-') {

181 / "k0" gets NOT reduced /

182 / because "if (k0 == k)" /

183 s++;

184 }

185 if (*s == '<')

186 s++;

187 if (isdigit ((unsigned char) *s)) {

188 p0 = *s - '0';

189 s++;

190 }

191

192 if (*s == '\0'

193 /** s == '^' cuts */

194 \|\| (*s == '$' && ! myisalpha(word[i+k0])))

195 {

196 if (k0 == k) {

197 / this is just a piece of the string /

198 n0 += 2;

199 continue;

200 }

201

202 if (p0 < p) {

203 / priority too low /

204 n0 += 2;

205 continue;

206 }

207 / rule fits; stop search /

208 break;

209 }

210 n0 += 2;

211 } / End of "while (parms.rules[n0][0] == c0)" /

212

213 if (p0 >= p && parms.rules[n0][0] == c0) {

214 n += 2;

215 continue;

216 }

217 } / end of follow-up stuff /

218

219 / replace string /

220 s = parms.rules[n+1];

221 p0 = (parms.rules[n][0] != '\0'

222 && strchr (parms.rules[n]+1,'<') != NULL) ? 1:0;

223 if (p0 == 1 && z == 0) {

224 / rule with '<' is used /

225 if (j > 0 && *s != '\0'

226 && (target[j-1] == c \|\| target[j-1] == *s)) {

227 j--;

228 }

229 z0 = 1;

230 z = 1;

231 k0 = 0;

232 while (*s != '\0' && word[i+k0] != '\0') {

233 word[i+k0] = *s;

234 k0++;

235 s++;

236 }

237 if (k > k0)

238 strmove (&word[0]+i+k0, &word[0]+i+k);

239

240 / new "actual letter" /

241 c = word[i];

242 }

243 else { / no '<' rule used /

244 i += k - 1;

245 z = 0;

246 while (*s != '\0'

247 && *(s+1) != '\0' && j < len) {

248 if (j == 0 \|\| target[j-1] != *s) {

249 target[j] = *s;

250 j++;

251 }

252 s++;

253 }

254 / new "actual letter" /

255 c = *s;

256 if (parms.rules[n][0] != '\0'

257 && strstr (parms.rules[n]+1, "^^") != NULL) {

258 if (c != '\0') {

259 target[j] = c;

260 j++;

261 }

262 strmove (&word[0], &word[0]+i+1);

263 i = 0;

264 z0 = 1;

265 }

266 }

267 break;

268 } / end of follow-up stuff /

269 n += 2;

270 } / end of while (parms.rules[n][0] == c) /

271 } / end of if (n >= 0) /

272 if (z0 == 0) {

273 // if (k && (assert(p0!=-333),!p0) && j < len && c != '\0'

274 // && (!parms.collapse_result \|\| j == 0 \|\| target[j-1] != c)){

275 if (k && !p0 && j < len && c != '\0'

276 && (1 \|\| j == 0 \|\| target[j-1] != c)){

277 / condense only double letters /

278 target[j] = c;

279 ///printf("\n setting \n");

280 j++;

281 }

282

283 i++;

284 z = 0;

285 k=0;

286 }

287 } / end of while ((c = word[i]) != '\0') /

288

289 target[j] = '\0';

290 return (j);

291

292 } / end of function "phonet" /

OLD	NEW

« no previous file with comments | « third_party/hunspell_new/src/hunspell/phonet.hxx ('k') | third_party/hunspell_new/src/hunspell/replist.hxx » ('j') | no next file with comments »