OLD | NEW |
1 /* | 1 /* |
2 ****************************************************************************** | 2 ****************************************************************************** |
3 * | 3 * |
4 * Copyright (C) 2000-2013, International Business Machines | 4 * Copyright (C) 2000-2014, International Business Machines |
5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
6 * | 6 * |
7 ****************************************************************************** | 7 ****************************************************************************** |
8 * file name: ushape.cpp | 8 * file name: ushape.cpp |
9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
11 * indentation:4 | 11 * indentation:4 |
12 * | 12 * |
13 * created on: 2000jun29 | 13 * created on: 2000jun29 |
14 * created by: Markus W. Scherer | 14 * created by: Markus W. Scherer |
15 * | 15 * |
16 * Arabic letter shaping implemented by Ayman Roshdy | 16 * Arabic letter shaping implemented by Ayman Roshdy |
17 */ | 17 */ |
18 | 18 |
19 #include "unicode/utypes.h" | 19 #include "unicode/utypes.h" |
20 #include "unicode/uchar.h" | 20 #include "unicode/uchar.h" |
21 #include "unicode/ustring.h" | 21 #include "unicode/ustring.h" |
22 #include "unicode/ushape.h" | 22 #include "unicode/ushape.h" |
23 #include "cmemory.h" | 23 #include "cmemory.h" |
24 #include "putilimp.h" | 24 #include "putilimp.h" |
25 #include "ustr_imp.h" | 25 #include "ustr_imp.h" |
26 #include "ubidi_props.h" | 26 #include "ubidi_props.h" |
27 #include "uassert.h" | 27 #include "uassert.h" |
28 | 28 |
29 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
30 | |
31 /* | 29 /* |
32 * This implementation is designed for 16-bit Unicode strings. | 30 * This implementation is designed for 16-bit Unicode strings. |
33 * The main assumption is that the Arabic characters and their | 31 * The main assumption is that the Arabic characters and their |
34 * presentation forms each fit into a single UChar. | 32 * presentation forms each fit into a single UChar. |
35 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII | 33 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII |
36 * characters. | 34 * characters. |
37 */ | 35 */ |
38 | 36 |
39 /* | 37 /* |
40 * ### TODO in general for letter shaping: | 38 * ### TODO in general for letter shaping: |
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
205 0, 0, 0, 0, 0, /*0x0660-0x0664*/ | 203 0, 0, 0, 0, 0, /*0x0660-0x0664*/ |
206 0, 0, 0, 0, 0, /*0x0665-0x0669*/ | 204 0, 0, 0, 0, 0, /*0x0665-0x0669*/ |
207 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ | 205 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ |
208 4 + 256 * 6, /*0x0670*/ | 206 4 + 256 * 6, /*0x0670*/ |
209 1 + 8 + 256 * 0x00,/*0x0671*/ | 207 1 + 8 + 256 * 0x00,/*0x0671*/ |
210 1 + 32, /*0x0672*/ | 208 1 + 32, /*0x0672*/ |
211 1 + 32, /*0x0673*/ | 209 1 + 32, /*0x0673*/ |
212 0, /*0x0674*/ | 210 0, /*0x0674*/ |
213 1 + 32, /*0x0675*/ | 211 1 + 32, /*0x0675*/ |
214 1, 1, /*0x0676-0x0677*/ | 212 1, 1, /*0x0676-0x0677*/ |
215 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x0678-0x067D*/ | 213 1 + 2, /*0x0678*/ |
| 214 1 + 2 + 8 + 256 * 0x16,/*0x0679*/ |
| 215 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/ |
| 216 1 + 2 + 8 + 256 * 0x02,/*0x067B*/ |
| 217 1+2, 1+2, /*0x067C-0x067D*/ |
216 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ | 218 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ |
217 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ | 219 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ |
218 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x0688-0x0691*/ | 220 1 + 8 + 256 * 0x38,/*0x0688*/ |
| 221 1, 1, 1, /*0x0689-0x068B*/ |
| 222 1 + 8 + 256 * 0x34,/*0x068C*/ |
| 223 1 + 8 + 256 * 0x32,/*0x068D*/ |
| 224 1 + 8 + 256 * 0x36,/*0x068E*/ |
| 225 1, 1, /*0x068F-0x0690*/ |
| 226 1 + 8 + 256 * 0x3C,/*0x0691*/ |
219 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ | 227 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ |
220 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ | 228 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ |
221 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ | 229 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ |
222 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ | 230 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ |
223 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ | 231 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ |
224 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ | 232 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ |
225 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ | 233 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ |
226 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B8-0x06BF*/ | 234 1+2, 1+2, /*0x06B8-0x06B9*/ |
227 1+2, 1+2, /*0x06B8-0x06BF*/ | 235 1 + 8 + 256 * 0x4E,/*0x06BA*/ |
228 1, /*0x06C0*/ | 236 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/ |
229 1+2, /*0x06C1*/ | 237 1+2, 1+2, /*0x06BC-0x06BD*/ |
230 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x06C2-0x06CB*/ | 238 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/ |
231 1+2+8+256 * 0xAC, /*0x06CC*/ | 239 1+2, /*0x06BF*/ |
| 240 1 + 8 + 256 * 0x54,/*0x06C0*/ |
| 241 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/ |
| 242 1, 1, 1, /*0x06C2-0x06C4*/ |
| 243 1 + 8 + 256 * 0x90,/*0x06C5*/ |
| 244 1 + 8 + 256 * 0x89,/*0x06C6*/ |
| 245 1 + 8 + 256 * 0x87,/*0x06C7*/ |
| 246 1 + 8 + 256 * 0x8B,/*0x06C8*/ |
| 247 1 + 8 + 256 * 0x92,/*0x06C9*/ |
| 248 1, /*0x06CA*/ |
| 249 1 + 8 + 256 * 0x8E,/*0x06CB*/ |
| 250 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/ |
232 1, /*0x06CD*/ | 251 1, /*0x06CD*/ |
233 1+2, 1+2, 1+2, 1+2, /*0x06CE-0x06D1*/ | 252 1+2, 1+2, /*0x06CE-0x06CF*/ |
234 1, 1 /*0x06D2-0x06D3*/ | 253 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/ |
| 254 1+2, /*0x06D1*/ |
| 255 1 + 8 + 256 * 0x5E,/*0x06D2*/ |
| 256 1 + 8 + 256 * 0x60 /*0x06D3*/ |
235 }; | 257 }; |
236 | 258 |
237 static const uint8_t presALink[] = { | 259 static const uint8_t presALink[] = { |
238 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*
****C*****D*****E*****F*/ | 260 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*
****C*****D*****E*****F*/ |
239 /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
0, 0, 0, 0, | 261 /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
0, 0, 0, 0, |
240 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, | 262 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, |
241 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
2,1 + 2, 0, 0, | 263 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
2,1 + 2, 0, 0, |
242 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 1, | 264 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 1, |
243 /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, | 265 /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, |
244 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, | 266 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, |
(...skipping 21 matching lines...) Expand all Loading... |
266 /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, | 288 /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, |
267 /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, | 289 /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, |
268 /*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, | 290 /*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, |
269 /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 0, | 291 /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 0, |
270 /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 0 | 292 /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 0 |
271 }; | 293 }; |
272 | 294 |
273 static const UChar convertFBto06[] = | 295 static const UChar convertFBto06[] = |
274 { | 296 { |
275 /***********0******1******2******3******4******5******6******7******8******9****
**A******B******C******D******E******F***/ | 297 /***********0******1******2******3******4******5******6******7******8******9****
**A******B******C******D******E******F***/ |
276 /*FB5*/ 0x671, 0x671, 0, 0, 0, 0, 0x67E, 0x67E, 0x67E, 0x67E,
0, 0, 0, 0, 0, 0, | 298 /*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E,
0, 0, 0, 0, 0x67A, 0x67A, |
277 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 299 /*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 0x679, 0x679,
0, 0, 0, 0, 0, 0, |
278 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x686, 0x686, 0x686, 0x686, 0, 0, | 300 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x686, 0x686, 0x686, 0x686, 0, 0, |
279 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x698, 0x698, 0, 0, 0x6A9, 0x6A9, | 301 /*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688,
0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9, |
280 /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 302 /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0,
0, 0, 0, 0, 0x6BA, 0x6BA, |
281 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 303 /*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1,
0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2, |
282 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 304 /*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, |
283 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 305 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, |
284 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 306 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6,
0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB, |
285 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 307 /*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0,
0, 0, 0, 0, 0, 0, |
286 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC | 308 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC |
287 }; | 309 }; |
288 | 310 |
289 static const UChar convertFEto06[] = | 311 static const UChar convertFEto06[] = |
290 { | 312 { |
291 /***********0******1******2******3******4******5******6******7******8******9****
**A******B******C******D******E******F***/ | 313 /***********0******1******2******3******4******5******6******7******8******9****
**A******B******C******D******E******F***/ |
292 /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F,
0x650, 0x650, 0x651, 0x651, 0x652, 0x652, | 314 /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F,
0x650, 0x650, 0x651, 0x651, 0x652, 0x652, |
293 /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626,
0x626, 0x626, 0x626, 0x627, 0x627, 0x628, | 315 /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626,
0x626, 0x626, 0x626, 0x627, 0x627, 0x628, |
294 /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B,
0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, | 316 /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B,
0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, |
295 /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F,
0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, | 317 /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F,
0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, |
(...skipping 1233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1529 | 1551 |
1530 /* | 1552 /* |
1531 * need a temporary buffer of size max(outputSize, sourceLength) | 1553 * need a temporary buffer of size max(outputSize, sourceLength) |
1532 * because at first we copy source->temp | 1554 * because at first we copy source->temp |
1533 */ | 1555 */ |
1534 if(sourceLength>outputSize) { | 1556 if(sourceLength>outputSize) { |
1535 outputSize=sourceLength; | 1557 outputSize=sourceLength; |
1536 } | 1558 } |
1537 | 1559 |
1538 /* Start of Arabic letter shaping part */ | 1560 /* Start of Arabic letter shaping part */ |
1539 if(outputSize<=LENGTHOF(buffer)) { | 1561 if(outputSize<=UPRV_LENGTHOF(buffer)) { |
1540 outputSize=LENGTHOF(buffer); | 1562 outputSize=UPRV_LENGTHOF(buffer); |
1541 tempbuffer=buffer; | 1563 tempbuffer=buffer; |
1542 } else { | 1564 } else { |
1543 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); | 1565 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); |
1544 | 1566 |
1545 /*Test for NULL*/ | 1567 /*Test for NULL*/ |
1546 if(tempbuffer == NULL) { | 1568 if(tempbuffer == NULL) { |
1547 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | 1569 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
1548 if (tempsource != NULL) uprv_free(tempsource); | 1570 if (tempsource != NULL) uprv_free(tempsource); |
1549 return 0; | 1571 return 0; |
1550 } | 1572 } |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1697 TRUE); | 1719 TRUE); |
1698 break; | 1720 break; |
1699 default: | 1721 default: |
1700 /* will never occur because of validity checks above */ | 1722 /* will never occur because of validity checks above */ |
1701 break; | 1723 break; |
1702 } | 1724 } |
1703 } | 1725 } |
1704 | 1726 |
1705 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); | 1727 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); |
1706 } | 1728 } |
OLD | NEW |