| OLD | NEW |
| 1 /* | 1 /* |
| 2 ****************************************************************************** | 2 ****************************************************************************** |
| 3 * | 3 * |
| 4 * Copyright (C) 2000-2013, International Business Machines | 4 * Copyright (C) 2000-2014, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
| 6 * | 6 * |
| 7 ****************************************************************************** | 7 ****************************************************************************** |
| 8 * file name: ushape.cpp | 8 * file name: ushape.cpp |
| 9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
| 11 * indentation:4 | 11 * indentation:4 |
| 12 * | 12 * |
| 13 * created on: 2000jun29 | 13 * created on: 2000jun29 |
| 14 * created by: Markus W. Scherer | 14 * created by: Markus W. Scherer |
| 15 * | 15 * |
| 16 * Arabic letter shaping implemented by Ayman Roshdy | 16 * Arabic letter shaping implemented by Ayman Roshdy |
| 17 */ | 17 */ |
| 18 | 18 |
| 19 #include "unicode/utypes.h" | 19 #include "unicode/utypes.h" |
| 20 #include "unicode/uchar.h" | 20 #include "unicode/uchar.h" |
| 21 #include "unicode/ustring.h" | 21 #include "unicode/ustring.h" |
| 22 #include "unicode/ushape.h" | 22 #include "unicode/ushape.h" |
| 23 #include "cmemory.h" | 23 #include "cmemory.h" |
| 24 #include "putilimp.h" | 24 #include "putilimp.h" |
| 25 #include "ustr_imp.h" | 25 #include "ustr_imp.h" |
| 26 #include "ubidi_props.h" | 26 #include "ubidi_props.h" |
| 27 #include "uassert.h" | 27 #include "uassert.h" |
| 28 | 28 |
| 29 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
| 30 | |
| 31 /* | 29 /* |
| 32 * This implementation is designed for 16-bit Unicode strings. | 30 * This implementation is designed for 16-bit Unicode strings. |
| 33 * The main assumption is that the Arabic characters and their | 31 * The main assumption is that the Arabic characters and their |
| 34 * presentation forms each fit into a single UChar. | 32 * presentation forms each fit into a single UChar. |
| 35 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII | 33 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII |
| 36 * characters. | 34 * characters. |
| 37 */ | 35 */ |
| 38 | 36 |
| 39 /* | 37 /* |
| 40 * ### TODO in general for letter shaping: | 38 * ### TODO in general for letter shaping: |
| (...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 205 0, 0, 0, 0, 0, /*0x0660-0x0664*/ | 203 0, 0, 0, 0, 0, /*0x0660-0x0664*/ |
| 206 0, 0, 0, 0, 0, /*0x0665-0x0669*/ | 204 0, 0, 0, 0, 0, /*0x0665-0x0669*/ |
| 207 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ | 205 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ |
| 208 4 + 256 * 6, /*0x0670*/ | 206 4 + 256 * 6, /*0x0670*/ |
| 209 1 + 8 + 256 * 0x00,/*0x0671*/ | 207 1 + 8 + 256 * 0x00,/*0x0671*/ |
| 210 1 + 32, /*0x0672*/ | 208 1 + 32, /*0x0672*/ |
| 211 1 + 32, /*0x0673*/ | 209 1 + 32, /*0x0673*/ |
| 212 0, /*0x0674*/ | 210 0, /*0x0674*/ |
| 213 1 + 32, /*0x0675*/ | 211 1 + 32, /*0x0675*/ |
| 214 1, 1, /*0x0676-0x0677*/ | 212 1, 1, /*0x0676-0x0677*/ |
| 215 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x0678-0x067D*/ | 213 1 + 2, /*0x0678*/ |
| 214 1 + 2 + 8 + 256 * 0x16,/*0x0679*/ |
| 215 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/ |
| 216 1 + 2 + 8 + 256 * 0x02,/*0x067B*/ |
| 217 1+2, 1+2, /*0x067C-0x067D*/ |
| 216 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ | 218 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ |
| 217 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ | 219 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ |
| 218 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x0688-0x0691*/ | 220 1 + 8 + 256 * 0x38,/*0x0688*/ |
| 221 1, 1, 1, /*0x0689-0x068B*/ |
| 222 1 + 8 + 256 * 0x34,/*0x068C*/ |
| 223 1 + 8 + 256 * 0x32,/*0x068D*/ |
| 224 1 + 8 + 256 * 0x36,/*0x068E*/ |
| 225 1, 1, /*0x068F-0x0690*/ |
| 226 1 + 8 + 256 * 0x3C,/*0x0691*/ |
| 219 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ | 227 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ |
| 220 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ | 228 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ |
| 221 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ | 229 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ |
| 222 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ | 230 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ |
| 223 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ | 231 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ |
| 224 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ | 232 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ |
| 225 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ | 233 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ |
| 226 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B8-0x06BF*/ | 234 1+2, 1+2, /*0x06B8-0x06B9*/ |
| 227 1+2, 1+2, /*0x06B8-0x06BF*/ | 235 1 + 8 + 256 * 0x4E,/*0x06BA*/ |
| 228 1, /*0x06C0*/ | 236 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/ |
| 229 1+2, /*0x06C1*/ | 237 1+2, 1+2, /*0x06BC-0x06BD*/ |
| 230 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x06C2-0x06CB*/ | 238 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/ |
| 231 1+2+8+256 * 0xAC, /*0x06CC*/ | 239 1+2, /*0x06BF*/ |
| 240 1 + 8 + 256 * 0x54,/*0x06C0*/ |
| 241 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/ |
| 242 1, 1, 1, /*0x06C2-0x06C4*/ |
| 243 1 + 8 + 256 * 0x90,/*0x06C5*/ |
| 244 1 + 8 + 256 * 0x89,/*0x06C6*/ |
| 245 1 + 8 + 256 * 0x87,/*0x06C7*/ |
| 246 1 + 8 + 256 * 0x8B,/*0x06C8*/ |
| 247 1 + 8 + 256 * 0x92,/*0x06C9*/ |
| 248 1, /*0x06CA*/ |
| 249 1 + 8 + 256 * 0x8E,/*0x06CB*/ |
| 250 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/ |
| 232 1, /*0x06CD*/ | 251 1, /*0x06CD*/ |
| 233 1+2, 1+2, 1+2, 1+2, /*0x06CE-0x06D1*/ | 252 1+2, 1+2, /*0x06CE-0x06CF*/ |
| 234 1, 1 /*0x06D2-0x06D3*/ | 253 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/ |
| 254 1+2, /*0x06D1*/ |
| 255 1 + 8 + 256 * 0x5E,/*0x06D2*/ |
| 256 1 + 8 + 256 * 0x60 /*0x06D3*/ |
| 235 }; | 257 }; |
| 236 | 258 |
| 237 static const uint8_t presALink[] = { | 259 static const uint8_t presALink[] = { |
| 238 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*
****C*****D*****E*****F*/ | 260 /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*
****C*****D*****E*****F*/ |
| 239 /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
0, 0, 0, 0, | 261 /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
0, 0, 0, 0, |
| 240 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, | 262 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, |
| 241 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
2,1 + 2, 0, 0, | 263 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
2,1 + 2, 0, 0, |
| 242 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 1, | 264 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 1, |
| 243 /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, | 265 /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, |
| 244 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, | 266 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, |
| (...skipping 21 matching lines...) Expand all Loading... |
| 266 /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, | 288 /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, |
| 267 /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, | 289 /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, |
| 268 /*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, | 290 /*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 2, |
| 269 /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 0, | 291 /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1
+ 2, 0, 1, 0, |
| 270 /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 0 | 292 /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
1, 0, 0, 0 |
| 271 }; | 293 }; |
| 272 | 294 |
| 273 static const UChar convertFBto06[] = | 295 static const UChar convertFBto06[] = |
| 274 { | 296 { |
| 275 /***********0******1******2******3******4******5******6******7******8******9****
**A******B******C******D******E******F***/ | 297 /***********0******1******2******3******4******5******6******7******8******9****
**A******B******C******D******E******F***/ |
| 276 /*FB5*/ 0x671, 0x671, 0, 0, 0, 0, 0x67E, 0x67E, 0x67E, 0x67E,
0, 0, 0, 0, 0, 0, | 298 /*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E,
0, 0, 0, 0, 0x67A, 0x67A, |
| 277 /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 299 /*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 0x679, 0x679,
0, 0, 0, 0, 0, 0, |
| 278 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x686, 0x686, 0x686, 0x686, 0, 0, | 300 /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x686, 0x686, 0x686, 0x686, 0, 0, |
| 279 /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0x698, 0x698, 0, 0, 0x6A9, 0x6A9, | 301 /*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688,
0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9, |
| 280 /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 302 /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0,
0, 0, 0, 0, 0x6BA, 0x6BA, |
| 281 /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 303 /*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1,
0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2, |
| 282 /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 304 /*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, |
| 283 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 305 /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, |
| 284 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 306 /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6,
0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB, |
| 285 /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, | 307 /*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0,
0, 0, 0, 0, 0, 0, |
| 286 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC | 308 /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC |
| 287 }; | 309 }; |
| 288 | 310 |
| 289 static const UChar convertFEto06[] = | 311 static const UChar convertFEto06[] = |
| 290 { | 312 { |
| 291 /***********0******1******2******3******4******5******6******7******8******9****
**A******B******C******D******E******F***/ | 313 /***********0******1******2******3******4******5******6******7******8******9****
**A******B******C******D******E******F***/ |
| 292 /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F,
0x650, 0x650, 0x651, 0x651, 0x652, 0x652, | 314 /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F,
0x650, 0x650, 0x651, 0x651, 0x652, 0x652, |
| 293 /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626,
0x626, 0x626, 0x626, 0x627, 0x627, 0x628, | 315 /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626,
0x626, 0x626, 0x626, 0x627, 0x627, 0x628, |
| 294 /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B,
0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, | 316 /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B,
0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, |
| 295 /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F,
0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, | 317 /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F,
0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, |
| (...skipping 1233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1529 | 1551 |
| 1530 /* | 1552 /* |
| 1531 * need a temporary buffer of size max(outputSize, sourceLength) | 1553 * need a temporary buffer of size max(outputSize, sourceLength) |
| 1532 * because at first we copy source->temp | 1554 * because at first we copy source->temp |
| 1533 */ | 1555 */ |
| 1534 if(sourceLength>outputSize) { | 1556 if(sourceLength>outputSize) { |
| 1535 outputSize=sourceLength; | 1557 outputSize=sourceLength; |
| 1536 } | 1558 } |
| 1537 | 1559 |
| 1538 /* Start of Arabic letter shaping part */ | 1560 /* Start of Arabic letter shaping part */ |
| 1539 if(outputSize<=LENGTHOF(buffer)) { | 1561 if(outputSize<=UPRV_LENGTHOF(buffer)) { |
| 1540 outputSize=LENGTHOF(buffer); | 1562 outputSize=UPRV_LENGTHOF(buffer); |
| 1541 tempbuffer=buffer; | 1563 tempbuffer=buffer; |
| 1542 } else { | 1564 } else { |
| 1543 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); | 1565 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); |
| 1544 | 1566 |
| 1545 /*Test for NULL*/ | 1567 /*Test for NULL*/ |
| 1546 if(tempbuffer == NULL) { | 1568 if(tempbuffer == NULL) { |
| 1547 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; | 1569 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
| 1548 if (tempsource != NULL) uprv_free(tempsource); | 1570 if (tempsource != NULL) uprv_free(tempsource); |
| 1549 return 0; | 1571 return 0; |
| 1550 } | 1572 } |
| (...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1697 TRUE); | 1719 TRUE); |
| 1698 break; | 1720 break; |
| 1699 default: | 1721 default: |
| 1700 /* will never occur because of validity checks above */ | 1722 /* will never occur because of validity checks above */ |
| 1701 break; | 1723 break; |
| 1702 } | 1724 } |
| 1703 } | 1725 } |
| 1704 | 1726 |
| 1705 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); | 1727 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); |
| 1706 } | 1728 } |
| OLD | NEW |