OLD | NEW |
1 /* This Source Code Form is subject to the terms of the Mozilla Public | 1 /* This Source Code Form is subject to the terms of the Mozilla Public |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | 2 * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | 4 |
5 #include "seccomon.h" | 5 #include "seccomon.h" |
6 #include "secport.h" | 6 #include "secport.h" |
7 | 7 |
8 #ifdef TEST_UTF8 | 8 #ifdef TEST_UTF8 |
9 #include <assert.h> | 9 #include <assert.h> |
10 #undef PORT_Assert | 10 #undef PORT_Assert |
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
312 return PR_TRUE; | 312 return PR_TRUE; |
313 } else { | 313 } else { |
314 unsigned int i, len = 0; | 314 unsigned int i, len = 0; |
315 PORT_Assert((inBufLen % 2) == 0); | 315 PORT_Assert((inBufLen % 2) == 0); |
316 if ((inBufLen % 2) != 0) { | 316 if ((inBufLen % 2) != 0) { |
317 *outBufLen = 0; | 317 *outBufLen = 0; |
318 return PR_FALSE; | 318 return PR_FALSE; |
319 } | 319 } |
320 | 320 |
321 for( i = 0; i < inBufLen; i += 2 ) { | 321 for( i = 0; i < inBufLen; i += 2 ) { |
322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1; | 322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) len += 1; |
323 else if( inBuf[i+H_0] < 0x08 ) len += 2; | 323 else if( inBuf[i+H_0] < 0x08 ) len += 2; |
324 else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) { | 324 else if( ((inBuf[i+0+H_0] & 0xFC) == 0xD8) ) { |
325 if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) { | 325 if( ((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) ) { |
326 i += 2; | 326 i += 2; |
327 len += 4; | 327 len += 4; |
328 } else { | 328 } else { |
329 return PR_FALSE; | 329 return PR_FALSE; |
330 } | 330 } |
331 } | 331 } |
332 else len += 3; | 332 else len += 3; |
333 } | 333 } |
334 | 334 |
335 if( len > maxOutBufLen ) { | 335 if( len > maxOutBufLen ) { |
(...skipping 13 matching lines...) Expand all Loading... |
349 len += 1; | 349 len += 1; |
350 } else if( inBuf[i+H_0] < 0x08 ) { | 350 } else if( inBuf[i+H_0] < 0x08 ) { |
351 /* 0080-07FF -> 110xxxxx 10xxxxxx */ | 351 /* 0080-07FF -> 110xxxxx 10xxxxxx */ |
352 /* 00000abc defghijk -> 110abcde 10fghijk */ | 352 /* 00000abc defghijk -> 110abcde 10fghijk */ |
353 | 353 |
354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) | 354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) |
355 | ((inBuf[i+H_1] & 0xC0) >> 6); | 355 | ((inBuf[i+H_1] & 0xC0) >> 6); |
356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); | 356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); |
357 | 357 |
358 len += 2; | 358 len += 2; |
359 } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) { | 359 } else if( (inBuf[i+H_0] & 0xFC) == 0xD8 ) { |
360 int abcde, BCDE; | 360 int abcde, BCDE; |
361 | 361 |
362 PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2)); | 362 PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xFC) == 0xDC) ); |
363 | 363 |
364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | 364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
365 /* 110110BC DEfghijk 110111lm nopqrstu -> | 365 /* 110110BC DEfghijk 110111lm nopqrstu -> |
366 { Let abcde = BCDE + 1 } | 366 { Let abcde = BCDE + 1 } |
367 11110abc 10defghi 10jklmno 10pqrstu */ | 367 11110abc 10defghi 10jklmno 10pqrstu */ |
368 | 368 |
369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6); | 369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6); |
370 abcde = BCDE + 1; | 370 abcde = BCDE + 1; |
371 | 371 |
372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2); | 372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2); |
(...skipping 472 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
845 { 0x8020, "\xE8\x80\xA0" }, | 845 { 0x8020, "\xE8\x80\xA0" }, |
846 { 0x8040, "\xE8\x81\x80" }, | 846 { 0x8040, "\xE8\x81\x80" }, |
847 { 0x8080, "\xE8\x82\x80" }, | 847 { 0x8080, "\xE8\x82\x80" }, |
848 { 0x8100, "\xE8\x84\x80" }, | 848 { 0x8100, "\xE8\x84\x80" }, |
849 { 0x8200, "\xE8\x88\x80" }, | 849 { 0x8200, "\xE8\x88\x80" }, |
850 { 0x8400, "\xE8\x90\x80" }, | 850 { 0x8400, "\xE8\x90\x80" }, |
851 { 0x8800, "\xE8\xA0\x80" }, | 851 { 0x8800, "\xE8\xA0\x80" }, |
852 { 0x9000, "\xE9\x80\x80" }, | 852 { 0x9000, "\xE9\x80\x80" }, |
853 { 0xA000, "\xEA\x80\x80" }, | 853 { 0xA000, "\xEA\x80\x80" }, |
854 { 0xC000, "\xEC\x80\x80" }, | 854 { 0xC000, "\xEC\x80\x80" }, |
| 855 { 0xFB01, "\xEF\xAC\x81" }, |
855 { 0xFFFF, "\xEF\xBF\xBF" } | 856 { 0xFFFF, "\xEF\xBF\xBF" } |
856 | 857 |
857 }; | 858 }; |
858 | 859 |
859 /* | 860 /* |
860 * UTF-16 vectors | 861 * UTF-16 vectors |
861 */ | 862 */ |
862 | 863 |
863 struct utf16 utf16[] = { | 864 struct utf16 utf16[] = { |
864 { 0x00010000, { 0xD800, 0xDC00 } }, | 865 { 0x00010000, { 0xD800, 0xDC00 } }, |
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1146 "\xFD\xBF\xBF\xBF\xBF\xBF", | 1147 "\xFD\xBF\xBF\xBF\xBF\xBF", |
1147 "\x80", | 1148 "\x80", |
1148 "\xC3", | 1149 "\xC3", |
1149 "\xC3\xC3\x80", | 1150 "\xC3\xC3\x80", |
1150 "\xED\xA0\x80", | 1151 "\xED\xA0\x80", |
1151 "\xED\xBF\x80", | 1152 "\xED\xBF\x80", |
1152 "\xED\xBF\xBF", | 1153 "\xED\xBF\xBF", |
1153 "\xED\xA0\x80\xE0\xBF\xBF", | 1154 "\xED\xA0\x80\xE0\xBF\xBF", |
1154 }; | 1155 }; |
1155 | 1156 |
/*
 * Malformed UTF-16 inputs that the UTF-16 -> UTF-8 conversion must reject.
 *
 * Each row is a sequence of 16-bit code units terminated by a 0 unit, so a
 * row carries at most two significant units.  The table is only read by the
 * test code in this file, hence the internal linkage and const qualifier.
 */
static const uint16_t utf16_bad[][3] = {
  /* leading surrogate not followed by trailing surrogate */
  { 0xD800, 0,      0 },  /* input ends right after the leading surrogate */
  { 0xD800, 0x0041, 0 },  /* followed by an ASCII character */
  { 0xD800, 0x00FE, 0 },  /* followed by a Latin-1 character */
  { 0xD800, 0x03BB, 0 },  /* followed by a character in the 0080-07FF range */
  { 0xD800, 0xD800, 0 },  /* followed by another leading surrogate */
  { 0xD800, 0xFEFF, 0 },  /* followed by a code unit above the surrogates */
  { 0xD800, 0xFFFD, 0 },  /* followed by a code unit above the surrogates */
};
| 1168 |
1156 static void | 1169 static void |
1157 dump_utf8 | 1170 dump_utf8 |
1158 ( | 1171 ( |
1159 char *word, | 1172 char *word, |
1160 unsigned char *utf8, | 1173 unsigned char *utf8, |
1161 char *end | 1174 char *end |
1162 ) | 1175 ) |
1163 { | 1176 { |
1164 fprintf(stdout, "%s ", word); | 1177 fprintf(stdout, "%s ", word); |
1165 for( ; *utf8; utf8++ ) { | 1178 for( ; *utf8; utf8++ ) { |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1213 rv = PR_FALSE; | 1226 rv = PR_FALSE; |
1214 continue; | 1227 continue; |
1215 } | 1228 } |
1216 | 1229 |
1217 if( (sizeof(back) != len) || (e->c != back) ) { | 1230 if( (sizeof(back) != len) || (e->c != back) ) { |
1218 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:"); | 1231 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:"); |
1219 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | 1232 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); |
1220 rv = PR_FALSE; | 1233 rv = PR_FALSE; |
1221 continue; | 1234 continue; |
1222 } | 1235 } |
| 1236 |
| 1237 len = strlen(e->utf8) - 1; |
| 1238 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, |
| 1239 (unsigned char *)&e->c, sizeof(e->c), utf8 + sizeof(utf8) - len, len, |
| 1240 &len); |
| 1241 |
| 1242 if( result || len != strlen(e->utf8) ) { |
| 1243 fprintf(stdout, "Length computation error converting UCS-4 0x%08.8x" |
| 1244 " to UTF-8\n", e->c); |
| 1245 rv = PR_FALSE; |
| 1246 continue; |
| 1247 } |
1223 } | 1248 } |
1224 | 1249 |
1225 return rv; | 1250 return rv; |
1226 } | 1251 } |
1227 | 1252 |
1228 static PRBool | 1253 static PRBool |
1229 test_ucs2_chars | 1254 test_ucs2_chars |
1230 ( | 1255 ( |
1231 void | 1256 void |
1232 ) | 1257 ) |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1270 rv = PR_FALSE; | 1295 rv = PR_FALSE; |
1271 continue; | 1296 continue; |
1272 } | 1297 } |
1273 | 1298 |
1274 if( (sizeof(back) != len) || (e->c != back) ) { | 1299 if( (sizeof(back) != len) || (e->c != back) ) { |
1275 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:"); | 1300 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:"); |
1276 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); | 1301 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); |
1277 rv = PR_FALSE; | 1302 rv = PR_FALSE; |
1278 continue; | 1303 continue; |
1279 } | 1304 } |
| 1305 |
| 1306 len = strlen(e->utf8) - 1; |
| 1307 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, |
| 1308 (unsigned char *)&e->c, sizeof(e->c), utf8 + sizeof(utf8) - len, len, |
| 1309 &len); |
| 1310 |
| 1311 if( result || len != strlen(e->utf8) ) { |
| 1312 fprintf(stdout, "Length computation error converting UCS-2 0x%04.4x" |
| 1313 " to UTF-8\n", e->c); |
| 1314 rv = PR_FALSE; |
| 1315 continue; |
| 1316 } |
1280 } | 1317 } |
1281 | 1318 |
1282 return rv; | 1319 return rv; |
1283 } | 1320 } |
1284 | 1321 |
1285 static PRBool | 1322 static PRBool |
1286 test_utf16_chars | 1323 test_utf16_chars |
1287 ( | 1324 ( |
1288 void | 1325 void |
1289 ) | 1326 ) |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1420 rv = PR_FALSE; | 1457 rv = PR_FALSE; |
1421 continue; | 1458 continue; |
1422 } | 1459 } |
1423 | 1460 |
1424 } | 1461 } |
1425 | 1462 |
1426 return rv; | 1463 return rv; |
1427 } | 1464 } |
1428 | 1465 |
1429 static PRBool | 1466 static PRBool |
| 1467 test_utf16_bad_chars(void) |
| 1468 { |
| 1469 PRBool rv = PR_TRUE; |
| 1470 int i; |
| 1471 |
| 1472 for( i = 0; i < sizeof(utf16_bad)/sizeof(utf16_bad[0]); ++i ) { |
| 1473 PRBool result; |
| 1474 unsigned char destbuf[18]; |
| 1475 unsigned int j, len, destlen; |
| 1476 uint16_t *buf; |
| 1477 |
| 1478 for( len = 0; utf16_bad[i][len] != 0; ++len ) |
| 1479 /* nothing */; |
| 1480 |
| 1481 buf = malloc(sizeof(uint16_t) * len); |
| 1482 for( j = 0; j < len; ++j ) |
| 1483 buf[j] = htons(utf16_bad[i][j]); |
| 1484 |
| 1485 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, |
| 1486 (unsigned char *)buf, sizeof(uint16_t) * len, destbuf, sizeof(destbuf), |
| 1487 &destlen); |
| 1488 if( result ) { |
| 1489 fprintf(stdout, "Failed to detect bad UTF-16 string conversion for " |
| 1490 "{0x%x,0x%x} (UTF-8 len = %u)\n", utf16_bad[i][0], utf16_bad[i][1], |
| 1491 destlen); |
| 1492 rv = PR_FALSE; |
| 1493 } |
| 1494 free(buf); |
| 1495 } |
| 1496 } |
| 1497 |
| 1498 static PRBool |
1430 test_iso88591_chars | 1499 test_iso88591_chars |
1431 ( | 1500 ( |
1432 void | 1501 void |
1433 ) | 1502 ) |
1434 { | 1503 { |
1435 PRBool rv = PR_TRUE; | 1504 PRBool rv = PR_TRUE; |
1436 int i; | 1505 int i; |
1437 | 1506 |
1438 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | 1507 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { |
1439 struct ucs2 *e = &ucs2[i]; | 1508 struct ucs2 *e = &ucs2[i]; |
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1569 PRBool result; | 1638 PRBool result; |
1570 | 1639 |
1571 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32)); | 1640 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32)); |
1572 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16)); | 1641 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16)); |
1573 | 1642 |
1574 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) { | 1643 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) { |
1575 fprintf(stderr, "out of memory\n"); | 1644 fprintf(stderr, "out of memory\n"); |
1576 exit(1); | 1645 exit(1); |
1577 } | 1646 } |
1578 | 1647 |
1579 len = 0; | 1648 len = 1; |
1580 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { | 1649 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { |
1581 ucs4s[i] = ucs4[i].c; | 1650 ucs4s[i] = ucs4[i].c; |
1582 len += strlen(ucs4[i].utf8); | 1651 len += strlen(ucs4[i].utf8); |
1583 } | 1652 } |
1584 | 1653 |
1585 ucs4_utf8 = (char *)malloc(len); | 1654 ucs4_utf8 = (char *)malloc(len); |
1586 | 1655 |
1587 len = 0; | 1656 len = 1; |
1588 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { | 1657 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { |
1589 ucs2s[i] = ucs2[i].c; | 1658 ucs2s[i] = ucs2[i].c; |
1590 len += strlen(ucs2[i].utf8); | 1659 len += strlen(ucs2[i].utf8); |
1591 } | 1660 } |
1592 | 1661 |
1593 ucs2_utf8 = (char *)malloc(len); | 1662 ucs2_utf8 = (char *)malloc(len); |
1594 | 1663 |
1595 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) { | 1664 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) { |
1596 fprintf(stderr, "out of memory\n"); | 1665 fprintf(stderr, "out of memory\n"); |
1597 exit(1); | 1666 exit(1); |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1774 int argc, | 1843 int argc, |
1775 char *argv[] | 1844 char *argv[] |
1776 ) | 1845 ) |
1777 { | 1846 { |
1778 byte_order(); | 1847 byte_order(); |
1779 | 1848 |
1780 if( test_ucs4_chars() && | 1849 if( test_ucs4_chars() && |
1781 test_ucs2_chars() && | 1850 test_ucs2_chars() && |
1782 test_utf16_chars() && | 1851 test_utf16_chars() && |
1783 test_utf8_bad_chars() && | 1852 test_utf8_bad_chars() && |
| 1853 test_utf16_bad_chars() && |
1784 test_iso88591_chars() && | 1854 test_iso88591_chars() && |
1785 test_zeroes() && | 1855 test_zeroes() && |
1786 test_multichars() && | 1856 test_multichars() && |
1787 PR_TRUE ) { | 1857 PR_TRUE ) { |
1788 fprintf(stderr, "PASS\n"); | 1858 fprintf(stderr, "PASS\n"); |
1789 return 1; | 1859 return 1; |
1790 } else { | 1860 } else { |
1791 fprintf(stderr, "FAIL\n"); | 1861 fprintf(stderr, "FAIL\n"); |
1792 return 0; | 1862 return 0; |
1793 } | 1863 } |
1794 } | 1864 } |
1795 | 1865 |
1796 #endif /* TEST_UTF8 */ | 1866 #endif /* TEST_UTF8 */ |
OLD | NEW |