OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 1280 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1291 uint32_t vqmovn_s8[2], vqmovn_u16[2], vqmovn_s32[2]; | 1291 uint32_t vqmovn_s8[2], vqmovn_u16[2], vqmovn_s32[2]; |
1292 int32_t vcvt_s32_f32[4]; | 1292 int32_t vcvt_s32_f32[4]; |
1293 uint32_t vcvt_u32_f32[4]; | 1293 uint32_t vcvt_u32_f32[4]; |
1294 float vcvt_f32_s32[4], vcvt_f32_u32[4]; | 1294 float vcvt_f32_s32[4], vcvt_f32_u32[4]; |
1295 uint32_t vdup8[4], vdup16[4], vdup32[4]; | 1295 uint32_t vdup8[4], vdup16[4], vdup32[4]; |
1296 float vabsf[4], vnegf[4]; | 1296 float vabsf[4], vnegf[4]; |
1297 uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4]; | 1297 uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4]; |
1298 uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4]; | 1298 uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4]; |
1299 uint32_t veor[4], vand[4], vorr[4]; | 1299 uint32_t veor[4], vand[4], vorr[4]; |
1300 float vdupf[4], vaddf[4], vpaddf[2], vsubf[4], vmulf[4]; | 1300 float vdupf[4], vaddf[4], vpaddf[2], vsubf[4], vmulf[4]; |
| 1301 uint32_t vdupf_16[2], vdupf_8[4]; |
1301 uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4]; | 1302 uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4]; |
1302 uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4]; | 1303 uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4]; |
1303 uint32_t vpadd_i8[2], vpadd_i16[2], vpadd_i32[2]; | 1304 uint32_t vpadd_i8[2], vpadd_i16[2], vpadd_i32[2]; |
1304 uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2]; | 1305 uint32_t vpmin_s8[2], vpmin_u16[2], vpmin_s32[2]; |
1305 uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2]; | 1306 uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2]; |
1306 uint32_t vadd8[4], vadd16[4], vadd32[4]; | 1307 uint32_t vadd8[4], vadd16[4], vadd32[4]; |
1307 uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4]; | 1308 uint32_t vqadd_s8[4], vqadd_u16[4], vqadd_s32[4]; |
1308 uint32_t vsub8[4], vsub16[4], vsub32[4]; | 1309 uint32_t vsub8[4], vsub16[4], vsub32[4]; |
1309 uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4]; | 1310 uint32_t vqsub_u8[4], vqsub_s16[4], vqsub_u32[4]; |
1310 uint32_t vmul8[4], vmul16[4], vmul32[4]; | 1311 uint32_t vmul8[4], vmul16[4], vmul32[4]; |
1311 uint32_t vshl8[4], vshl16[4], vshl32[5]; | 1312 uint32_t vshl8[4], vshl16[4], vshl32[5]; |
1312 uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5]; | 1313 uint32_t vshr_s8[4], vshr_u16[4], vshr_s32[5]; |
| 1314 uint32_t vsli_64[2], vsri_64[2], vsli_32[2], vsri_32[2]; |
1313 uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4]; | 1315 uint32_t vceq[4], vceqf[4], vcgef[4], vcgtf[4]; |
1314 uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4]; | 1316 uint32_t vcge_s8[4], vcge_u16[4], vcge_s32[4]; |
1315 uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4]; | 1317 uint32_t vcgt_s8[4], vcgt_u16[4], vcgt_s32[4]; |
1316 float vrecpe[4], vrecps[4], vrsqrte[4], vrsqrts[4]; | 1318 float vrecpe[4], vrecps[4], vrsqrte[4], vrsqrts[4]; |
1317 float vminf[4], vmaxf[4]; | 1319 float vminf[4], vmaxf[4]; |
1318 uint32_t vtst[4], vbsl[4]; | 1320 uint32_t vtst[4], vbsl[4]; |
1319 uint32_t vext[4]; | 1321 uint32_t vext[4]; |
1320 uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4], | 1322 uint32_t vzip8a[4], vzip8b[4], vzip16a[4], vzip16b[4], vzip32a[4], |
1321 vzip32b[4]; | 1323 vzip32b[4]; |
1322 uint32_t vzipd8a[2], vzipd8b[2], vzipd16a[2], vzipd16b[2]; | 1324 uint32_t vzipd8a[2], vzipd8b[2], vzipd16a[2], vzipd16b[2]; |
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1433 __ mov(r4, Operand(kMaxUInt32)); | 1435 __ mov(r4, Operand(kMaxUInt32)); |
1434 __ mov(r5, Operand(kMinInt + 1)); | 1436 __ mov(r5, Operand(kMinInt + 1)); |
1435 __ vmov(d1, r4, r5); // q0 = [kMinInt, kMaxInt, kMaxUInt32, kMinInt + 1] | 1437 __ vmov(d1, r4, r5); // q0 = [kMinInt, kMaxInt, kMaxUInt32, kMinInt + 1] |
1436 __ vcvt_f32_s32(q1, q0); | 1438 __ vcvt_f32_s32(q1, q0); |
1437 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_s32)))); | 1439 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_s32)))); |
1438 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1440 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1439 __ vcvt_f32_u32(q1, q0); | 1441 __ vcvt_f32_u32(q1, q0); |
1440 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_u32)))); | 1442 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcvt_f32_u32)))); |
1441 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1443 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1442 | 1444 |
1443 // vdup (integer). | 1445 // vdup (from register). |
1444 __ mov(r4, Operand(0xa)); | 1446 __ mov(r4, Operand(0xa)); |
1445 __ vdup(Neon8, q0, r4); | 1447 __ vdup(Neon8, q0, r4); |
1446 __ vdup(Neon16, q1, r4); | 1448 __ vdup(Neon16, q1, r4); |
1447 __ vdup(Neon32, q2, r4); | 1449 __ vdup(Neon32, q2, r4); |
1448 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup8)))); | 1450 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup8)))); |
1449 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); | 1451 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); |
1450 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup16)))); | 1452 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup16)))); |
1451 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1453 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1452 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup32)))); | 1454 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdup32)))); |
1453 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); | 1455 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); |
1454 | 1456 |
1455 // vdup (float). | 1457 // vdup (from scalar). |
1456 __ vmov(s0, -1.0); | 1458 __ vmov(s0, -1.0); |
1457 __ vdup(q0, s0); | 1459 __ vdup(Neon32, q1, d0, 0); |
1458 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdupf)))); | 1460 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdupf)))); |
1459 __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4)); | 1461 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
| 1462 __ vdup(Neon16, d2, d0, 1); |
| 1463 __ vstr(d2, r0, offsetof(T, vdupf_16)); |
| 1464 __ vdup(Neon8, q1, d0, 3); |
| 1465 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vdupf_8)))); |
| 1466 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1460 | 1467 |
1461 // vabs (float). | 1468 // vabs (float). |
1462 __ vmov(s0, -1.0); | 1469 __ vmov(s0, -1.0); |
1463 __ vmov(s1, -0.0); | 1470 __ vmov(s1, -0.0); |
1464 __ vmov(s2, 0.0); | 1471 __ vmov(s2, 0.0); |
1465 __ vmov(s3, 1.0); | 1472 __ vmov(s3, 1.0); |
1466 __ vabs(q1, q0); | 1473 __ vabs(q1, q0); |
1467 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vabsf)))); | 1474 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vabsf)))); |
1468 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1475 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1469 // vneg (float). | 1476 // vneg (float). |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1518 __ mov(r4, Operand(0xaa)); | 1525 __ mov(r4, Operand(0xaa)); |
1519 __ vdup(Neon16, q0, r4); | 1526 __ vdup(Neon16, q0, r4); |
1520 __ mov(r4, Operand(0x55)); | 1527 __ mov(r4, Operand(0x55)); |
1521 __ vdup(Neon16, q1, r4); | 1528 __ vdup(Neon16, q1, r4); |
1522 __ vorr(q1, q1, q0); | 1529 __ vorr(q1, q1, q0); |
1523 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vorr)))); | 1530 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vorr)))); |
1524 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1531 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1525 | 1532 |
1526 // vmin (float). | 1533 // vmin (float). |
1527 __ vmov(s4, 2.0); | 1534 __ vmov(s4, 2.0); |
1528 __ vdup(q0, s4); | 1535 __ vdup(Neon32, q0, d2, 0); |
1529 __ vmov(s4, 1.0); | 1536 __ vmov(s4, 1.0); |
1530 __ vdup(q1, s4); | 1537 __ vdup(Neon32, q1, d2, 0); |
1531 __ vmin(q1, q1, q0); | 1538 __ vmin(q1, q1, q0); |
1532 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vminf)))); | 1539 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vminf)))); |
1533 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1540 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1534 // vmax (float). | 1541 // vmax (float). |
1535 __ vmov(s4, 2.0); | 1542 __ vmov(s4, 2.0); |
1536 __ vdup(q0, s4); | 1543 __ vdup(Neon32, q0, d2, 0); |
1537 __ vmov(s4, 1.0); | 1544 __ vmov(s4, 1.0); |
1538 __ vdup(q1, s4); | 1545 __ vdup(Neon32, q1, d2, 0); |
1539 __ vmax(q1, q1, q0); | 1546 __ vmax(q1, q1, q0); |
1540 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmaxf)))); | 1547 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmaxf)))); |
1541 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1548 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1542 // vadd (float). | 1549 // vadd (float). |
1543 __ vmov(s4, 1.0); | 1550 __ vmov(s4, 1.0); |
1544 __ vdup(q0, s4); | 1551 __ vdup(Neon32, q0, d2, 0); |
1545 __ vdup(q1, s4); | 1552 __ vdup(Neon32, q1, d2, 0); |
1546 __ vadd(q1, q1, q0); | 1553 __ vadd(q1, q1, q0); |
1547 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf)))); | 1554 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vaddf)))); |
1548 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1555 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1549 // vpadd (float). | 1556 // vpadd (float). |
1550 __ vmov(s0, 1.0); | 1557 __ vmov(s0, 1.0); |
1551 __ vmov(s1, 2.0); | 1558 __ vmov(s1, 2.0); |
1552 __ vmov(s2, 3.0); | 1559 __ vmov(s2, 3.0); |
1553 __ vmov(s3, 4.0); | 1560 __ vmov(s3, 4.0); |
1554 __ vpadd(d2, d0, d1); | 1561 __ vpadd(d2, d0, d1); |
1555 __ vstr(d2, r0, offsetof(T, vpaddf)); | 1562 __ vstr(d2, r0, offsetof(T, vpaddf)); |
1556 // vsub (float). | 1563 // vsub (float). |
1557 __ vmov(s4, 2.0); | 1564 __ vmov(s4, 2.0); |
1558 __ vdup(q0, s4); | 1565 __ vdup(Neon32, q0, d2, 0); |
1559 __ vmov(s4, 1.0); | 1566 __ vmov(s4, 1.0); |
1560 __ vdup(q1, s4); | 1567 __ vdup(Neon32, q1, d2, 0); |
1561 __ vsub(q1, q1, q0); | 1568 __ vsub(q1, q1, q0); |
1562 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf)))); | 1569 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vsubf)))); |
1563 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1570 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1564 // vmul (float). | 1571 // vmul (float). |
1565 __ vmov(s4, 2.0); | 1572 __ vmov(s4, 2.0); |
1566 __ vdup(q0, s4); | 1573 __ vdup(Neon32, q0, d2, 0); |
1567 __ vdup(q1, s4); | 1574 __ vdup(Neon32, q1, d2, 0); |
1568 __ vmul(q1, q1, q0); | 1575 __ vmul(q1, q1, q0); |
1569 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmulf)))); | 1576 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmulf)))); |
1570 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1577 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1571 // vrecpe. | 1578 // vrecpe. |
1572 __ vmov(s4, 2.0); | 1579 __ vmov(s4, 2.0); |
1573 __ vdup(q0, s4); | 1580 __ vdup(Neon32, q0, d2, 0); |
1574 __ vrecpe(q1, q0); | 1581 __ vrecpe(q1, q0); |
1575 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrecpe)))); | 1582 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrecpe)))); |
1576 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1583 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1577 // vrecps. | 1584 // vrecps. |
1578 __ vmov(s4, 2.0); | 1585 __ vmov(s4, 2.0); |
1579 __ vdup(q0, s4); | 1586 __ vdup(Neon32, q0, d2, 0); |
1580 __ vmov(s4, 1.5); | 1587 __ vmov(s4, 1.5); |
1581 __ vdup(q1, s4); | 1588 __ vdup(Neon32, q1, d2, 0); |
1582 __ vrecps(q1, q0, q1); | 1589 __ vrecps(q1, q0, q1); |
1583 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrecps)))); | 1590 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrecps)))); |
1584 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1591 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1585 // vrsqrte. | 1592 // vrsqrte. |
1586 __ vmov(s4, 4.0); | 1593 __ vmov(s4, 4.0); |
1587 __ vdup(q0, s4); | 1594 __ vdup(Neon32, q0, d2, 0); |
1588 __ vrsqrte(q1, q0); | 1595 __ vrsqrte(q1, q0); |
1589 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrsqrte)))); | 1596 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrsqrte)))); |
1590 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1597 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1591 // vrsqrts. | 1598 // vrsqrts. |
1592 __ vmov(s4, 2.0); | 1599 __ vmov(s4, 2.0); |
1593 __ vdup(q0, s4); | 1600 __ vdup(Neon32, q0, d2, 0); |
1594 __ vmov(s4, 2.5); | 1601 __ vmov(s4, 2.5); |
1595 __ vdup(q1, s4); | 1602 __ vdup(Neon32, q1, d2, 0); |
1596 __ vrsqrts(q1, q0, q1); | 1603 __ vrsqrts(q1, q0, q1); |
1597 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrsqrts)))); | 1604 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vrsqrts)))); |
1598 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1605 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1599 // vceq (float). | 1606 // vceq (float). |
1600 __ vmov(s4, 1.0); | 1607 __ vmov(s4, 1.0); |
1601 __ vdup(q0, s4); | 1608 __ vdup(Neon32, q0, d2, 0); |
1602 __ vdup(q1, s4); | 1609 __ vdup(Neon32, q1, d2, 0); |
1603 __ vceq(q1, q1, q0); | 1610 __ vceq(q1, q1, q0); |
1604 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceqf)))); | 1611 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceqf)))); |
1605 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1612 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1606 // vcge (float). | 1613 // vcge (float). |
1607 __ vmov(s0, 1.0); | 1614 __ vmov(s0, 1.0); |
1608 __ vmov(s1, -1.0); | 1615 __ vmov(s1, -1.0); |
1609 __ vmov(s2, -0.0); | 1616 __ vmov(s2, -0.0); |
1610 __ vmov(s3, 0.0); | 1617 __ vmov(s3, 0.0); |
1611 __ vdup(q1, s3); | 1618 __ vdup(Neon32, q1, d1, 1); |
1612 __ vcge(q2, q1, q0); | 1619 __ vcge(q2, q1, q0); |
1613 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgef)))); | 1620 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgef)))); |
1614 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); | 1621 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); |
1615 __ vcgt(q2, q1, q0); | 1622 __ vcgt(q2, q1, q0); |
1616 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgtf)))); | 1623 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vcgtf)))); |
1617 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); | 1624 __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); |
1618 | 1625 |
1619 // vmin/vmax integer. | 1626 // vmin/vmax integer. |
1620 __ mov(r4, Operand(0x03)); | 1627 __ mov(r4, Operand(0x03)); |
1621 __ vdup(Neon16, q0, r4); | 1628 __ vdup(Neon16, q0, r4); |
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1807 __ vshr(NeonS8, q1, q0, 1); | 1814 __ vshr(NeonS8, q1, q0, 1); |
1808 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s8)))); | 1815 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s8)))); |
1809 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1816 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1810 __ vshr(NeonU16, q1, q0, 9); | 1817 __ vshr(NeonU16, q1, q0, 9); |
1811 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_u16)))); | 1818 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_u16)))); |
1812 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1819 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1813 __ vshr(NeonS32, q1, q0, 17); | 1820 __ vshr(NeonS32, q1, q0, 17); |
1814 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s32)))); | 1821 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vshr_s32)))); |
1815 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1822 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1816 | 1823 |
| 1824 // vsli, vsri. |
| 1825 __ mov(r4, Operand(0xffffffff)); |
| 1826 __ mov(r5, Operand(0x1)); |
| 1827 __ vmov(d0, r4, r5); |
| 1828 __ vmov(d1, r5, r5); |
| 1829 __ vsli(Neon64, d1, d0, 32); |
| 1830 __ vstr(d1, r0, offsetof(T, vsli_64)); |
| 1831 __ vmov(d0, r5, r4); |
| 1832 __ vmov(d1, r5, r5); |
| 1833 __ vsri(Neon64, d1, d0, 32); |
| 1834 __ vstr(d1, r0, offsetof(T, vsri_64)); |
| 1835 __ vmov(d0, r4, r5); |
| 1836 __ vmov(d1, r5, r5); |
| 1837 __ vsli(Neon32, d1, d0, 16); |
| 1838 __ vstr(d1, r0, offsetof(T, vsli_32)); |
| 1839 __ vmov(d0, r5, r4); |
| 1840 __ vmov(d1, r5, r5); |
| 1841 __ vsri(Neon32, d1, d0, 16); |
| 1842 __ vstr(d1, r0, offsetof(T, vsri_32)); |
| 1843 |
1817 // vceq. | 1844 // vceq. |
1818 __ mov(r4, Operand(0x03)); | 1845 __ mov(r4, Operand(0x03)); |
1819 __ vdup(Neon8, q0, r4); | 1846 __ vdup(Neon8, q0, r4); |
1820 __ vdup(Neon16, q1, r4); | 1847 __ vdup(Neon16, q1, r4); |
1821 __ vceq(Neon8, q1, q0, q1); | 1848 __ vceq(Neon8, q1, q0, q1); |
1822 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceq)))); | 1849 __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vceq)))); |
1823 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); | 1850 __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); |
1824 | 1851 |
1825 // vcge/vcgt (integer). | 1852 // vcge/vcgt (integer). |
1826 __ mov(r4, Operand(0x03)); | 1853 __ mov(r4, Operand(0x03)); |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2100 CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_s16); | 2127 CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_s16); |
2101 CHECK_EQ(0xFFFFu, t.vmov_from_scalar_u16); | 2128 CHECK_EQ(0xFFFFu, t.vmov_from_scalar_u16); |
2102 CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_32); | 2129 CHECK_EQ(0xFFFFFFFFu, t.vmov_from_scalar_32); |
2103 | 2130 |
2104 CHECK_EQ_32X4(vmov, 0x03020100u, 0x07060504u, 0x0b0a0908u, 0x0f0e0d0cu); | 2131 CHECK_EQ_32X4(vmov, 0x03020100u, 0x07060504u, 0x0b0a0908u, 0x0f0e0d0cu); |
2105 CHECK_EQ_32X4(vmvn, 0xfcfdfeffu, 0xf8f9fafbu, 0xf4f5f6f7u, 0xf0f1f2f3u); | 2132 CHECK_EQ_32X4(vmvn, 0xfcfdfeffu, 0xf8f9fafbu, 0xf4f5f6f7u, 0xf0f1f2f3u); |
2106 | 2133 |
2107 CHECK_EQ_SPLAT(vdup8, 0x0a0a0a0au); | 2134 CHECK_EQ_SPLAT(vdup8, 0x0a0a0a0au); |
2108 CHECK_EQ_SPLAT(vdup16, 0x000a000au); | 2135 CHECK_EQ_SPLAT(vdup16, 0x000a000au); |
2109 CHECK_EQ_SPLAT(vdup32, 0x0000000au); | 2136 CHECK_EQ_SPLAT(vdup32, 0x0000000au); |
2110 CHECK_EQ_SPLAT(vdupf, -1.0); | 2137 CHECK_EQ_SPLAT(vdupf, -1.0); // bit pattern is 0xbf800000. |
| 2138 CHECK_EQ_32X2(vdupf_16, 0xbf80bf80u, 0xbf80bf80u); |
| 2139 CHECK_EQ_SPLAT(vdupf_8, 0xbfbfbfbfu); |
2111 | 2140 |
2112 // src: [-1, -1, 1, 1] | 2141 // src: [-1, -1, 1, 1] |
2113 CHECK_EQ_32X4(vcvt_s32_f32, -1, -1, 1, 1); | 2142 CHECK_EQ_32X4(vcvt_s32_f32, -1, -1, 1, 1); |
2114 CHECK_EQ_32X4(vcvt_u32_f32, 0u, 0u, 1u, 1u); | 2143 CHECK_EQ_32X4(vcvt_u32_f32, 0u, 0u, 1u, 1u); |
2115 // src: [kMinInt, kMaxInt, kMaxUInt32, kMinInt + 1] | 2144 // src: [kMinInt, kMaxInt, kMaxUInt32, kMinInt + 1] |
2116 CHECK_EQ_32X4(vcvt_f32_s32, INT32_TO_FLOAT(kMinInt), | 2145 CHECK_EQ_32X4(vcvt_f32_s32, INT32_TO_FLOAT(kMinInt), |
2117 INT32_TO_FLOAT(kMaxInt), INT32_TO_FLOAT(kMaxUInt32), | 2146 INT32_TO_FLOAT(kMaxInt), INT32_TO_FLOAT(kMaxUInt32), |
2118 INT32_TO_FLOAT(kMinInt + 1)); | 2147 INT32_TO_FLOAT(kMinInt + 1)); |
2119 CHECK_EQ_32X4(vcvt_f32_u32, UINT32_TO_FLOAT(kMinInt), | 2148 CHECK_EQ_32X4(vcvt_f32_u32, UINT32_TO_FLOAT(kMinInt), |
2120 UINT32_TO_FLOAT(kMaxInt), UINT32_TO_FLOAT(kMaxUInt32), | 2149 UINT32_TO_FLOAT(kMaxInt), UINT32_TO_FLOAT(kMaxUInt32), |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2182 CHECK_EQ_SPLAT(vsub32, 0xfffffffeu); | 2211 CHECK_EQ_SPLAT(vsub32, 0xfffffffeu); |
2183 CHECK_EQ_SPLAT(vmul8, 0x04040404u); | 2212 CHECK_EQ_SPLAT(vmul8, 0x04040404u); |
2184 CHECK_EQ_SPLAT(vmul16, 0x00040004u); | 2213 CHECK_EQ_SPLAT(vmul16, 0x00040004u); |
2185 CHECK_EQ_SPLAT(vmul32, 0x00000004u); | 2214 CHECK_EQ_SPLAT(vmul32, 0x00000004u); |
2186 CHECK_EQ_SPLAT(vshl8, 0xaaaaaaaau); | 2215 CHECK_EQ_SPLAT(vshl8, 0xaaaaaaaau); |
2187 CHECK_EQ_SPLAT(vshl16, 0xaa00aa00u); | 2216 CHECK_EQ_SPLAT(vshl16, 0xaa00aa00u); |
2188 CHECK_EQ_SPLAT(vshl32, 0xaaaa0000u); | 2217 CHECK_EQ_SPLAT(vshl32, 0xaaaa0000u); |
2189 CHECK_EQ_SPLAT(vshr_s8, 0xc0c0c0c0u); | 2218 CHECK_EQ_SPLAT(vshr_s8, 0xc0c0c0c0u); |
2190 CHECK_EQ_SPLAT(vshr_u16, 0x00400040u); | 2219 CHECK_EQ_SPLAT(vshr_u16, 0x00400040u); |
2191 CHECK_EQ_SPLAT(vshr_s32, 0xffffc040u); | 2220 CHECK_EQ_SPLAT(vshr_s32, 0xffffc040u); |
| 2221 CHECK_EQ_32X2(vsli_64, 0x01u, 0xffffffffu); |
| 2222 CHECK_EQ_32X2(vsri_64, 0xffffffffu, 0x01u); |
| 2223 CHECK_EQ_32X2(vsli_32, 0xffff0001u, 0x00010001u); |
| 2224 CHECK_EQ_32X2(vsri_32, 0x00000000u, 0x0000ffffu); |
2192 CHECK_EQ_SPLAT(vceq, 0x00ff00ffu); | 2225 CHECK_EQ_SPLAT(vceq, 0x00ff00ffu); |
2193 // [0, 3, 0, 3, ...] >= [3, 3, 3, 3, ...] | 2226 // [0, 3, 0, 3, ...] >= [3, 3, 3, 3, ...] |
2194 CHECK_EQ_SPLAT(vcge_s8, 0x00ff00ffu); | 2227 CHECK_EQ_SPLAT(vcge_s8, 0x00ff00ffu); |
2195 CHECK_EQ_SPLAT(vcgt_s8, 0u); | 2228 CHECK_EQ_SPLAT(vcgt_s8, 0u); |
2196 // [0x00ff, 0x00ff, ...] >= [0xffff, 0xffff, ...] | 2229 // [0x00ff, 0x00ff, ...] >= [0xffff, 0xffff, ...] |
2197 CHECK_EQ_SPLAT(vcge_u16, 0u); | 2230 CHECK_EQ_SPLAT(vcge_u16, 0u); |
2198 CHECK_EQ_SPLAT(vcgt_u16, 0u); | 2231 CHECK_EQ_SPLAT(vcgt_u16, 0u); |
2199 // [0x000000ff, 0x000000ff, ...] >= [0xffffffff, 0xffffffff, ...] | 2232 // [0x000000ff, 0x000000ff, ...] >= [0xffffffff, 0xffffffff, ...] |
2200 CHECK_EQ_SPLAT(vcge_s32, 0xffffffffu); | 2233 CHECK_EQ_SPLAT(vcge_s32, 0xffffffffu); |
2201 CHECK_EQ_SPLAT(vcgt_s32, 0xffffffffu); | 2234 CHECK_EQ_SPLAT(vcgt_s32, 0xffffffffu); |
(...skipping 1631 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3833 __ vswp(d30, d31); | 3866 __ vswp(d30, d31); |
3834 __ vstr(d30, r0, offsetof(T, vswp_d30)); | 3867 __ vstr(d30, r0, offsetof(T, vswp_d30)); |
3835 __ vstr(d31, r0, offsetof(T, vswp_d31)); | 3868 __ vstr(d31, r0, offsetof(T, vswp_d31)); |
3836 } | 3869 } |
3837 | 3870 |
3838 // q-register swap. | 3871 // q-register swap. |
3839 const uint32_t test_1 = 0x01234567; | 3872 const uint32_t test_1 = 0x01234567; |
3840 const uint32_t test_2 = 0x89abcdef; | 3873 const uint32_t test_2 = 0x89abcdef; |
3841 __ mov(r4, Operand(test_1)); | 3874 __ mov(r4, Operand(test_1)); |
3842 __ mov(r5, Operand(test_2)); | 3875 __ mov(r5, Operand(test_2)); |
3843 // TODO(bbudge) replace with vdup when implemented. | 3876 __ vdup(Neon32, q4, r4); |
3844 __ vmov(d8, r4, r4); | 3877 __ vdup(Neon32, q5, r5); |
3845 __ vmov(d9, r4, r4); // q4 = [1.0, 1.0] | |
3846 __ vmov(d10, r5, r5); | |
3847 __ vmov(d11, r5, r5); // q5 = [-1.0, -1.0] | |
3848 __ vswp(q4, q5); | 3878 __ vswp(q4, q5); |
3849 __ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q4)))); | 3879 __ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q4)))); |
3850 __ vst1(Neon8, NeonListOperand(q4), NeonMemOperand(r6)); | 3880 __ vst1(Neon8, NeonListOperand(q4), NeonMemOperand(r6)); |
3851 __ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q5)))); | 3881 __ add(r6, r0, Operand(static_cast<int32_t>(offsetof(T, vswp_q5)))); |
3852 __ vst1(Neon8, NeonListOperand(q5), NeonMemOperand(r6)); | 3882 __ vst1(Neon8, NeonListOperand(q5), NeonMemOperand(r6)); |
3853 | 3883 |
3854 __ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit()); | 3884 __ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit()); |
3855 __ bx(lr); | 3885 __ bx(lr); |
3856 | 3886 |
3857 CodeDesc desc; | 3887 CodeDesc desc; |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3932 HandleScope scope(isolate); | 3962 HandleScope scope(isolate); |
3933 | 3963 |
3934 Assembler assm(isolate, NULL, 0); | 3964 Assembler assm(isolate, NULL, 0); |
3935 __ mov(r0, Operand(isolate->factory()->infinity_value())); | 3965 __ mov(r0, Operand(isolate->factory()->infinity_value())); |
3936 __ BlockConstPoolFor(1019); | 3966 __ BlockConstPoolFor(1019); |
3937 for (int i = 0; i < 1019; ++i) __ nop(); | 3967 for (int i = 0; i < 1019; ++i) __ nop(); |
3938 __ vldr(d0, MemOperand(r0, 0)); | 3968 __ vldr(d0, MemOperand(r0, 0)); |
3939 } | 3969 } |
3940 | 3970 |
3941 #undef __ | 3971 #undef __ |
OLD | NEW |