Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(95)

Side by Side Diff: simd/jsimd_arm_neon.S

Issue 1270213002: Add support for decoding to 565 to libjpeg_turbo (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libjpeg_turbo.git@master
Patch Set: Link crbug in the README Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « simd/jsimd_arm.c ('k') | simd/jsimd_i386.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * ARMv7 NEON optimizations for libjpeg-turbo 2 * ARMv7 NEON optimizations for libjpeg-turbo
3 * 3 *
4 * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies). 4 * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
5 * All rights reserved. 5 * All rights reserved.
6 * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> 6 * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
7 * Copyright (C) 2014 Linaro Limited. All Rights Reserved.
7 * 8 *
8 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
9 * warranty. In no event will the authors be held liable for any damages 10 * warranty. In no event will the authors be held liable for any damages
10 * arising from the use of this software. 11 * arising from the use of this software.
11 * 12 *
12 * Permission is granted to anyone to use this software for any purpose, 13 * Permission is granted to anyone to use this software for any purpose,
13 * including commercial applications, and to alter it and redistribute it 14 * including commercial applications, and to alter it and redistribute it
14 * freely, subject to the following restrictions: 15 * freely, subject to the following restrictions:
15 * 16 *
16 * 1. The origin of this software must not be misrepresented; you must not 17 * 1. The origin of this software must not be misrepresented; you must not
(...skipping 1322 matching lines...) Expand 10 before | Expand all | Expand 10 after
1339 vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! 1340 vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]!
1340 vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! 1341 vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]!
1341 .elseif \size == 2 1342 .elseif \size == 2
1342 vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! 1343 vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]!
1343 vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! 1344 vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]!
1344 .elseif \size == 1 1345 .elseif \size == 1
1345 vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! 1346 vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]!
1346 .else 1347 .else
1347 .error unsupported macroblock size 1348 .error unsupported macroblock size
1348 .endif 1349 .endif
1350 .elseif \bpp == 16
1351 .if \size == 8
1352 vst1.16 {q15}, [RGB]!
1353 .elseif \size == 4
1354 vst1.16 {d30}, [RGB]!
1355 .elseif \size == 2
1356 vst1.16 {d31[0]}, [RGB]!
1357 vst1.16 {d31[1]}, [RGB]!
1358 .elseif \size == 1
1359 vst1.16 {d31[2]}, [RGB]!
1360 .else
1361 .error unsupported macroblock size
1362 .endif
1349 .else 1363 .else
1350 .error unsupported bpp 1364 .error unsupported bpp
1351 .endif 1365 .endif
1352 .endm 1366 .endm
1353 1367
1354 .macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, g_offs, b_offs 1368 .macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, g_offs, b_offs
1355 1369
1356 /* 1370 /*
1357 * 2 stage pipelined YCbCr->RGB conversion 1371 * 2 stage pipelined YCbCr->RGB conversion
1358 */ 1372 */
(...skipping 11 matching lines...) Expand all
1370 vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ 1384 vmull.s16 q15, d7, d1[3] /* multiply by 29033 */
1371 .endm 1385 .endm
1372 1386
1373 .macro do_yuv_to_rgb_stage2 1387 .macro do_yuv_to_rgb_stage2
1374 vrshrn.s32 d20, q10, #15 1388 vrshrn.s32 d20, q10, #15
1375 vrshrn.s32 d21, q11, #15 1389 vrshrn.s32 d21, q11, #15
1376 vrshrn.s32 d24, q12, #14 1390 vrshrn.s32 d24, q12, #14
1377 vrshrn.s32 d25, q13, #14 1391 vrshrn.s32 d25, q13, #14
1378 vrshrn.s32 d28, q14, #14 1392 vrshrn.s32 d28, q14, #14
1379 vrshrn.s32 d29, q15, #14 1393 vrshrn.s32 d29, q15, #14
1380 vaddw.u8 q10, q10, d0 1394 vaddw.u8 q11, q10, d0
1381 vaddw.u8 q12, q12, d0 1395 vaddw.u8 q12, q12, d0
1382 vaddw.u8 q14, q14, d0 1396 vaddw.u8 q14, q14, d0
1383 vqmovun.s16 d1\g_offs, q10 1397 .if \bpp != 16
1398 vqmovun.s16 d1\g_offs, q11
1384 vqmovun.s16 d1\r_offs, q12 1399 vqmovun.s16 d1\r_offs, q12
1385 vqmovun.s16 d1\b_offs, q14 1400 vqmovun.s16 d1\b_offs, q14
1401 .else /* rgb565 */
1402 vqshlu.s16 q13, q11, #8
1403 vqshlu.s16 q15, q12, #8
1404 vqshlu.s16 q14, q14, #8
1405 vsri.u16 q15, q13, #5
1406 vsri.u16 q15, q14, #11
1407 .endif
1386 .endm 1408 .endm
1387 1409
1388 .macro do_yuv_to_rgb_stage2_store_load_stage1 1410 .macro do_yuv_to_rgb_stage2_store_load_stage1
1389 vld1.8 {d4}, [U, :64]! 1411 /* "do_yuv_to_rgb_stage2" and "store" */
1390 vrshrn.s32 d20, q10, #15 1412 vrshrn.s32 d20, q10, #15
1413 /* "load" and "do_yuv_to_rgb_stage1" */
1414 pld [U, #64]
1391 vrshrn.s32 d21, q11, #15 1415 vrshrn.s32 d21, q11, #15
1416 pld [V, #64]
1392 vrshrn.s32 d24, q12, #14 1417 vrshrn.s32 d24, q12, #14
1393 vrshrn.s32 d25, q13, #14 1418 vrshrn.s32 d25, q13, #14
1419 vld1.8 {d4}, [U, :64]!
1394 vrshrn.s32 d28, q14, #14 1420 vrshrn.s32 d28, q14, #14
1395 vld1.8 {d5}, [V, :64]! 1421 vld1.8 {d5}, [V, :64]!
1396 vrshrn.s32 d29, q15, #14 1422 vrshrn.s32 d29, q15, #14
1397 vaddw.u8 q10, q10, d0 1423 vaddw.u8 q3, q1, d4 /* q3 = u - 128 */
1424 vaddw.u8 q4, q1, d5 /* q4 = v - 128 */
1425 vaddw.u8 q11, q10, d0
1426 vmull.s16 q10, d6, d1[1] /* multiply by -11277 */
1427 vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */
1398 vaddw.u8 q12, q12, d0 1428 vaddw.u8 q12, q12, d0
1399 vaddw.u8 q14, q14, d0 1429 vaddw.u8 q14, q14, d0
1400 vqmovun.s16 d1\g_offs, q10 1430 .if \bpp != 16 /**************** rgb24/rgb32 *********************************/
1431 vqmovun.s16 d1\g_offs, q11
1432 pld [Y, #64]
1433 vqmovun.s16 d1\r_offs, q12
1401 vld1.8 {d0}, [Y, :64]! 1434 vld1.8 {d0}, [Y, :64]!
1402 vqmovun.s16 d1\r_offs, q12
1403 pld [U, #64]
1404 pld [V, #64]
1405 pld [Y, #64]
1406 vqmovun.s16 d1\b_offs, q14 1435 vqmovun.s16 d1\b_offs, q14
1407 vaddw.u8 q3, q1, d4 /* q3 = u - 128 */
1408 vaddw.u8 q4, q1, d5 /* q4 = v - 128 */
1409 do_store \bpp, 8
1410 vmull.s16 q10, d6, d1[1] /* multiply by -11277 */
1411 vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */
1412 vmull.s16 q11, d7, d1[1] /* multiply by -11277 */ 1436 vmull.s16 q11, d7, d1[1] /* multiply by -11277 */
1413 vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */ 1437 vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */
1438 do_store \bpp, 8
1414 vmull.s16 q12, d8, d1[0] /* multiply by 22971 */ 1439 vmull.s16 q12, d8, d1[0] /* multiply by 22971 */
1415 vmull.s16 q13, d9, d1[0] /* multiply by 22971 */ 1440 vmull.s16 q13, d9, d1[0] /* multiply by 22971 */
1416 vmull.s16 q14, d6, d1[3] /* multiply by 29033 */ 1441 vmull.s16 q14, d6, d1[3] /* multiply by 29033 */
1417 vmull.s16 q15, d7, d1[3] /* multiply by 29033 */ 1442 vmull.s16 q15, d7, d1[3] /* multiply by 29033 */
1443 .else /**************************** rgb565 ***********************************/
1444 vqshlu.s16 q13, q11, #8
1445 pld [Y, #64]
1446 vqshlu.s16 q15, q12, #8
1447 vqshlu.s16 q14, q14, #8
1448 vld1.8 {d0}, [Y, :64]!
1449 vmull.s16 q11, d7, d1[1]
1450 vmlal.s16 q11, d9, d1[2]
1451 vsri.u16 q15, q13, #5
1452 vmull.s16 q12, d8, d1[0]
1453 vsri.u16 q15, q14, #11
1454 vmull.s16 q13, d9, d1[0]
1455 vmull.s16 q14, d6, d1[3]
1456 do_store \bpp, 8
1457 vmull.s16 q15, d7, d1[3]
1458 .endif
1418 .endm 1459 .endm
1419 1460
1420 .macro do_yuv_to_rgb 1461 .macro do_yuv_to_rgb
1421 do_yuv_to_rgb_stage1 1462 do_yuv_to_rgb_stage1
1422 do_yuv_to_rgb_stage2 1463 do_yuv_to_rgb_stage2
1423 .endm 1464 .endm
1424 1465
1425 /* Apple gas crashes on adrl, work around that by using adr. 1466 /* Apple gas crashes on adrl, work around that by using adr.
1426 * But this requires a copy of these constants for each function. 1467 * But this requires a copy of these constants for each function.
1427 */ 1468 */
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
1549 1590
1550 .endm 1591 .endm
1551 1592
1552 /*--------------------------------- id ----- bpp R G B */ 1593 /*--------------------------------- id ----- bpp R G B */
1553 generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, 1, 2 1594 generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, 1, 2
1554 generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, 1, 0 1595 generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, 1, 0
1555 generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, 1, 2 1596 generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, 1, 2
1556 generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, 1, 0 1597 generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, 1, 0
1557 generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, 2, 1 1598 generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, 2, 1
1558 generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3 1599 generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3
1600 generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, 0, 0
1559 1601
1560 .purgem do_load 1602 .purgem do_load
1561 .purgem do_store 1603 .purgem do_store
1562 1604
1563 1605
1564 /*****************************************************************************/ 1606 /*****************************************************************************/
1565 1607
1566 /* 1608 /*
1567 * jsimd_extrgb_ycc_convert_neon 1609 * jsimd_extrgb_ycc_convert_neon
1568 * jsimd_extbgr_ycc_convert_neon 1610 * jsimd_extbgr_ycc_convert_neon
(...skipping 818 matching lines...) Expand 10 before | Expand all | Expand 10 after
2387 2429
2388 .unreq OUTPTR 2430 .unreq OUTPTR
2389 .unreq INPTR 2431 .unreq INPTR
2390 .unreq WIDTH 2432 .unreq WIDTH
2391 .unreq TMP 2433 .unreq TMP
2392 2434
2393 2435
2394 .purgem upsample16 2436 .purgem upsample16
2395 .purgem upsample32 2437 .purgem upsample32
2396 .purgem upsample_row 2438 .purgem upsample_row
OLDNEW
« no previous file with comments | « simd/jsimd_arm.c ('k') | simd/jsimd_i386.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698