Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Unified Diff: simd/jsimd_arm_neon.S

Issue 1270213002: Add support for decoding to 565 to libjpeg_turbo (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libjpeg_turbo.git@master
Patch Set: Link crbug in the README Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « simd/jsimd_arm.c ('k') | simd/jsimd_i386.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: simd/jsimd_arm_neon.S
diff --git a/simd/jsimd_arm_neon.S b/simd/jsimd_arm_neon.S
index 44c61fdd5f30183698359e1ed9ac2d87b8ca9b06..7e8e134ce672f639f72b7c96a82c8aa3aaf15bd4 100644
--- a/simd/jsimd_arm_neon.S
+++ b/simd/jsimd_arm_neon.S
@@ -4,6 +4,7 @@
* Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+ * Copyright (C) 2014 Linaro Limited. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@@ -1346,6 +1347,19 @@ asm_function jsimd_idct_2x2_neon
.else
.error unsupported macroblock size
.endif
+ .elseif \bpp == 16
+ .if \size == 8
+ vst1.16 {q15}, [RGB]!
+ .elseif \size == 4
+ vst1.16 {d30}, [RGB]!
+ .elseif \size == 2
+ vst1.16 {d31[0]}, [RGB]!
+ vst1.16 {d31[1]}, [RGB]!
+ .elseif \size == 1
+ vst1.16 {d31[2]}, [RGB]!
+ .else
+ .error unsupported macroblock size
+ .endif
.else
.error unsupported bpp
.endif
@@ -1377,44 +1391,71 @@ asm_function jsimd_idct_2x2_neon
vrshrn.s32 d25, q13, #14
vrshrn.s32 d28, q14, #14
vrshrn.s32 d29, q15, #14
- vaddw.u8 q10, q10, d0
+ vaddw.u8 q11, q10, d0
vaddw.u8 q12, q12, d0
vaddw.u8 q14, q14, d0
- vqmovun.s16 d1\g_offs, q10
+.if \bpp != 16
+ vqmovun.s16 d1\g_offs, q11
vqmovun.s16 d1\r_offs, q12
vqmovun.s16 d1\b_offs, q14
+.else /* rgb565 */
+ vqshlu.s16 q13, q11, #8
+ vqshlu.s16 q15, q12, #8
+ vqshlu.s16 q14, q14, #8
+ vsri.u16 q15, q13, #5
+ vsri.u16 q15, q14, #11
+.endif
.endm
.macro do_yuv_to_rgb_stage2_store_load_stage1
- vld1.8 {d4}, [U, :64]!
+ /* "do_yuv_to_rgb_stage2" and "store" */
vrshrn.s32 d20, q10, #15
+ /* "load" and "do_yuv_to_rgb_stage1" */
+ pld [U, #64]
vrshrn.s32 d21, q11, #15
+ pld [V, #64]
vrshrn.s32 d24, q12, #14
vrshrn.s32 d25, q13, #14
+ vld1.8 {d4}, [U, :64]!
vrshrn.s32 d28, q14, #14
vld1.8 {d5}, [V, :64]!
vrshrn.s32 d29, q15, #14
- vaddw.u8 q10, q10, d0
- vaddw.u8 q12, q12, d0
- vaddw.u8 q14, q14, d0
- vqmovun.s16 d1\g_offs, q10
- vld1.8 {d0}, [Y, :64]!
- vqmovun.s16 d1\r_offs, q12
- pld [U, #64]
- pld [V, #64]
- pld [Y, #64]
- vqmovun.s16 d1\b_offs, q14
vaddw.u8 q3, q1, d4 /* q3 = u - 128 */
vaddw.u8 q4, q1, d5 /* q4 = v - 128 */
- do_store \bpp, 8
+ vaddw.u8 q11, q10, d0
vmull.s16 q10, d6, d1[1] /* multiply by -11277 */
vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */
+ vaddw.u8 q12, q12, d0
+ vaddw.u8 q14, q14, d0
+.if \bpp != 16 /**************** rgb24/rgb32 *********************************/
+ vqmovun.s16 d1\g_offs, q11
+ pld [Y, #64]
+ vqmovun.s16 d1\r_offs, q12
+ vld1.8 {d0}, [Y, :64]!
+ vqmovun.s16 d1\b_offs, q14
vmull.s16 q11, d7, d1[1] /* multiply by -11277 */
vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */
+ do_store \bpp, 8
vmull.s16 q12, d8, d1[0] /* multiply by 22971 */
vmull.s16 q13, d9, d1[0] /* multiply by 22971 */
vmull.s16 q14, d6, d1[3] /* multiply by 29033 */
vmull.s16 q15, d7, d1[3] /* multiply by 29033 */
+.else /**************************** rgb565 ***********************************/
+ vqshlu.s16 q13, q11, #8
+ pld [Y, #64]
+ vqshlu.s16 q15, q12, #8
+ vqshlu.s16 q14, q14, #8
+ vld1.8 {d0}, [Y, :64]!
+ vmull.s16 q11, d7, d1[1]
+ vmlal.s16 q11, d9, d1[2]
+ vsri.u16 q15, q13, #5
+ vmull.s16 q12, d8, d1[0]
+ vsri.u16 q15, q14, #11
+ vmull.s16 q13, d9, d1[0]
+ vmull.s16 q14, d6, d1[3]
+ do_store \bpp, 8
+ vmull.s16 q15, d7, d1[3]
+.endif
.endm
.macro do_yuv_to_rgb
@@ -1556,6 +1597,7 @@ generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, 1, 2
generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, 1, 0
generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, 2, 1
generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3
+generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, 0, 0
.purgem do_load
.purgem do_store
« no previous file with comments | « simd/jsimd_arm.c ('k') | simd/jsimd_i386.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698