Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1141)

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <arm_neon.h>
12 #include "vp9/common/vp9_idct.h"
13
14 #include "./vpx_config.h"
15
16 static INLINE void LD_16x8(
17 uint8_t *d,
18 int d_stride,
19 uint8x16_t *q8u8,
20 uint8x16_t *q9u8,
21 uint8x16_t *q10u8,
22 uint8x16_t *q11u8,
23 uint8x16_t *q12u8,
24 uint8x16_t *q13u8,
25 uint8x16_t *q14u8,
26 uint8x16_t *q15u8) {
27 *q8u8 = vld1q_u8(d);
28 d += d_stride;
29 *q9u8 = vld1q_u8(d);
30 d += d_stride;
31 *q10u8 = vld1q_u8(d);
32 d += d_stride;
33 *q11u8 = vld1q_u8(d);
34 d += d_stride;
35 *q12u8 = vld1q_u8(d);
36 d += d_stride;
37 *q13u8 = vld1q_u8(d);
38 d += d_stride;
39 *q14u8 = vld1q_u8(d);
40 d += d_stride;
41 *q15u8 = vld1q_u8(d);
42 return;
43 }
44
45 static INLINE void ADD_DIFF_16x8(
46 uint8x16_t qdiffu8,
47 uint8x16_t *q8u8,
48 uint8x16_t *q9u8,
49 uint8x16_t *q10u8,
50 uint8x16_t *q11u8,
51 uint8x16_t *q12u8,
52 uint8x16_t *q13u8,
53 uint8x16_t *q14u8,
54 uint8x16_t *q15u8) {
55 *q8u8 = vqaddq_u8(*q8u8, qdiffu8);
56 *q9u8 = vqaddq_u8(*q9u8, qdiffu8);
57 *q10u8 = vqaddq_u8(*q10u8, qdiffu8);
58 *q11u8 = vqaddq_u8(*q11u8, qdiffu8);
59 *q12u8 = vqaddq_u8(*q12u8, qdiffu8);
60 *q13u8 = vqaddq_u8(*q13u8, qdiffu8);
61 *q14u8 = vqaddq_u8(*q14u8, qdiffu8);
62 *q15u8 = vqaddq_u8(*q15u8, qdiffu8);
63 return;
64 }
65
66 static INLINE void SUB_DIFF_16x8(
67 uint8x16_t qdiffu8,
68 uint8x16_t *q8u8,
69 uint8x16_t *q9u8,
70 uint8x16_t *q10u8,
71 uint8x16_t *q11u8,
72 uint8x16_t *q12u8,
73 uint8x16_t *q13u8,
74 uint8x16_t *q14u8,
75 uint8x16_t *q15u8) {
76 *q8u8 = vqsubq_u8(*q8u8, qdiffu8);
77 *q9u8 = vqsubq_u8(*q9u8, qdiffu8);
78 *q10u8 = vqsubq_u8(*q10u8, qdiffu8);
79 *q11u8 = vqsubq_u8(*q11u8, qdiffu8);
80 *q12u8 = vqsubq_u8(*q12u8, qdiffu8);
81 *q13u8 = vqsubq_u8(*q13u8, qdiffu8);
82 *q14u8 = vqsubq_u8(*q14u8, qdiffu8);
83 *q15u8 = vqsubq_u8(*q15u8, qdiffu8);
84 return;
85 }
86
87 static INLINE void ST_16x8(
88 uint8_t *d,
89 int d_stride,
90 uint8x16_t *q8u8,
91 uint8x16_t *q9u8,
92 uint8x16_t *q10u8,
93 uint8x16_t *q11u8,
94 uint8x16_t *q12u8,
95 uint8x16_t *q13u8,
96 uint8x16_t *q14u8,
97 uint8x16_t *q15u8) {
98 vst1q_u8(d, *q8u8);
99 d += d_stride;
100 vst1q_u8(d, *q9u8);
101 d += d_stride;
102 vst1q_u8(d, *q10u8);
103 d += d_stride;
104 vst1q_u8(d, *q11u8);
105 d += d_stride;
106 vst1q_u8(d, *q12u8);
107 d += d_stride;
108 vst1q_u8(d, *q13u8);
109 d += d_stride;
110 vst1q_u8(d, *q14u8);
111 d += d_stride;
112 vst1q_u8(d, *q15u8);
113 return;
114 }
115
116 void vp9_idct32x32_1_add_neon(
117 int16_t *input,
118 uint8_t *dest,
119 int dest_stride) {
120 uint8x16_t q0u8, q8u8, q9u8, q10u8, q11u8, q12u8, q13u8, q14u8, q15u8;
121 int i, j, dest_stride8;
122 uint8_t *d;
123 int16_t a1, cospi_16_64 = 11585;
124 int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
125
126 out = dct_const_round_shift(out * cospi_16_64);
127 a1 = ROUND_POWER_OF_TWO(out, 6);
128
129 dest_stride8 = dest_stride * 8;
130 if (a1 >= 0) { // diff_positive_32_32
131 a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1;
132 q0u8 = vdupq_n_u8(a1);
133 for (i = 0; i < 2; i++, dest += 16) { // diff_positive_32_32_loop
134 d = dest;
135 for (j = 0; j < 4; j++) {
136 LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8,
137 &q12u8, &q13u8, &q14u8, &q15u8);
138 ADD_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8,
139 &q12u8, &q13u8, &q14u8, &q15u8);
140 ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8,
141 &q12u8, &q13u8, &q14u8, &q15u8);
142 d += dest_stride8;
143 }
144 }
145 } else { // diff_negative_32_32
146 a1 = -a1;
147 a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1;
148 q0u8 = vdupq_n_u8(a1);
149 for (i = 0; i < 2; i++, dest += 16) { // diff_negative_32_32_loop
150 d = dest;
151 for (j = 0; j < 4; j++) {
152 LD_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8,
153 &q12u8, &q13u8, &q14u8, &q15u8);
154 SUB_DIFF_16x8(q0u8, &q8u8, &q9u8, &q10u8, &q11u8,
155 &q12u8, &q13u8, &q14u8, &q15u8);
156 ST_16x8(d, dest_stride, &q8u8, &q9u8, &q10u8, &q11u8,
157 &q12u8, &q13u8, &q14u8, &q15u8);
158 d += dest_stride8;
159 }
160 }
161 }
162 return;
163 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698