Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(211)

Side by Side Diff: source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <arm_neon.h> 11 #include <arm_neon.h>
12 #include "./vpx_config.h" 12 #include "./vpx_config.h"
13 #include "vpx_ports/arm.h"
13 14
14 #if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) 15 #ifdef VPX_INCOMPATIBLE_GCC
15 static INLINE void write_2x8(unsigned char *dst, int pitch,
16 const uint8x8x2_t result,
17 const uint8x8x2_t result2) {
18 vst2_lane_u8(dst, result, 0);
19 dst += pitch;
20 vst2_lane_u8(dst, result, 1);
21 dst += pitch;
22 vst2_lane_u8(dst, result, 2);
23 dst += pitch;
24 vst2_lane_u8(dst, result, 3);
25 dst += pitch;
26 vst2_lane_u8(dst, result, 4);
27 dst += pitch;
28 vst2_lane_u8(dst, result, 5);
29 dst += pitch;
30 vst2_lane_u8(dst, result, 6);
31 dst += pitch;
32 vst2_lane_u8(dst, result, 7);
33 dst += pitch;
34
35 vst2_lane_u8(dst, result2, 0);
36 dst += pitch;
37 vst2_lane_u8(dst, result2, 1);
38 dst += pitch;
39 vst2_lane_u8(dst, result2, 2);
40 dst += pitch;
41 vst2_lane_u8(dst, result2, 3);
42 dst += pitch;
43 vst2_lane_u8(dst, result2, 4);
44 dst += pitch;
45 vst2_lane_u8(dst, result2, 5);
46 dst += pitch;
47 vst2_lane_u8(dst, result2, 6);
48 dst += pitch;
49 vst2_lane_u8(dst, result2, 7);
50 }
51 #else
52 static INLINE void write_2x4(unsigned char *dst, int pitch, 16 static INLINE void write_2x4(unsigned char *dst, int pitch,
53 const uint8x8x2_t result) { 17 const uint8x8x2_t result) {
54 /* 18 /*
55 * uint8x8x2_t result 19 * uint8x8x2_t result
56 00 01 02 03 | 04 05 06 07 20 00 01 02 03 | 04 05 06 07
57 10 11 12 13 | 14 15 16 17 21 10 11 12 13 | 14 15 16 17
58 --- 22 ---
59 * after vtrn_u8 23 * after vtrn_u8
60 00 10 02 12 | 04 14 06 16 24 00 10 02 12 | 04 14 06 16
61 01 11 03 13 | 05 15 07 17 25 01 11 03 13 | 05 15 07 17
(...skipping 19 matching lines...) Expand all
81 vst1_lane_u16((uint16_t *)dst, x_1_5, 3); 45 vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
82 } 46 }
83 47
84 static INLINE void write_2x8(unsigned char *dst, int pitch, 48 static INLINE void write_2x8(unsigned char *dst, int pitch,
85 const uint8x8x2_t result, 49 const uint8x8x2_t result,
86 const uint8x8x2_t result2) { 50 const uint8x8x2_t result2) {
87 write_2x4(dst, pitch, result); 51 write_2x4(dst, pitch, result);
88 dst += pitch * 8; 52 dst += pitch * 8;
89 write_2x4(dst, pitch, result2); 53 write_2x4(dst, pitch, result2);
90 } 54 }
91 #endif 55 #else
56 static INLINE void write_2x8(unsigned char *dst, int pitch,
57 const uint8x8x2_t result,
58 const uint8x8x2_t result2) {
59 vst2_lane_u8(dst, result, 0);
60 dst += pitch;
61 vst2_lane_u8(dst, result, 1);
62 dst += pitch;
63 vst2_lane_u8(dst, result, 2);
64 dst += pitch;
65 vst2_lane_u8(dst, result, 3);
66 dst += pitch;
67 vst2_lane_u8(dst, result, 4);
68 dst += pitch;
69 vst2_lane_u8(dst, result, 5);
70 dst += pitch;
71 vst2_lane_u8(dst, result, 6);
72 dst += pitch;
73 vst2_lane_u8(dst, result, 7);
74 dst += pitch;
75
76 vst2_lane_u8(dst, result2, 0);
77 dst += pitch;
78 vst2_lane_u8(dst, result2, 1);
79 dst += pitch;
80 vst2_lane_u8(dst, result2, 2);
81 dst += pitch;
82 vst2_lane_u8(dst, result2, 3);
83 dst += pitch;
84 vst2_lane_u8(dst, result2, 4);
85 dst += pitch;
86 vst2_lane_u8(dst, result2, 5);
87 dst += pitch;
88 vst2_lane_u8(dst, result2, 6);
89 dst += pitch;
90 vst2_lane_u8(dst, result2, 7);
91 }
92 #endif // VPX_INCOMPATIBLE_GCC
92 93
93 94
94 #if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) 95 #ifdef VPX_INCOMPATIBLE_GCC
95 static INLINE
96 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) {
97 x = vld4_lane_u8(src, x, 0);
98 src += pitch;
99 x = vld4_lane_u8(src, x, 1);
100 src += pitch;
101 x = vld4_lane_u8(src, x, 2);
102 src += pitch;
103 x = vld4_lane_u8(src, x, 3);
104 src += pitch;
105 x = vld4_lane_u8(src, x, 4);
106 src += pitch;
107 x = vld4_lane_u8(src, x, 5);
108 src += pitch;
109 x = vld4_lane_u8(src, x, 6);
110 src += pitch;
111 x = vld4_lane_u8(src, x, 7);
112 return x;
113 }
114 #else
115 static INLINE 96 static INLINE
116 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { 97 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) {
117 const uint8x8_t a = vld1_u8(src); 98 const uint8x8_t a = vld1_u8(src);
118 const uint8x8_t b = vld1_u8(src + pitch * 1); 99 const uint8x8_t b = vld1_u8(src + pitch * 1);
119 const uint8x8_t c = vld1_u8(src + pitch * 2); 100 const uint8x8_t c = vld1_u8(src + pitch * 2);
120 const uint8x8_t d = vld1_u8(src + pitch * 3); 101 const uint8x8_t d = vld1_u8(src + pitch * 3);
121 const uint8x8_t e = vld1_u8(src + pitch * 4); 102 const uint8x8_t e = vld1_u8(src + pitch * 4);
122 const uint8x8_t f = vld1_u8(src + pitch * 5); 103 const uint8x8_t f = vld1_u8(src + pitch * 5);
123 const uint8x8_t g = vld1_u8(src + pitch * 6); 104 const uint8x8_t g = vld1_u8(src + pitch * 6);
124 const uint8x8_t h = vld1_u8(src + pitch * 7); 105 const uint8x8_t h = vld1_u8(src + pitch * 7);
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
162 02 12 22 32 | 42 52 62 72 143 02 12 22 32 | 42 52 62 72
163 03 13 23 33 | 43 53 63 73 144 03 13 23 33 | 43 53 63 73
164 */ 145 */
165 x.val[0] = r01_u8.val[0]; 146 x.val[0] = r01_u8.val[0];
166 x.val[1] = r01_u8.val[1]; 147 x.val[1] = r01_u8.val[1];
167 x.val[2] = r23_u8.val[0]; 148 x.val[2] = r23_u8.val[0];
168 x.val[3] = r23_u8.val[1]; 149 x.val[3] = r23_u8.val[1];
169 150
170 return x; 151 return x;
171 } 152 }
172 #endif 153 #else
154 static INLINE
155 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) {
156 x = vld4_lane_u8(src, x, 0);
157 src += pitch;
158 x = vld4_lane_u8(src, x, 1);
159 src += pitch;
160 x = vld4_lane_u8(src, x, 2);
161 src += pitch;
162 x = vld4_lane_u8(src, x, 3);
163 src += pitch;
164 x = vld4_lane_u8(src, x, 4);
165 src += pitch;
166 x = vld4_lane_u8(src, x, 5);
167 src += pitch;
168 x = vld4_lane_u8(src, x, 6);
169 src += pitch;
170 x = vld4_lane_u8(src, x, 7);
171 return x;
172 }
173 #endif // VPX_INCOMPATIBLE_GCC
173 174
174 static INLINE void vp8_loop_filter_simple_vertical_edge_neon( 175 static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
175 unsigned char *s, 176 unsigned char *s,
176 int p, 177 int p,
177 const unsigned char *blimit) { 178 const unsigned char *blimit) {
178 unsigned char *src1; 179 unsigned char *src1;
179 uint8x16_t qblimit, q0u8; 180 uint8x16_t qblimit, q0u8;
180 uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; 181 uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
181 int16x8_t q2s16, q13s16, q11s16; 182 int16x8_t q2s16, q13s16, q11s16;
182 int8x8_t d28s8, d29s8; 183 int8x8_t d28s8, d29s8;
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
270 return; 271 return;
271 } 272 }
272 273
273 void vp8_loop_filter_mbvs_neon( 274 void vp8_loop_filter_mbvs_neon(
274 unsigned char *y_ptr, 275 unsigned char *y_ptr,
275 int y_stride, 276 int y_stride,
276 const unsigned char *blimit) { 277 const unsigned char *blimit) {
277 vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); 278 vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
278 return; 279 return;
279 } 280 }
OLD | NEW
« no previous file with comments | « source/libvpx/vp8/common/arm/neon/loopfilter_neon.c ('k') | source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698