| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
| 12 #include "./vpx_config.h" | 12 #include "./vpx_config.h" |
| 13 #include "vpx_ports/arm.h" |
| 13 | 14 |
| 14 #if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) | 15 #ifdef VPX_INCOMPATIBLE_GCC |
| 15 static INLINE void write_2x8(unsigned char *dst, int pitch, | |
| 16 const uint8x8x2_t result, | |
| 17 const uint8x8x2_t result2) { | |
| 18 vst2_lane_u8(dst, result, 0); | |
| 19 dst += pitch; | |
| 20 vst2_lane_u8(dst, result, 1); | |
| 21 dst += pitch; | |
| 22 vst2_lane_u8(dst, result, 2); | |
| 23 dst += pitch; | |
| 24 vst2_lane_u8(dst, result, 3); | |
| 25 dst += pitch; | |
| 26 vst2_lane_u8(dst, result, 4); | |
| 27 dst += pitch; | |
| 28 vst2_lane_u8(dst, result, 5); | |
| 29 dst += pitch; | |
| 30 vst2_lane_u8(dst, result, 6); | |
| 31 dst += pitch; | |
| 32 vst2_lane_u8(dst, result, 7); | |
| 33 dst += pitch; | |
| 34 | |
| 35 vst2_lane_u8(dst, result2, 0); | |
| 36 dst += pitch; | |
| 37 vst2_lane_u8(dst, result2, 1); | |
| 38 dst += pitch; | |
| 39 vst2_lane_u8(dst, result2, 2); | |
| 40 dst += pitch; | |
| 41 vst2_lane_u8(dst, result2, 3); | |
| 42 dst += pitch; | |
| 43 vst2_lane_u8(dst, result2, 4); | |
| 44 dst += pitch; | |
| 45 vst2_lane_u8(dst, result2, 5); | |
| 46 dst += pitch; | |
| 47 vst2_lane_u8(dst, result2, 6); | |
| 48 dst += pitch; | |
| 49 vst2_lane_u8(dst, result2, 7); | |
| 50 } | |
| 51 #else | |
| 52 static INLINE void write_2x4(unsigned char *dst, int pitch, | 16 static INLINE void write_2x4(unsigned char *dst, int pitch, |
| 53 const uint8x8x2_t result) { | 17 const uint8x8x2_t result) { |
| 54 /* | 18 /* |
| 55 * uint8x8x2_t result | 19 * uint8x8x2_t result |
| 56 00 01 02 03 | 04 05 06 07 | 20 00 01 02 03 | 04 05 06 07 |
| 57 10 11 12 13 | 14 15 16 17 | 21 10 11 12 13 | 14 15 16 17 |
| 58 --- | 22 --- |
| 59 * after vtrn_u8 | 23 * after vtrn_u8 |
| 60 00 10 02 12 | 04 14 06 16 | 24 00 10 02 12 | 04 14 06 16 |
| 61 01 11 03 13 | 05 15 07 17 | 25 01 11 03 13 | 05 15 07 17 |
| (...skipping 19 matching lines...) |
| 81 vst1_lane_u16((uint16_t *)dst, x_1_5, 3); | 45 vst1_lane_u16((uint16_t *)dst, x_1_5, 3); |
| 82 } | 46 } |
| 83 | 47 |
| 84 static INLINE void write_2x8(unsigned char *dst, int pitch, | 48 static INLINE void write_2x8(unsigned char *dst, int pitch, |
| 85 const uint8x8x2_t result, | 49 const uint8x8x2_t result, |
| 86 const uint8x8x2_t result2) { | 50 const uint8x8x2_t result2) { |
| 87 write_2x4(dst, pitch, result); | 51 write_2x4(dst, pitch, result); |
| 88 dst += pitch * 8; | 52 dst += pitch * 8; |
| 89 write_2x4(dst, pitch, result2); | 53 write_2x4(dst, pitch, result2); |
| 90 } | 54 } |
| 91 #endif | 55 #else |
| 56 static INLINE void write_2x8(unsigned char *dst, int pitch, |
| 57 const uint8x8x2_t result, |
| 58 const uint8x8x2_t result2) { |
| 59 vst2_lane_u8(dst, result, 0); |
| 60 dst += pitch; |
| 61 vst2_lane_u8(dst, result, 1); |
| 62 dst += pitch; |
| 63 vst2_lane_u8(dst, result, 2); |
| 64 dst += pitch; |
| 65 vst2_lane_u8(dst, result, 3); |
| 66 dst += pitch; |
| 67 vst2_lane_u8(dst, result, 4); |
| 68 dst += pitch; |
| 69 vst2_lane_u8(dst, result, 5); |
| 70 dst += pitch; |
| 71 vst2_lane_u8(dst, result, 6); |
| 72 dst += pitch; |
| 73 vst2_lane_u8(dst, result, 7); |
| 74 dst += pitch; |
| 75 |
| 76 vst2_lane_u8(dst, result2, 0); |
| 77 dst += pitch; |
| 78 vst2_lane_u8(dst, result2, 1); |
| 79 dst += pitch; |
| 80 vst2_lane_u8(dst, result2, 2); |
| 81 dst += pitch; |
| 82 vst2_lane_u8(dst, result2, 3); |
| 83 dst += pitch; |
| 84 vst2_lane_u8(dst, result2, 4); |
| 85 dst += pitch; |
| 86 vst2_lane_u8(dst, result2, 5); |
| 87 dst += pitch; |
| 88 vst2_lane_u8(dst, result2, 6); |
| 89 dst += pitch; |
| 90 vst2_lane_u8(dst, result2, 7); |
| 91 } |
| 92 #endif // VPX_INCOMPATIBLE_GCC |
| 92 | 93 |
| 93 | 94 |
| 94 #if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) | 95 #ifdef VPX_INCOMPATIBLE_GCC |
| 95 static INLINE | |
| 96 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { | |
| 97 x = vld4_lane_u8(src, x, 0); | |
| 98 src += pitch; | |
| 99 x = vld4_lane_u8(src, x, 1); | |
| 100 src += pitch; | |
| 101 x = vld4_lane_u8(src, x, 2); | |
| 102 src += pitch; | |
| 103 x = vld4_lane_u8(src, x, 3); | |
| 104 src += pitch; | |
| 105 x = vld4_lane_u8(src, x, 4); | |
| 106 src += pitch; | |
| 107 x = vld4_lane_u8(src, x, 5); | |
| 108 src += pitch; | |
| 109 x = vld4_lane_u8(src, x, 6); | |
| 110 src += pitch; | |
| 111 x = vld4_lane_u8(src, x, 7); | |
| 112 return x; | |
| 113 } | |
| 114 #else | |
| 115 static INLINE | 96 static INLINE |
| 116 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { | 97 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { |
| 117 const uint8x8_t a = vld1_u8(src); | 98 const uint8x8_t a = vld1_u8(src); |
| 118 const uint8x8_t b = vld1_u8(src + pitch * 1); | 99 const uint8x8_t b = vld1_u8(src + pitch * 1); |
| 119 const uint8x8_t c = vld1_u8(src + pitch * 2); | 100 const uint8x8_t c = vld1_u8(src + pitch * 2); |
| 120 const uint8x8_t d = vld1_u8(src + pitch * 3); | 101 const uint8x8_t d = vld1_u8(src + pitch * 3); |
| 121 const uint8x8_t e = vld1_u8(src + pitch * 4); | 102 const uint8x8_t e = vld1_u8(src + pitch * 4); |
| 122 const uint8x8_t f = vld1_u8(src + pitch * 5); | 103 const uint8x8_t f = vld1_u8(src + pitch * 5); |
| 123 const uint8x8_t g = vld1_u8(src + pitch * 6); | 104 const uint8x8_t g = vld1_u8(src + pitch * 6); |
| 124 const uint8x8_t h = vld1_u8(src + pitch * 7); | 105 const uint8x8_t h = vld1_u8(src + pitch * 7); |
| (...skipping 37 matching lines...) |
| 162 02 12 22 32 | 42 52 62 72 | 143 02 12 22 32 | 42 52 62 72 |
| 163 03 13 23 33 | 43 53 63 73 | 144 03 13 23 33 | 43 53 63 73 |
| 164 */ | 145 */ |
| 165 x.val[0] = r01_u8.val[0]; | 146 x.val[0] = r01_u8.val[0]; |
| 166 x.val[1] = r01_u8.val[1]; | 147 x.val[1] = r01_u8.val[1]; |
| 167 x.val[2] = r23_u8.val[0]; | 148 x.val[2] = r23_u8.val[0]; |
| 168 x.val[3] = r23_u8.val[1]; | 149 x.val[3] = r23_u8.val[1]; |
| 169 | 150 |
| 170 return x; | 151 return x; |
| 171 } | 152 } |
| 172 #endif | 153 #else |
| 154 static INLINE |
| 155 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { |
| 156 x = vld4_lane_u8(src, x, 0); |
| 157 src += pitch; |
| 158 x = vld4_lane_u8(src, x, 1); |
| 159 src += pitch; |
| 160 x = vld4_lane_u8(src, x, 2); |
| 161 src += pitch; |
| 162 x = vld4_lane_u8(src, x, 3); |
| 163 src += pitch; |
| 164 x = vld4_lane_u8(src, x, 4); |
| 165 src += pitch; |
| 166 x = vld4_lane_u8(src, x, 5); |
| 167 src += pitch; |
| 168 x = vld4_lane_u8(src, x, 6); |
| 169 src += pitch; |
| 170 x = vld4_lane_u8(src, x, 7); |
| 171 return x; |
| 172 } |
| 173 #endif // VPX_INCOMPATIBLE_GCC |
| 173 | 174 |
| 174 static INLINE void vp8_loop_filter_simple_vertical_edge_neon( | 175 static INLINE void vp8_loop_filter_simple_vertical_edge_neon( |
| 175 unsigned char *s, | 176 unsigned char *s, |
| 176 int p, | 177 int p, |
| 177 const unsigned char *blimit) { | 178 const unsigned char *blimit) { |
| 178 unsigned char *src1; | 179 unsigned char *src1; |
| 179 uint8x16_t qblimit, q0u8; | 180 uint8x16_t qblimit, q0u8; |
| 180 uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; | 181 uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; |
| 181 int16x8_t q2s16, q13s16, q11s16; | 182 int16x8_t q2s16, q13s16, q11s16; |
| 182 int8x8_t d28s8, d29s8; | 183 int8x8_t d28s8, d29s8; |
| (...skipping 87 matching lines...) |
| 270 return; | 271 return; |
| 271 } | 272 } |
| 272 | 273 |
| 273 void vp8_loop_filter_mbvs_neon( | 274 void vp8_loop_filter_mbvs_neon( |
| 274 unsigned char *y_ptr, | 275 unsigned char *y_ptr, |
| 275 int y_stride, | 276 int y_stride, |
| 276 const unsigned char *blimit) { | 277 const unsigned char *blimit) { |
| 277 vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); | 278 vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); |
| 278 return; | 279 return; |
| 279 } | 280 } |
| OLD | NEW |