OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include <arm_neon.h> |
| 12 |
| 13 #include "vp8/common/blockd.h" |
| 14 |
/*
 * Builds the 16x16 luma intra predictor for the macroblock described by x
 * and stores it at ypred_ptr, one 16-byte row every y_stride bytes.
 *
 * The predictor is chosen by x->mode_info_context->mbmi.mode:
 *   DC_PRED - every pixel is the rounded mean of the available edge pixels
 *             (the 16 bytes at yabove_row when x->up_available, and 16 bytes
 *             read from yleft at left_stride intervals when
 *             x->left_available); 128 when neither edge is available.
 *   V_PRED  - every row is a copy of the 16 bytes at yabove_row.
 *   H_PRED  - row r is filled with the r-th pixel read from yleft.
 *   TM_PRED - pixel (r, c) = above[c] + left[r] - yabove_row[-1], saturated
 *             to the 0..255 range.
 * Any other mode leaves the destination untouched.
 */
void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x,
                                           unsigned char * yabove_row,
                                           unsigned char * yleft,
                                           int left_stride,
                                           unsigned char * ypred_ptr,
                                           int y_stride) {
  const int mode = x->mode_info_context->mbmi.mode;
  const unsigned char *left = yleft;  /* read cursor down the left column */
  unsigned char *dst = ypred_ptr;     /* write cursor, one row per step */
  int row;

  switch (mode) {
    case DC_PRED: {
      const int edges = x->up_available + x->left_available;
      uint8x16_t dc_row = vdupq_n_u8(128);

      if (edges != 0) {
        /* 16 samples per edge: shift is 4 for one edge, 5 for both. */
        const int shift = edges + 3;
        unsigned int sum = 0;

        if (x->up_available) {
          /* Pairwise widening adds reduce 16 bytes to two 64-bit partial
           * sums; their low 32-bit words are then added together. */
          const uint64x2_t halves =
              vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vld1q_u8(yabove_row))));
          const uint32x2_t total =
              vadd_u32(vreinterpret_u32_u64(vget_low_u64(halves)),
                       vreinterpret_u32_u64(vget_high_u64(halves)));
          sum = vget_lane_u32(total, 0);
        }
        if (x->left_available) {
          for (row = 0; row < 16; ++row, left += left_stride) {
            sum += *left;
          }
        }
        /* Add half the divisor before shifting so the mean is rounded. */
        dc_row = vdupq_n_u8((uint8_t)((sum + (1 << (shift - 1))) >> shift));
      }

      for (row = 0; row < 16; ++row, dst += y_stride) {
        vst1q_u8(dst, dc_row);
      }
      break;
    }

    case V_PRED: {
      const uint8x16_t above = vld1q_u8(yabove_row);

      for (row = 0; row < 16; ++row, dst += y_stride) {
        vst1q_u8(dst, above);
      }
      break;
    }

    case H_PRED: {
      for (row = 0; row < 16; ++row, dst += y_stride, left += left_stride) {
        vst1q_u8(dst, vdupq_n_u8(*left));
      }
      break;
    }

    case TM_PRED: {
      /* The top-left pixel is 0..255, so it fits a signed 16-bit lane. */
      const int16x8_t top_left = vdupq_n_s16((int16_t)yabove_row[-1]);
      const uint8x16_t above = vld1q_u8(yabove_row);
      const uint8x8_t above_lo = vget_low_u8(above);
      const uint8x8_t above_hi = vget_high_u8(above);

      for (row = 0; row < 16; ++row, dst += y_stride, left += left_stride) {
        const uint8x8_t left_px = vdup_n_u8(*left);
        /* above + left is at most 510, so viewing the widened sum as signed
         * 16-bit is lossless; the subtraction may then go negative. */
        const int16x8_t diff_lo = vsubq_s16(
            vreinterpretq_s16_u16(vaddl_u8(above_lo, left_px)), top_left);
        const int16x8_t diff_hi = vsubq_s16(
            vreinterpretq_s16_u16(vaddl_u8(above_hi, left_px)), top_left);

        /* vqmovun saturates each signed lane into 0..255. */
        vst1q_u8(dst, vcombine_u8(vqmovun_s16(diff_lo), vqmovun_s16(diff_hi)));
      }
      break;
    }

    default:
      /* Modes without a 16x16 predictor here: nothing is written. */
      break;
  }
}
| 100 |
/*
 * Builds the two 8x8 chroma intra predictors (U and V) for the macroblock
 * described by x. Each plane is written as eight 8-byte rows, pred_stride
 * bytes apart, at upred_ptr and vpred_ptr respectively.
 *
 * The predictor is chosen by x->mode_info_context->mbmi.uv_mode:
 *   DC_PRED - per plane, every pixel is the rounded mean of the available
 *             edge pixels (the 8 bytes at the plane's above row when
 *             x->up_available, and 8 bytes read from the plane's left
 *             pointer at left_stride intervals when x->left_available);
 *             128 when neither edge is available.
 *   V_PRED  - every row is a copy of the 8 bytes at the plane's above row.
 *   H_PRED  - row r is filled with the r-th pixel read from the left pointer.
 *   TM_PRED - pixel (r, c) = above[c] + left[r] - above_row[-1], saturated
 *             to the 0..255 range.
 * Any other mode leaves both destinations untouched.
 */
void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x,
                                            unsigned char * uabove_row,
                                            unsigned char * vabove_row,
                                            unsigned char * uleft,
                                            unsigned char * vleft,
                                            int left_stride,
                                            unsigned char * upred_ptr,
                                            unsigned char * vpred_ptr,
                                            int pred_stride) {
  const int mode = x->mode_info_context->mbmi.uv_mode;
  const unsigned char *left_u = uleft;  /* read cursors down the left columns */
  const unsigned char *left_v = vleft;
  unsigned char *dst_u = upred_ptr;     /* write cursors, one row per step */
  unsigned char *dst_v = vpred_ptr;
  int row;

  switch (mode) {
    case DC_PRED: {
      const int edges = x->up_available + x->left_available;
      uint8x8_t dc_u = vdup_n_u8(128);
      uint8x8_t dc_v = vdup_n_u8(128);

      if (edges != 0) {
        /* 8 samples per edge: shift is 3 for one edge, 4 for both. */
        const int shift = edges + 2;
        unsigned int sum_u = 0;
        unsigned int sum_v = 0;

        if (x->up_available) {
          /* U occupies the low 8 bytes and V the high 8 bytes, so after the
           * pairwise widening adds each 64-bit lane holds one plane's sum. */
          const uint8x16_t both =
              vcombine_u8(vld1_u8(uabove_row), vld1_u8(vabove_row));
          const uint32x4_t sums = vreinterpretq_u32_u64(
              vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(both))));
          sum_u = vgetq_lane_u32(sums, 0);
          sum_v = vgetq_lane_u32(sums, 2);
        }
        if (x->left_available) {
          for (row = 0; row < 8; ++row) {
            sum_u += *left_u;
            left_u += left_stride;
            sum_v += *left_v;
            left_v += left_stride;
          }
        }
        /* Add half the divisor before shifting so the means are rounded. */
        dc_u = vdup_n_u8((uint8_t)((sum_u + (1 << (shift - 1))) >> shift));
        dc_v = vdup_n_u8((uint8_t)((sum_v + (1 << (shift - 1))) >> shift));
      }

      for (row = 0; row < 8; ++row) {
        vst1_u8(dst_u, dc_u);
        dst_u += pred_stride;
        vst1_u8(dst_v, dc_v);
        dst_v += pred_stride;
      }
      break;
    }

    case V_PRED: {
      const uint8x8_t above_u = vld1_u8(uabove_row);
      const uint8x8_t above_v = vld1_u8(vabove_row);

      for (row = 0; row < 8; ++row) {
        vst1_u8(dst_u, above_u);
        dst_u += pred_stride;
        vst1_u8(dst_v, above_v);
        dst_v += pred_stride;
      }
      break;
    }

    case H_PRED: {
      for (row = 0; row < 8; ++row) {
        const uint8x8_t fill_u = vdup_n_u8(*left_u);
        const uint8x8_t fill_v = vdup_n_u8(*left_v);

        left_u += left_stride;
        left_v += left_stride;
        vst1_u8(dst_u, fill_u);
        dst_u += pred_stride;
        vst1_u8(dst_v, fill_v);
        dst_v += pred_stride;
      }
      break;
    }

    case TM_PRED: {
      /* The top-left pixels are 0..255, so they fit signed 16-bit lanes. */
      const int16x8_t top_left_u = vdupq_n_s16((int16_t)uabove_row[-1]);
      const int16x8_t top_left_v = vdupq_n_s16((int16_t)vabove_row[-1]);
      const uint8x8_t above_u = vld1_u8(uabove_row);
      const uint8x8_t above_v = vld1_u8(vabove_row);

      for (row = 0; row < 8; ++row) {
        const uint8x8_t left_px_u = vdup_n_u8(*left_u);
        const uint8x8_t left_px_v = vdup_n_u8(*left_v);
        /* above + left is at most 510, so viewing the widened sum as signed
         * 16-bit is lossless; the subtraction may then go negative. */
        const int16x8_t diff_u = vsubq_s16(
            vreinterpretq_s16_u16(vaddl_u8(above_u, left_px_u)), top_left_u);
        const int16x8_t diff_v = vsubq_s16(
            vreinterpretq_s16_u16(vaddl_u8(above_v, left_px_v)), top_left_v);

        /* vqmovun saturates each signed lane into 0..255. */
        vst1_u8(dst_u, vqmovun_s16(diff_u));
        vst1_u8(dst_v, vqmovun_s16(diff_v));
        dst_u += pred_stride;
        dst_v += pred_stride;
        left_u += left_stride;
        left_v += left_stride;
      }
      break;
    }

    default:
      /* Modes without a chroma predictor here: nothing is written. */
      break;
  }
}
OLD | NEW |