Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(395)

Side by Side Diff: source/libvpx/vp8/encoder/arm/neon/subtract_neon.c

Issue 484923003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <arm_neon.h>
12 #include "vp8/encoder/block.h"
13
/*
 * Computes the residual (source minus predictor) for one 4x4 block.
 *
 * be    - encoder block; supplies the source pointer (*base_src + src),
 *         the source row stride (src_stride) and the output buffer
 *         (src_diff).
 * bd    - decoder-side block; supplies the predictor pointer.
 * pitch - row spacing used for BOTH the predictor (in bytes) and the
 *         output buffer (in int16_t elements).
 *
 * Each row is fetched with an 8-byte load, but only the low four lanes of
 * the widened difference are written out, so four bytes past each 4-pixel
 * row are read and then discarded.
 */
void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch) {
  unsigned char *src_row = *be->base_src + be->src;
  const int src_stride = be->src_stride;
  unsigned char *pred_row = bd->predictor;
  int16_t *diff_row;
  uint8x8_t src_rows[4];
  uint8x8_t pred_rows[4];
  uint16x8_t residual[4];
  int r;

  /* Fetch the four source rows first; the pointer is advanced only
   * between rows, never past the last one. */
  src_rows[0] = vld1_u8(src_row);
  for (r = 1; r < 4; ++r) {
    src_row += src_stride;
    src_rows[r] = vld1_u8(src_row);
  }

  /* Then fetch the four predictor rows. */
  pred_rows[0] = vld1_u8(pred_row);
  for (r = 1; r < 4; ++r) {
    pred_row += pitch;
    pred_rows[r] = vld1_u8(pred_row);
  }

  /* Widening subtract: 8-bit lanes become 16-bit lanes. The unsigned
   * 16-bit wrap-around result has the same bit pattern as the signed
   * difference, which is why it can be stored into an int16_t buffer. */
  for (r = 0; r < 4; ++r) {
    residual[r] = vsubl_u8(src_rows[r], pred_rows[r]);
  }

  /* Store only the low four lanes of each row. */
  diff_row = be->src_diff;
  vst1_u16((uint16_t *)diff_row, vget_low_u16(residual[0]));
  for (r = 1; r < 4; ++r) {
    diff_row += pitch;
    vst1_u16((uint16_t *)diff_row, vget_low_u16(residual[r]));
  }
}
59
/*
 * Computes a 16x16 residual: diff = src - pred, element by element.
 *
 * diff        - output; receives 256 contiguous int16_t values, 16 per row
 *               (the output has no stride of its own).
 * src         - top-left byte of the 16x16 source area.
 * src_stride  - distance in bytes between consecutive source rows.
 * pred        - top-left byte of the 16x16 predictor area.
 * pred_stride - distance in bytes between consecutive predictor rows.
 */
void vp8_subtract_mby_neon(int16_t *diff,
                           unsigned char *src,
                           int src_stride,
                           unsigned char *pred,
                           int pred_stride) {
  int row_pair;

  /* Every pass consumes two 16-pixel rows, so eight passes cover all 16. */
  for (row_pair = 0; row_pair < 8; ++row_pair) {
    unsigned char *const src_next = src + src_stride;
    unsigned char *const pred_next = pred + pred_stride;

    /* Both source rows are loaded before both predictor rows. */
    const uint8x16_t src_top = vld1q_u8(src);
    const uint8x16_t src_bot = vld1q_u8(src_next);
    const uint8x16_t pred_top = vld1q_u8(pred);
    const uint8x16_t pred_bot = vld1q_u8(pred_next);

    /* Widening subtract on each 8-lane half. The 16-bit wrap-around result
     * has the same bit pattern as the signed difference. */
    const uint16x8_t top_lo =
        vsubl_u8(vget_low_u8(src_top), vget_low_u8(pred_top));
    const uint16x8_t top_hi =
        vsubl_u8(vget_high_u8(src_top), vget_high_u8(pred_top));
    const uint16x8_t bot_lo =
        vsubl_u8(vget_low_u8(src_bot), vget_low_u8(pred_bot));
    const uint16x8_t bot_hi =
        vsubl_u8(vget_high_u8(src_bot), vget_high_u8(pred_bot));

    uint16_t *const out = (uint16_t *)diff;

    /* Two rows of 16 residuals = 32 output values per pass. */
    vst1q_u16(out, top_lo);
    vst1q_u16(out + 8, top_hi);
    vst1q_u16(out + 16, bot_lo);
    vst1q_u16(out + 24, bot_hi);

    src = src_next + src_stride;
    pred = pred_next + pred_stride;
    diff += 32;
  }
}
96
/*
 * Writes the residual (src - pred) of one 8x8 plane to diff as 64
 * contiguous int16_t values (8 per row) and returns the pointer just past
 * the last value written.
 *
 * src / pred are read 8 bytes per row; rows are src_stride / pred_stride
 * bytes apart respectively.
 */
static int16_t *subtract_plane_8x8_neon(int16_t *diff,
                                        const unsigned char *src,
                                        int src_stride,
                                        const unsigned char *pred,
                                        int pred_stride) {
  int pass;

  /* Two passes of four rows each cover the eight rows of the plane. */
  for (pass = 0; pass < 2; pass++) {
    uint8x8_t s0, s1, s2, s3;
    uint8x8_t p0, p1, p2, p3;
    uint16x8_t r0, r1, r2, r3;

    /* Source and predictor rows are loaded alternately, row by row. */
    s0 = vld1_u8(src);
    src += src_stride;
    p0 = vld1_u8(pred);
    pred += pred_stride;
    s1 = vld1_u8(src);
    src += src_stride;
    p1 = vld1_u8(pred);
    pred += pred_stride;
    s2 = vld1_u8(src);
    src += src_stride;
    p2 = vld1_u8(pred);
    pred += pred_stride;
    s3 = vld1_u8(src);
    src += src_stride;
    p3 = vld1_u8(pred);
    pred += pred_stride;

    /* Widening subtract: the 16-bit wrap-around result has the same bit
     * pattern as the signed difference, so it can live in int16_t. */
    r0 = vsubl_u8(s0, p0);
    r1 = vsubl_u8(s1, p1);
    r2 = vsubl_u8(s2, p2);
    r3 = vsubl_u8(s3, p3);

    vst1q_u16((uint16_t *)diff, r0);
    diff += 8;
    vst1q_u16((uint16_t *)diff, r1);
    diff += 8;
    vst1q_u16((uint16_t *)diff, r2);
    diff += 8;
    vst1q_u16((uint16_t *)diff, r3);
    diff += 8;
  }
  return diff;
}

/*
 * Computes the residuals of the two 8x8 chroma planes.
 *
 * diff        - output buffer. The first 256 entries are skipped
 *               (presumably the 16x16 luma residual, which is the amount
 *               vp8_subtract_mby_neon writes); the U residual goes to
 *               diff[256..319] and the V residual to diff[320..383].
 * usrc, vsrc  - top-left bytes of the U and V source planes, both with
 *               row stride src_stride.
 * upred,vpred - top-left bytes of the U and V predictor planes, both with
 *               row stride pred_stride.
 *
 * The planes are handled by two explicit helper calls instead of a
 * two-trip loop with an if / else-if selecting the pointers, so no local
 * pointer is left conditionally initialized.
 */
void vp8_subtract_mbuv_neon(int16_t *diff,
                            unsigned char *usrc,
                            unsigned char *vsrc,
                            int src_stride,
                            unsigned char *upred,
                            unsigned char *vpred,
                            int pred_stride) {
  diff += 256;
  diff = subtract_plane_8x8_neon(diff, usrc, src_stride, upred, pred_stride);
  (void)subtract_plane_8x8_neon(diff, vsrc, src_stride, vpred, pred_stride);
}
OLDNEW
« no previous file with comments | « source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm ('k') | source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698