Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(626)

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c

Issue 23600008: libvpx: Pull from upstream (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vp9_rtcd.h"
12 #include "vp9/common/vp9_common.h"
13
/*
 * Helper routines used by the 16x16 inverse DCT entry points in this file.
 * Only the declarations are visible here; the definitions live elsewhere.
 *
 * pass1: per the call sites, processes the even elements (0, 2, ..., 14) of
 * `input` and saves its stage 6 result in `output`.
 *
 * pass2: per the call sites, processes the odd elements (1, 3, ..., 15) of
 * `src` and combines them with `pass1Output` to calculate the final stage 7
 * result. Callers pass skip_adding == 0 when the result is to be saved into
 * `output`, and skip_adding == 1 when the result is to be added to `dest`.
 * NOTE(review): the parameter name reads the other way round - confirm
 * against the helper's implementation.
 */
extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
                                               int16_t *output,
                                               int output_stride);
extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
                                               int16_t *output,
                                               int16_t *pass1Output,
                                               int16_t skip_adding,
                                               uint8_t *dest,
                                               int dest_stride);

/*
 * Variants used for the row stage of vp9_short_idct10_16x16_add_neon, where
 * the lower 8 rows of the input are all 0s.
 */
extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input,
                                                  int16_t *output,
                                                  int output_stride);
extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src,
                                                  int16_t *output,
                                                  int16_t *pass1Output,
                                                  int16_t skip_adding,
                                                  uint8_t *dest,
                                                  int dest_stride);

/*
 * Save / restore the d8-d15 register values around the helper calls.
 * Declared with (void) so these are real prototypes: an empty parameter list
 * in C leaves the arguments unspecified and disables argument checking.
 */
extern void save_registers(void);
extern void restore_registers(void);
34
35
/*
 * 16x16 inverse DCT whose result is added to the destination data.
 *
 * The work is done as a row stage followed by a column stage. Each stage is
 * split into two halves of 8 rows / 8 columns, and every half takes two
 * helper calls:
 *   - pass1 processes the even elements 0, 2, ..., 14 and saves its stage 6
 *     result in a scratch buffer;
 *   - pass2 processes the odd elements 1, 3, ..., 15 and combines them with
 *     the pass1 result to calculate the final stage 7 result.
 *
 * input       - 16x16 block of coefficients.
 * dest        - destination data the column stage adds its result to.
 * dest_stride - stride of dest, handed through to the pass2 helper.
 */
void vp9_short_idct16x16_add_neon(int16_t *input,
                                  uint8_t *dest, int dest_stride) {
  int16_t stage6_buf[16 * 16] = {0};  // pass1 result, reused by every half
  int16_t row_result[16 * 16] = {0};  // output of the row stage

  int16_t *const lower_rows_in = input + 8 * 16;
  int16_t *const right_cols_in = row_result + 8 * 16;
  int16_t *const second_half_out = row_result + 8;

  // Save the d8-d15 register values before calling the helpers.
  save_registers();

  /* Row stage, upper 8 rows: even elements first, then the odd elements
   * combined with the stage 6 result and saved into row_result. */
  vp9_short_idct16x16_add_neon_pass1(input, stage6_buf, 8);
  vp9_short_idct16x16_add_neon_pass2(input + 1, row_result, stage6_buf,
                                     0, dest, dest_stride);

  /* Row stage, lower 8 rows: same two passes, saved at row_result + 8. */
  vp9_short_idct16x16_add_neon_pass1(lower_rows_in, stage6_buf, 8);
  vp9_short_idct16x16_add_neon_pass2(lower_rows_in + 1, second_half_out,
                                     stage6_buf, 0, dest, dest_stride);

  /* Column stage, left 8 columns: the stage 7 result is added to the
   * destination data (skip_adding argument is 1). */
  vp9_short_idct16x16_add_neon_pass1(row_result, stage6_buf, 8);
  vp9_short_idct16x16_add_neon_pass2(row_result + 1, row_result, stage6_buf,
                                     1, dest, dest_stride);

  /* Column stage, right 8 columns: as above, added at dest + 8. */
  vp9_short_idct16x16_add_neon_pass1(right_cols_in, stage6_buf, 8);
  vp9_short_idct16x16_add_neon_pass2(right_cols_in + 1, second_half_out,
                                     stage6_buf, 1, dest + 8, dest_stride);

  // Restore the d8-d15 register values.
  restore_registers();
}
109
/*
 * 16x16 inverse DCT for blocks whose lower 8 rows are all 0s; the result is
 * added to the destination data.
 *
 * Only the upper 8 rows go through the row stage, using the idct10 helpers.
 * The row-stage output buffer is zero-initialized and the lower 8 rows are
 * skipped. The column stage then runs on both 8-column halves with the
 * regular idct16x16 helpers.
 *
 * In every half, pass1 processes the even elements 0, 2, ..., 14 and saves
 * its stage 6 result in a scratch buffer; pass2 processes the odd elements
 * 1, 3, ..., 15 and combines them with the pass1 result to calculate the
 * final stage 7 result.
 *
 * input       - 16x16 block of coefficients.
 * dest        - destination data the column stage adds its result to.
 * dest_stride - stride of dest, handed through to the pass2 helper.
 */
void vp9_short_idct10_16x16_add_neon(int16_t *input,
                                     uint8_t *dest, int dest_stride) {
  int16_t stage6_buf[16 * 16] = {0};  // pass1 result, reused by every half
  int16_t row_result[16 * 16] = {0};  // output of the row stage

  int16_t *const right_cols_in = row_result + 8 * 16;
  int16_t *const right_cols_out = row_result + 8;

  // Save the d8-d15 register values before calling the helpers.
  save_registers();

  /* Row stage, upper 8 rows: even elements first, then the odd elements
   * combined with the stage 6 result and saved into row_result. */
  vp9_short_idct10_16x16_add_neon_pass1(input, stage6_buf, 8);
  vp9_short_idct10_16x16_add_neon_pass2(input + 1, row_result, stage6_buf,
                                        0, dest, dest_stride);

  /* The lower 8 rows are all 0s, so their row stage is skipped. */

  /* Column stage, left 8 columns: the stage 7 result is added to the
   * destination data (skip_adding argument is 1). */
  vp9_short_idct16x16_add_neon_pass1(row_result, stage6_buf, 8);
  vp9_short_idct16x16_add_neon_pass2(row_result + 1, row_result, stage6_buf,
                                     1, dest, dest_stride);

  /* Column stage, right 8 columns: as above, added at dest + 8. */
  vp9_short_idct16x16_add_neon_pass1(right_cols_in, stage6_buf, 8);
  vp9_short_idct16x16_add_neon_pass2(right_cols_in + 1, right_cols_out,
                                     stage6_buf, 1, dest + 8, dest_stride);

  // Restore the d8-d15 register values.
  restore_registers();
}
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/arm/neon/vp9_copy_neon.asm ('k') | source/libvpx/vp9/common/arm/neon/vp9_mb_lpf_neon.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698