Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c

Issue 168343002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: libvpx: Pull from upstream Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h> 11 #include <assert.h>
12 #include <stdio.h> 12 #include <stdio.h>
13 13
14 #include "./vpx_config.h" 14 #include "./vpx_config.h"
15 #include "./vp9_rtcd.h" 15 #include "./vp9_rtcd.h"
16 #include "vp9/common/vp9_common.h" 16 #include "vp9/common/vp9_common.h"
17 #include "vp9/common/vp9_blockd.h" 17 #include "vp9/common/vp9_blockd.h"
18 #include "vp9/common/vp9_idct.h" 18 #include "vp9/common/vp9_idct.h"
19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" 19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
20 20
21 #if HAVE_DSPR2 21 #if HAVE_DSPR2
22 static void idct16_1d_rows_dspr2(const int16_t *input, int16_t *output, 22 static void idct16_rows_dspr2(const int16_t *input, int16_t *output,
23 uint32_t no_rows) { 23 uint32_t no_rows) {
24 int i; 24 int i;
25 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; 25 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
26 int step1_10, step1_11, step1_12, step1_13; 26 int step1_10, step1_11, step1_12, step1_13;
27 int step2_0, step2_1, step2_2, step2_3; 27 int step2_0, step2_1, step2_2, step2_3;
28 int step2_8, step2_9, step2_10, step2_11; 28 int step2_8, step2_9, step2_10, step2_11;
29 int step2_12, step2_13, step2_14, step2_15; 29 int step2_12, step2_13, step2_14, step2_15;
30 int load1, load2, load3, load4, load5, load6, load7, load8; 30 int load1, load2, load3, load4, load5, load6, load7, load8;
31 int result1, result2, result3, result4; 31 int result1, result2, result3, result4;
32 const int const_2_power_13 = 8192; 32 const int const_2_power_13 = 8192;
33 33
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after
397 [step1_4] "r" (step1_4), [step1_5] "r" (step1_5), 397 [step1_4] "r" (step1_4), [step1_5] "r" (step1_5),
398 [step1_10] "r" (step1_10), [step1_11] "r" (step1_11), 398 [step1_10] "r" (step1_10), [step1_11] "r" (step1_11),
399 [step1_12] "r" (step1_12), [step1_13] "r" (step1_13) 399 [step1_12] "r" (step1_12), [step1_13] "r" (step1_13)
400 ); 400 );
401 401
402 input += 16; 402 input += 16;
403 output += 1; 403 output += 1;
404 } 404 }
405 } 405 }
406 406
407 static void idct16_1d_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, 407 static void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
408 int dest_stride) { 408 int dest_stride) {
409 int i; 409 int i;
410 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; 410 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
411 int step1_8, step1_9, step1_10, step1_11; 411 int step1_8, step1_9, step1_10, step1_11;
412 int step1_12, step1_13, step1_14, step1_15; 412 int step1_12, step1_13, step1_14, step1_15;
413 int step2_0, step2_1, step2_2, step2_3; 413 int step2_0, step2_1, step2_2, step2_3;
414 int step2_8, step2_9, step2_10, step2_11; 414 int step2_8, step2_9, step2_10, step2_11;
415 int step2_12, step2_13, step2_14, step2_15; 415 int step2_12, step2_13, step2_14, step2_15;
416 int load1, load2, load3, load4, load5, load6, load7, load8; 416 int load1, load2, load3, load4, load5, load6, load7, load8;
417 int result1, result2, result3, result4; 417 int result1, result2, result3, result4;
418 const int const_2_power_13 = 8192; 418 const int const_2_power_13 = 8192;
(...skipping 479 matching lines...) Expand 10 before | Expand all | Expand 10 after
898 uint32_t pos = 45; 898 uint32_t pos = 45;
899 899
900 /* bit position for extract from acc */ 900 /* bit position for extract from acc */
901 __asm__ __volatile__ ( 901 __asm__ __volatile__ (
902 "wrdsp %[pos], 1 \n\t" 902 "wrdsp %[pos], 1 \n\t"
903 : 903 :
904 : [pos] "r" (pos) 904 : [pos] "r" (pos)
905 ); 905 );
906 906
907 // First transform rows 907 // First transform rows
908 idct16_1d_rows_dspr2(input, out, 16); 908 idct16_rows_dspr2(input, out, 16);
909 909
910 // Then transform columns and add to dest 910 // Then transform columns and add to dest
911 idct16_1d_cols_add_blk_dspr2(out, dest, dest_stride); 911 idct16_cols_add_blk_dspr2(out, dest, dest_stride);
912 } 912 }
913 913
914 static void iadst16_1d(const int16_t *input, int16_t *output) { 914 static void iadst16(const int16_t *input, int16_t *output) {
915 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; 915 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
916 916
917 int x0 = input[15]; 917 int x0 = input[15];
918 int x1 = input[0]; 918 int x1 = input[0];
919 int x2 = input[13]; 919 int x2 = input[13];
920 int x3 = input[2]; 920 int x3 = input[2];
921 int x4 = input[11]; 921 int x4 = input[11];
922 int x5 = input[4]; 922 int x5 = input[4];
923 int x6 = input[9]; 923 int x6 = input[9];
924 int x7 = input[6]; 924 int x7 = input[6];
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after
1092 1092
1093 /* bit position for extract from acc */ 1093 /* bit position for extract from acc */
1094 __asm__ __volatile__ ( 1094 __asm__ __volatile__ (
1095 "wrdsp %[pos], 1 \n\t" 1095 "wrdsp %[pos], 1 \n\t"
1096 : 1096 :
1097 : [pos] "r" (pos) 1097 : [pos] "r" (pos)
1098 ); 1098 );
1099 1099
1100 switch (tx_type) { 1100 switch (tx_type) {
1101 case DCT_DCT: // DCT in both horizontal and vertical 1101 case DCT_DCT: // DCT in both horizontal and vertical
1102 idct16_1d_rows_dspr2(input, outptr, 16); 1102 idct16_rows_dspr2(input, outptr, 16);
1103 idct16_1d_cols_add_blk_dspr2(out, dest, pitch); 1103 idct16_cols_add_blk_dspr2(out, dest, pitch);
1104 break; 1104 break;
1105 case ADST_DCT: // ADST in vertical, DCT in horizontal 1105 case ADST_DCT: // ADST in vertical, DCT in horizontal
1106 idct16_1d_rows_dspr2(input, outptr, 16); 1106 idct16_rows_dspr2(input, outptr, 16);
1107 1107
1108 outptr = out; 1108 outptr = out;
1109 1109
1110 for (i = 0; i < 16; ++i) { 1110 for (i = 0; i < 16; ++i) {
1111 iadst16_1d(outptr, temp_out); 1111 iadst16(outptr, temp_out);
1112 1112
1113 for (j = 0; j < 16; ++j) 1113 for (j = 0; j < 16; ++j)
1114 dest[j * pitch + i] = 1114 dest[j * pitch + i] =
1115 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 1115 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
1116 + dest[j * pitch + i]); 1116 + dest[j * pitch + i]);
1117 outptr += 16; 1117 outptr += 16;
1118 } 1118 }
1119 break; 1119 break;
1120 case DCT_ADST: // DCT in vertical, ADST in horizontal 1120 case DCT_ADST: // DCT in vertical, ADST in horizontal
1121 { 1121 {
1122 int16_t temp_in[16 * 16]; 1122 int16_t temp_in[16 * 16];
1123 1123
1124 for (i = 0; i < 16; ++i) { 1124 for (i = 0; i < 16; ++i) {
1125 /* prefetch row */ 1125 /* prefetch row */
1126 vp9_prefetch_load((const uint8_t *)(input + 16)); 1126 vp9_prefetch_load((const uint8_t *)(input + 16));
1127 1127
1128 iadst16_1d(input, outptr); 1128 iadst16(input, outptr);
1129 input += 16; 1129 input += 16;
1130 outptr += 16; 1130 outptr += 16;
1131 } 1131 }
1132 1132
1133 for (i = 0; i < 16; ++i) 1133 for (i = 0; i < 16; ++i)
1134 for (j = 0; j < 16; ++j) 1134 for (j = 0; j < 16; ++j)
1135 temp_in[j * 16 + i] = out[i * 16 + j]; 1135 temp_in[j * 16 + i] = out[i * 16 + j];
1136 1136
1137 idct16_1d_cols_add_blk_dspr2(temp_in, dest, pitch); 1137 idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
1138 } 1138 }
1139 break; 1139 break;
1140 case ADST_ADST: // ADST in both directions 1140 case ADST_ADST: // ADST in both directions
1141 { 1141 {
1142 int16_t temp_in[16]; 1142 int16_t temp_in[16];
1143 1143
1144 for (i = 0; i < 16; ++i) { 1144 for (i = 0; i < 16; ++i) {
1145 /* prefetch row */ 1145 /* prefetch row */
1146 vp9_prefetch_load((const uint8_t *)(input + 16)); 1146 vp9_prefetch_load((const uint8_t *)(input + 16));
1147 1147
1148 iadst16_1d(input, outptr); 1148 iadst16(input, outptr);
1149 input += 16; 1149 input += 16;
1150 outptr += 16; 1150 outptr += 16;
1151 } 1151 }
1152 1152
1153 for (i = 0; i < 16; ++i) { 1153 for (i = 0; i < 16; ++i) {
1154 for (j = 0; j < 16; ++j) 1154 for (j = 0; j < 16; ++j)
1155 temp_in[j] = out[j * 16 + i]; 1155 temp_in[j] = out[j * 16 + i];
1156 iadst16_1d(temp_in, temp_out); 1156 iadst16(temp_in, temp_out);
1157 for (j = 0; j < 16; ++j) 1157 for (j = 0; j < 16; ++j)
1158 dest[j * pitch + i] = 1158 dest[j * pitch + i] =
1159 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 1159 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
1160 + dest[j * pitch + i]); 1160 + dest[j * pitch + i]);
1161 } 1161 }
1162 } 1162 }
1163 break; 1163 break;
1164 default: 1164 default:
1165 printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n"); 1165 printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n");
1166 break; 1166 break;
1167 } 1167 }
1168 } 1168 }
1169 1169
1170 void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest, 1170 void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
1171 int dest_stride) { 1171 int dest_stride) {
1172 DECLARE_ALIGNED(32, int16_t, out[16 * 16]); 1172 DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
1173 int16_t *outptr = out; 1173 int16_t *outptr = out;
1174 uint32_t i; 1174 uint32_t i;
1175 uint32_t pos = 45; 1175 uint32_t pos = 45;
1176 1176
1177 /* bit position for extract from acc */ 1177 /* bit position for extract from acc */
1178 __asm__ __volatile__ ( 1178 __asm__ __volatile__ (
1179 "wrdsp %[pos], 1 \n\t" 1179 "wrdsp %[pos], 1 \n\t"
1180 : 1180 :
1181 : [pos] "r" (pos) 1181 : [pos] "r" (pos)
1182 ); 1182 );
1183 1183
1184 // First transform rows. Since all non-zero dct coefficients are in 1184 // First transform rows. Since all non-zero dct coefficients are in
1185 // upper-left 4x4 area, we only need to calculate first 4 rows here. 1185 // upper-left 4x4 area, we only need to calculate first 4 rows here.
1186 idct16_1d_rows_dspr2(input, outptr, 4); 1186 idct16_rows_dspr2(input, outptr, 4);
1187 1187
1188 outptr += 4; 1188 outptr += 4;
1189 for (i = 0; i < 6; ++i) { 1189 for (i = 0; i < 6; ++i) {
1190 __asm__ __volatile__ ( 1190 __asm__ __volatile__ (
1191 "sw $zero, 0(%[outptr]) \n\t" 1191 "sw $zero, 0(%[outptr]) \n\t"
1192 "sw $zero, 32(%[outptr]) \n\t" 1192 "sw $zero, 32(%[outptr]) \n\t"
1193 "sw $zero, 64(%[outptr]) \n\t" 1193 "sw $zero, 64(%[outptr]) \n\t"
1194 "sw $zero, 96(%[outptr]) \n\t" 1194 "sw $zero, 96(%[outptr]) \n\t"
1195 "sw $zero, 128(%[outptr]) \n\t" 1195 "sw $zero, 128(%[outptr]) \n\t"
1196 "sw $zero, 160(%[outptr]) \n\t" 1196 "sw $zero, 160(%[outptr]) \n\t"
1197 "sw $zero, 192(%[outptr]) \n\t" 1197 "sw $zero, 192(%[outptr]) \n\t"
1198 "sw $zero, 224(%[outptr]) \n\t" 1198 "sw $zero, 224(%[outptr]) \n\t"
1199 "sw $zero, 256(%[outptr]) \n\t" 1199 "sw $zero, 256(%[outptr]) \n\t"
1200 "sw $zero, 288(%[outptr]) \n\t" 1200 "sw $zero, 288(%[outptr]) \n\t"
1201 "sw $zero, 320(%[outptr]) \n\t" 1201 "sw $zero, 320(%[outptr]) \n\t"
1202 "sw $zero, 352(%[outptr]) \n\t" 1202 "sw $zero, 352(%[outptr]) \n\t"
1203 "sw $zero, 384(%[outptr]) \n\t" 1203 "sw $zero, 384(%[outptr]) \n\t"
1204 "sw $zero, 416(%[outptr]) \n\t" 1204 "sw $zero, 416(%[outptr]) \n\t"
1205 "sw $zero, 448(%[outptr]) \n\t" 1205 "sw $zero, 448(%[outptr]) \n\t"
1206 "sw $zero, 480(%[outptr]) \n\t" 1206 "sw $zero, 480(%[outptr]) \n\t"
1207 1207
1208 : 1208 :
1209 : [outptr] "r" (outptr) 1209 : [outptr] "r" (outptr)
1210 ); 1210 );
1211 1211
1212 outptr += 2; 1212 outptr += 2;
1213 } 1213 }
1214 1214
1215 // Then transform columns 1215 // Then transform columns
1216 idct16_1d_cols_add_blk_dspr2(out, dest, dest_stride); 1216 idct16_cols_add_blk_dspr2(out, dest, dest_stride);
1217 } 1217 }
1218 1218
1219 void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest, 1219 void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
1220 int dest_stride) { 1220 int dest_stride) {
1221 uint32_t pos = 45; 1221 uint32_t pos = 45;
1222 int32_t out; 1222 int32_t out;
1223 int32_t r; 1223 int32_t r;
1224 int32_t a1, absa1; 1224 int32_t a1, absa1;
1225 int32_t vector_a1; 1225 int32_t vector_a1;
1226 int32_t t1, t2, t3, t4; 1226 int32_t t1, t2, t3, t4;
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
1306 : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4), 1306 : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4),
1307 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2), 1307 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),
1308 [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4), 1308 [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4),
1309 [dest] "+&r" (dest) 1309 [dest] "+&r" (dest)
1310 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) 1310 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)
1311 ); 1311 );
1312 } 1312 }
1313 } 1313 }
1314 } 1314 }
1315 #endif // #if HAVE_DSPR2 1315 #endif // #if HAVE_DSPR2
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/mips/dspr2/vp9_common_dspr2.h ('k') | source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698