Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1922)

Side by Side Diff: source/libvpx/vpx_dsp/mips/itrans16_dspr2.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h>
12 #include <stdio.h>
13
14 #include "./vpx_config.h" 11 #include "./vpx_config.h"
15 #include "./vp9_rtcd.h" 12 #include "./vpx_dsp_rtcd.h"
16 #include "vp9/common/vp9_common.h" 13 #include "vpx_dsp/mips/inv_txfm_dspr2.h"
17 #include "vp9/common/vp9_blockd.h"
18 #include "vp9/common/vp9_idct.h"
19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
20 #include "vpx_dsp/txfm_common.h" 14 #include "vpx_dsp/txfm_common.h"
21 #include "vpx_ports/mem.h"
22 15
23 #if HAVE_DSPR2 16 #if HAVE_DSPR2
24 static void idct16_rows_dspr2(const int16_t *input, int16_t *output, 17 void idct16_rows_dspr2(const int16_t *input, int16_t *output,
25 uint32_t no_rows) { 18 uint32_t no_rows) {
26 int i; 19 int i;
27 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; 20 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
28 int step1_10, step1_11, step1_12, step1_13; 21 int step1_10, step1_11, step1_12, step1_13;
29 int step2_0, step2_1, step2_2, step2_3; 22 int step2_0, step2_1, step2_2, step2_3;
30 int step2_8, step2_9, step2_10, step2_11; 23 int step2_8, step2_9, step2_10, step2_11;
31 int step2_12, step2_13, step2_14, step2_15; 24 int step2_12, step2_13, step2_14, step2_15;
32 int load1, load2, load3, load4, load5, load6, load7, load8; 25 int load1, load2, load3, load4, load5, load6, load7, load8;
33 int result1, result2, result3, result4; 26 int result1, result2, result3, result4;
34 const int const_2_power_13 = 8192; 27 const int const_2_power_13 = 8192;
35 28
(...skipping 363 matching lines...) Expand 10 before | Expand all | Expand 10 after
399 [step1_4] "r" (step1_4), [step1_5] "r" (step1_5), 392 [step1_4] "r" (step1_4), [step1_5] "r" (step1_5),
400 [step1_10] "r" (step1_10), [step1_11] "r" (step1_11), 393 [step1_10] "r" (step1_10), [step1_11] "r" (step1_11),
401 [step1_12] "r" (step1_12), [step1_13] "r" (step1_13) 394 [step1_12] "r" (step1_12), [step1_13] "r" (step1_13)
402 ); 395 );
403 396
404 input += 16; 397 input += 16;
405 output += 1; 398 output += 1;
406 } 399 }
407 } 400 }
408 401
409 static void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, 402 void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest,
410 int dest_stride) { 403 int dest_stride) {
411 int i; 404 int i;
412 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; 405 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
413 int step1_8, step1_9, step1_10, step1_11; 406 int step1_8, step1_9, step1_10, step1_11;
414 int step1_12, step1_13, step1_14, step1_15; 407 int step1_12, step1_13, step1_14, step1_15;
415 int step2_0, step2_1, step2_2, step2_3; 408 int step2_0, step2_1, step2_2, step2_3;
416 int step2_8, step2_9, step2_10, step2_11; 409 int step2_8, step2_9, step2_10, step2_11;
417 int step2_12, step2_13, step2_14, step2_15; 410 int step2_12, step2_13, step2_14, step2_15;
418 int load1, load2, load3, load4, load5, load6, load7, load8; 411 int load1, load2, load3, load4, load5, load6, load7, load8;
419 int result1, result2, result3, result4; 412 int result1, result2, result3, result4;
420 const int const_2_power_13 = 8192; 413 const int const_2_power_13 = 8192;
(...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after
887 [step1_8] "r" (step1_8), [step1_9] "r" (step1_9), 880 [step1_8] "r" (step1_8), [step1_9] "r" (step1_9),
888 [step1_10] "r" (step1_10), [step1_11] "r" (step1_11), 881 [step1_10] "r" (step1_10), [step1_11] "r" (step1_11),
889 [step1_12] "r" (step1_12), [step1_13] "r" (step1_13), 882 [step1_12] "r" (step1_12), [step1_13] "r" (step1_13),
890 [step1_14] "r" (step1_14), [step1_15] "r" (step1_15) 883 [step1_14] "r" (step1_14), [step1_15] "r" (step1_15)
891 ); 884 );
892 885
893 input += 16; 886 input += 16;
894 } 887 }
895 } 888 }
896 889
897 void vp9_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, 890 void vpx_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
898 int dest_stride) { 891 int dest_stride) {
899 DECLARE_ALIGNED(32, int16_t, out[16 * 16]); 892 DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
900 uint32_t pos = 45; 893 uint32_t pos = 45;
901 894
902 /* bit position for extract from acc */ 895 /* bit position for extract from acc */
903 __asm__ __volatile__ ( 896 __asm__ __volatile__ (
904 "wrdsp %[pos], 1 \n\t" 897 "wrdsp %[pos], 1 \n\t"
905 : 898 :
906 : [pos] "r" (pos) 899 : [pos] "r" (pos)
907 ); 900 );
908 901
909 // First transform rows 902 // First transform rows
910 idct16_rows_dspr2(input, out, 16); 903 idct16_rows_dspr2(input, out, 16);
911 904
912 // Then transform columns and add to dest 905 // Then transform columns and add to dest
913 idct16_cols_add_blk_dspr2(out, dest, dest_stride); 906 idct16_cols_add_blk_dspr2(out, dest, dest_stride);
914 } 907 }
915 908
916 static void iadst16(const int16_t *input, int16_t *output) { 909 void vpx_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
910 int dest_stride) {
911 DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
912 int16_t *outptr = out;
913 uint32_t i;
914 uint32_t pos = 45;
915
916 /* bit position for extract from acc */
917 __asm__ __volatile__ (
918 "wrdsp %[pos], 1 \n\t"
919 :
920 : [pos] "r" (pos)
921 );
922
923 // First transform rows. Since all non-zero dct coefficients are in
924 // upper-left 4x4 area, we only need to calculate first 4 rows here.
925 idct16_rows_dspr2(input, outptr, 4);
926
927 outptr += 4;
928 for (i = 0; i < 6; ++i) {
929 __asm__ __volatile__ (
930 "sw $zero, 0(%[outptr]) \n\t"
931 "sw $zero, 32(%[outptr]) \n\t"
932 "sw $zero, 64(%[outptr]) \n\t"
933 "sw $zero, 96(%[outptr]) \n\t"
934 "sw $zero, 128(%[outptr]) \n\t"
935 "sw $zero, 160(%[outptr]) \n\t"
936 "sw $zero, 192(%[outptr]) \n\t"
937 "sw $zero, 224(%[outptr]) \n\t"
938 "sw $zero, 256(%[outptr]) \n\t"
939 "sw $zero, 288(%[outptr]) \n\t"
940 "sw $zero, 320(%[outptr]) \n\t"
941 "sw $zero, 352(%[outptr]) \n\t"
942 "sw $zero, 384(%[outptr]) \n\t"
943 "sw $zero, 416(%[outptr]) \n\t"
944 "sw $zero, 448(%[outptr]) \n\t"
945 "sw $zero, 480(%[outptr]) \n\t"
946
947 :
948 : [outptr] "r" (outptr)
949 );
950
951 outptr += 2;
952 }
953
954 // Then transform columns
955 idct16_cols_add_blk_dspr2(out, dest, dest_stride);
956 }
957
958 void vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
959 int dest_stride) {
960 uint32_t pos = 45;
961 int32_t out;
962 int32_t r;
963 int32_t a1, absa1;
964 int32_t vector_a1;
965 int32_t t1, t2, t3, t4;
966 int32_t vector_1, vector_2, vector_3, vector_4;
967
968 /* bit position for extract from acc */
969 __asm__ __volatile__ (
970 "wrdsp %[pos], 1 \n\t"
971
972 :
973 : [pos] "r" (pos)
974 );
975
976 out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input[0]);
977 __asm__ __volatile__ (
978 "addi %[out], %[out], 32 \n\t"
979 "sra %[a1], %[out], 6 \n\t"
980
981 : [out] "+r" (out), [a1] "=r" (a1)
982 :
983 );
984
985 if (a1 < 0) {
986 /* use quad-byte
987 * input and output memory are four byte aligned */
988 __asm__ __volatile__ (
989 "abs %[absa1], %[a1] \n\t"
990 "replv.qb %[vector_a1], %[absa1] \n\t"
991
992 : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
993 : [a1] "r" (a1)
994 );
995
996 for (r = 16; r--;) {
997 __asm__ __volatile__ (
998 "lw %[t1], 0(%[dest]) \n\t"
999 "lw %[t2], 4(%[dest]) \n\t"
1000 "lw %[t3], 8(%[dest]) \n\t"
1001 "lw %[t4], 12(%[dest]) \n\t"
1002 "subu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t"
1003 "subu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t"
1004 "subu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t"
1005 "subu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t"
1006 "sw %[vector_1], 0(%[dest]) \n\t"
1007 "sw %[vector_2], 4(%[dest]) \n\t"
1008 "sw %[vector_3], 8(%[dest]) \n\t"
1009 "sw %[vector_4], 12(%[dest]) \n\t"
1010 "add %[dest], %[dest], %[dest_stride] \n\t"
1011
1012 : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4),
1013 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),
1014 [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4),
1015 [dest] "+&r" (dest)
1016 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)
1017 );
1018 }
1019 } else {
1020 /* use quad-byte
1021 * input and output memory are four byte aligned */
1022 __asm__ __volatile__ (
1023 "replv.qb %[vector_a1], %[a1] \n\t"
1024
1025 : [vector_a1] "=r" (vector_a1)
1026 : [a1] "r" (a1)
1027 );
1028
1029 for (r = 16; r--;) {
1030 __asm__ __volatile__ (
1031 "lw %[t1], 0(%[dest]) \n\t"
1032 "lw %[t2], 4(%[dest]) \n\t"
1033 "lw %[t3], 8(%[dest]) \n\t"
1034 "lw %[t4], 12(%[dest]) \n\t"
1035 "addu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t"
1036 "addu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t"
1037 "addu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t"
1038 "addu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t"
1039 "sw %[vector_1], 0(%[dest]) \n\t"
1040 "sw %[vector_2], 4(%[dest]) \n\t"
1041 "sw %[vector_3], 8(%[dest]) \n\t"
1042 "sw %[vector_4], 12(%[dest]) \n\t"
1043 "add %[dest], %[dest], %[dest_stride] \n\t"
1044
1045 : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4),
1046 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),
1047 [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4),
1048 [dest] "+&r" (dest)
1049 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)
1050 );
1051 }
1052 }
1053 }
1054
1055 void iadst16_dspr2(const int16_t *input, int16_t *output) {
917 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; 1056 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
918 1057
919 int x0 = input[15]; 1058 int x0 = input[15];
920 int x1 = input[0]; 1059 int x1 = input[0];
921 int x2 = input[13]; 1060 int x2 = input[13];
922 int x3 = input[2]; 1061 int x3 = input[2];
923 int x4 = input[11]; 1062 int x4 = input[11];
924 int x5 = input[4]; 1063 int x5 = input[4];
925 int x6 = input[9]; 1064 int x6 = input[9];
926 int x7 = input[6]; 1065 int x7 = input[6];
(...skipping 150 matching lines...) Expand 10 before | Expand all | Expand 10 after
1077 output[8] = x3; 1216 output[8] = x3;
1078 output[9] = x11; 1217 output[9] = x11;
1079 output[10] = x15; 1218 output[10] = x15;
1080 output[11] = x7; 1219 output[11] = x7;
1081 output[12] = x5; 1220 output[12] = x5;
1082 output[13] = -x13; 1221 output[13] = -x13;
1083 output[14] = x9; 1222 output[14] = x9;
1084 output[15] = -x1; 1223 output[15] = -x1;
1085 } 1224 }
1086 1225
1087 void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest,
1088 int pitch, int tx_type) {
1089 int i, j;
1090 DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
1091 int16_t *outptr = out;
1092 int16_t temp_out[16];
1093 uint32_t pos = 45;
1094 1226
1095 /* bit position for extract from acc */ 1227 #endif // HAVE_DSPR2
1096 __asm__ __volatile__ (
1097 "wrdsp %[pos], 1 \n\t"
1098 :
1099 : [pos] "r" (pos)
1100 );
1101
1102 switch (tx_type) {
1103 case DCT_DCT: // DCT in both horizontal and vertical
1104 idct16_rows_dspr2(input, outptr, 16);
1105 idct16_cols_add_blk_dspr2(out, dest, pitch);
1106 break;
1107 case ADST_DCT: // ADST in vertical, DCT in horizontal
1108 idct16_rows_dspr2(input, outptr, 16);
1109
1110 outptr = out;
1111
1112 for (i = 0; i < 16; ++i) {
1113 iadst16(outptr, temp_out);
1114
1115 for (j = 0; j < 16; ++j)
1116 dest[j * pitch + i] =
1117 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
1118 + dest[j * pitch + i]);
1119 outptr += 16;
1120 }
1121 break;
1122 case DCT_ADST: // DCT in vertical, ADST in horizontal
1123 {
1124 int16_t temp_in[16 * 16];
1125
1126 for (i = 0; i < 16; ++i) {
1127 /* prefetch row */
1128 prefetch_load((const uint8_t *)(input + 16));
1129
1130 iadst16(input, outptr);
1131 input += 16;
1132 outptr += 16;
1133 }
1134
1135 for (i = 0; i < 16; ++i)
1136 for (j = 0; j < 16; ++j)
1137 temp_in[j * 16 + i] = out[i * 16 + j];
1138
1139 idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
1140 }
1141 break;
1142 case ADST_ADST: // ADST in both directions
1143 {
1144 int16_t temp_in[16];
1145
1146 for (i = 0; i < 16; ++i) {
1147 /* prefetch row */
1148 prefetch_load((const uint8_t *)(input + 16));
1149
1150 iadst16(input, outptr);
1151 input += 16;
1152 outptr += 16;
1153 }
1154
1155 for (i = 0; i < 16; ++i) {
1156 for (j = 0; j < 16; ++j)
1157 temp_in[j] = out[j * 16 + i];
1158 iadst16(temp_in, temp_out);
1159 for (j = 0; j < 16; ++j)
1160 dest[j * pitch + i] =
1161 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
1162 + dest[j * pitch + i]);
1163 }
1164 }
1165 break;
1166 default:
1167 printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n");
1168 break;
1169 }
1170 }
1171
1172 void vp9_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest,
1173 int dest_stride) {
1174 DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
1175 int16_t *outptr = out;
1176 uint32_t i;
1177 uint32_t pos = 45;
1178
1179 /* bit position for extract from acc */
1180 __asm__ __volatile__ (
1181 "wrdsp %[pos], 1 \n\t"
1182 :
1183 : [pos] "r" (pos)
1184 );
1185
1186 // First transform rows. Since all non-zero dct coefficients are in
1187 // upper-left 4x4 area, we only need to calculate first 4 rows here.
1188 idct16_rows_dspr2(input, outptr, 4);
1189
1190 outptr += 4;
1191 for (i = 0; i < 6; ++i) {
1192 __asm__ __volatile__ (
1193 "sw $zero, 0(%[outptr]) \n\t"
1194 "sw $zero, 32(%[outptr]) \n\t"
1195 "sw $zero, 64(%[outptr]) \n\t"
1196 "sw $zero, 96(%[outptr]) \n\t"
1197 "sw $zero, 128(%[outptr]) \n\t"
1198 "sw $zero, 160(%[outptr]) \n\t"
1199 "sw $zero, 192(%[outptr]) \n\t"
1200 "sw $zero, 224(%[outptr]) \n\t"
1201 "sw $zero, 256(%[outptr]) \n\t"
1202 "sw $zero, 288(%[outptr]) \n\t"
1203 "sw $zero, 320(%[outptr]) \n\t"
1204 "sw $zero, 352(%[outptr]) \n\t"
1205 "sw $zero, 384(%[outptr]) \n\t"
1206 "sw $zero, 416(%[outptr]) \n\t"
1207 "sw $zero, 448(%[outptr]) \n\t"
1208 "sw $zero, 480(%[outptr]) \n\t"
1209
1210 :
1211 : [outptr] "r" (outptr)
1212 );
1213
1214 outptr += 2;
1215 }
1216
1217 // Then transform columns
1218 idct16_cols_add_blk_dspr2(out, dest, dest_stride);
1219 }
1220
1221 void vp9_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest,
1222 int dest_stride) {
1223 uint32_t pos = 45;
1224 int32_t out;
1225 int32_t r;
1226 int32_t a1, absa1;
1227 int32_t vector_a1;
1228 int32_t t1, t2, t3, t4;
1229 int32_t vector_1, vector_2, vector_3, vector_4;
1230
1231 /* bit position for extract from acc */
1232 __asm__ __volatile__ (
1233 "wrdsp %[pos], 1 \n\t"
1234
1235 :
1236 : [pos] "r" (pos)
1237 );
1238
1239 out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input[0]);
1240 __asm__ __volatile__ (
1241 "addi %[out], %[out], 32 \n\t"
1242 "sra %[a1], %[out], 6 \n\t"
1243
1244 : [out] "+r" (out), [a1] "=r" (a1)
1245 :
1246 );
1247
1248 if (a1 < 0) {
1249 /* use quad-byte
1250 * input and output memory are four byte aligned */
1251 __asm__ __volatile__ (
1252 "abs %[absa1], %[a1] \n\t"
1253 "replv.qb %[vector_a1], %[absa1] \n\t"
1254
1255 : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1)
1256 : [a1] "r" (a1)
1257 );
1258
1259 for (r = 16; r--;) {
1260 __asm__ __volatile__ (
1261 "lw %[t1], 0(%[dest]) \n\t"
1262 "lw %[t2], 4(%[dest]) \n\t"
1263 "lw %[t3], 8(%[dest]) \n\t"
1264 "lw %[t4], 12(%[dest]) \n\t"
1265 "subu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t"
1266 "subu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t"
1267 "subu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t"
1268 "subu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t"
1269 "sw %[vector_1], 0(%[dest]) \n\t"
1270 "sw %[vector_2], 4(%[dest]) \n\t"
1271 "sw %[vector_3], 8(%[dest]) \n\t"
1272 "sw %[vector_4], 12(%[dest]) \n\t"
1273 "add %[dest], %[dest], %[dest_stride] \n\t"
1274
1275 : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4),
1276 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),
1277 [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4),
1278 [dest] "+&r" (dest)
1279 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)
1280 );
1281 }
1282 } else {
1283 /* use quad-byte
1284 * input and output memory are four byte aligned */
1285 __asm__ __volatile__ (
1286 "replv.qb %[vector_a1], %[a1] \n\t"
1287
1288 : [vector_a1] "=r" (vector_a1)
1289 : [a1] "r" (a1)
1290 );
1291
1292 for (r = 16; r--;) {
1293 __asm__ __volatile__ (
1294 "lw %[t1], 0(%[dest]) \n\t"
1295 "lw %[t2], 4(%[dest]) \n\t"
1296 "lw %[t3], 8(%[dest]) \n\t"
1297 "lw %[t4], 12(%[dest]) \n\t"
1298 "addu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t"
1299 "addu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t"
1300 "addu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t"
1301 "addu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t"
1302 "sw %[vector_1], 0(%[dest]) \n\t"
1303 "sw %[vector_2], 4(%[dest]) \n\t"
1304 "sw %[vector_3], 8(%[dest]) \n\t"
1305 "sw %[vector_4], 12(%[dest]) \n\t"
1306 "add %[dest], %[dest], %[dest_stride] \n\t"
1307
1308 : [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3), [t4] "=&r" (t4),
1309 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2),
1310 [vector_3] "=&r" (vector_3), [vector_4] "=&r" (vector_4),
1311 [dest] "+&r" (dest)
1312 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1)
1313 );
1314 }
1315 }
1316 }
1317 #endif // #if HAVE_DSPR2
OLDNEW
« no previous file with comments | « source/libvpx/vpx_dsp/mips/inv_txfm_msa.h ('k') | source/libvpx/vpx_dsp/mips/itrans32_cols_dspr2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698