Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(314)

Side by Side Diff: source/libvpx/vpx_dsp/mips/vpx_convolve8_msa.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h>
11 #include "./vpx_dsp_rtcd.h" 12 #include "./vpx_dsp_rtcd.h"
12 #include "vpx_dsp/mips/vpx_convolve_msa.h" 13 #include "vpx_dsp/mips/vpx_convolve_msa.h"
13 14
14 const uint8_t mc_filt_mask_arr[16 * 3] = { 15 const uint8_t mc_filt_mask_arr[16 * 3] = {
15 /* 8 width cases */ 16 /* 8 width cases */
16 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 17 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
17 /* 4 width cases */ 18 /* 4 width cases */
18 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20, 19 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
19 /* 4 width cases */ 20 /* 4 width cases */
20 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28 21 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
249 LD_SB5(src, src_stride, src0, src1, src2, src3, src4); 250 LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
250 hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); 251 hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
251 hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); 252 hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
252 hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); 253 hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
253 hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); 254 hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
254 hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); 255 hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
255 256
256 ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); 257 ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
257 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); 258 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
258 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); 259 SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
259 SAT_UH2_UH(tmp0, tmp1, 7);
260 PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1); 260 PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
261 ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); 261 ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
262 } 262 }
263 263
264 static void common_hv_2ht_2vt_4x8_msa(const uint8_t *src, int32_t src_stride, 264 static void common_hv_2ht_2vt_4x8_msa(const uint8_t *src, int32_t src_stride,
265 uint8_t *dst, int32_t dst_stride, 265 uint8_t *dst, int32_t dst_stride,
266 int8_t *filter_horiz, 266 int8_t *filter_horiz,
267 int8_t *filter_vert) { 267 int8_t *filter_vert) {
268 v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask; 268 v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
269 v16i8 res0, res1, res2, res3; 269 v16i8 res0, res1, res2, res3;
(...skipping 21 matching lines...) Expand all
291 hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, FILTER_BITS); 291 hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, FILTER_BITS);
292 SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1, 292 SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
293 hz_out3, hz_out5, 8); 293 hz_out3, hz_out5, 8);
294 hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6); 294 hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6);
295 295
296 ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); 296 ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
297 ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3); 297 ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
298 DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt, 298 DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
299 vec4, vec5, vec6, vec7); 299 vec4, vec5, vec6, vec7);
300 SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS); 300 SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS);
301 SAT_UH4_UH(vec4, vec5, vec6, vec7, 7);
302 PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, 301 PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1,
303 res2, res3); 302 res2, res3);
304 ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); 303 ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
305 dst += (4 * dst_stride); 304 dst += (4 * dst_stride);
306 ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); 305 ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
307 } 306 }
308 307
309 static void common_hv_2ht_2vt_4w_msa(const uint8_t *src, int32_t src_stride, 308 static void common_hv_2ht_2vt_4w_msa(const uint8_t *src, int32_t src_stride,
310 uint8_t *dst, int32_t dst_stride, 309 uint8_t *dst, int32_t dst_stride,
311 int8_t *filter_horiz, int8_t *filter_vert, 310 int8_t *filter_horiz, int8_t *filter_vert,
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
350 349
351 hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); 350 hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
352 vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 351 vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
353 tmp2 = __msa_dotp_u_h(vec2, filt_vt); 352 tmp2 = __msa_dotp_u_h(vec2, filt_vt);
354 353
355 hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); 354 hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
356 vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 355 vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
357 tmp3 = __msa_dotp_u_h(vec3, filt_vt); 356 tmp3 = __msa_dotp_u_h(vec3, filt_vt);
358 357
359 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); 358 SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
360 SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
361 PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); 359 PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
362 ST8x4_UB(out0, out1, dst, dst_stride); 360 ST8x4_UB(out0, out1, dst, dst_stride);
363 } 361 }
364 362
365 static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src, 363 static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src,
366 int32_t src_stride, 364 int32_t src_stride,
367 uint8_t *dst, 365 uint8_t *dst,
368 int32_t dst_stride, 366 int32_t dst_stride,
369 int8_t *filter_horiz, 367 int8_t *filter_horiz,
370 int8_t *filter_vert, 368 int8_t *filter_vert,
(...skipping 24 matching lines...) Expand all
395 393
396 hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); 394 hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
397 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 395 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
398 tmp1 = __msa_dotp_u_h(vec0, filt_vt); 396 tmp1 = __msa_dotp_u_h(vec0, filt_vt);
399 397
400 hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); 398 hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
401 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 399 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
402 tmp2 = __msa_dotp_u_h(vec0, filt_vt); 400 tmp2 = __msa_dotp_u_h(vec0, filt_vt);
403 401
404 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); 402 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
405 SAT_UH2_UH(tmp1, tmp2, 7);
406 403
407 hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); 404 hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
408 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 405 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
409 tmp3 = __msa_dotp_u_h(vec0, filt_vt); 406 tmp3 = __msa_dotp_u_h(vec0, filt_vt);
410 407
411 hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); 408 hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
412 LD_SB4(src, src_stride, src1, src2, src3, src4); 409 LD_SB4(src, src_stride, src1, src2, src3, src4);
413 src += (4 * src_stride); 410 src += (4 * src_stride);
414 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 411 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
415 tmp4 = __msa_dotp_u_h(vec0, filt_vt); 412 tmp4 = __msa_dotp_u_h(vec0, filt_vt);
416 413
417 SRARI_H2_UH(tmp3, tmp4, FILTER_BITS); 414 SRARI_H2_UH(tmp3, tmp4, FILTER_BITS);
418 SAT_UH2_UH(tmp3, tmp4, 7);
419 PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1); 415 PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1);
420 ST8x4_UB(out0, out1, dst, dst_stride); 416 ST8x4_UB(out0, out1, dst, dst_stride);
421 dst += (4 * dst_stride); 417 dst += (4 * dst_stride);
422 418
423 hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); 419 hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
424 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 420 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
425 tmp5 = __msa_dotp_u_h(vec0, filt_vt); 421 tmp5 = __msa_dotp_u_h(vec0, filt_vt);
426 422
427 hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); 423 hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
428 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 424 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
429 tmp6 = __msa_dotp_u_h(vec0, filt_vt); 425 tmp6 = __msa_dotp_u_h(vec0, filt_vt);
430 426
431 hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); 427 hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
432 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 428 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
433 tmp7 = __msa_dotp_u_h(vec0, filt_vt); 429 tmp7 = __msa_dotp_u_h(vec0, filt_vt);
434 430
435 hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); 431 hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
436 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 432 vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
437 tmp8 = __msa_dotp_u_h(vec0, filt_vt); 433 tmp8 = __msa_dotp_u_h(vec0, filt_vt);
438 434
439 SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, FILTER_BITS); 435 SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, FILTER_BITS);
440 SAT_UH4_UH(tmp5, tmp6, tmp7, tmp8, 7);
441 PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1); 436 PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1);
442 ST8x4_UB(out0, out1, dst, dst_stride); 437 ST8x4_UB(out0, out1, dst, dst_stride);
443 dst += (4 * dst_stride); 438 dst += (4 * dst_stride);
444 } 439 }
445 } 440 }
446 441
447 static void common_hv_2ht_2vt_8w_msa(const uint8_t *src, int32_t src_stride, 442 static void common_hv_2ht_2vt_8w_msa(const uint8_t *src, int32_t src_stride,
448 uint8_t *dst, int32_t dst_stride, 443 uint8_t *dst, int32_t dst_stride,
449 int8_t *filter_horiz, int8_t *filter_vert, 444 int8_t *filter_horiz, int8_t *filter_vert,
450 int32_t height) { 445 int32_t height) {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
485 for (loop_cnt = (height >> 2); loop_cnt--;) { 480 for (loop_cnt = (height >> 2); loop_cnt--;) {
486 LD_SB4(src, src_stride, src0, src2, src4, src6); 481 LD_SB4(src, src_stride, src0, src2, src4, src6);
487 LD_SB4(src + 8, src_stride, src1, src3, src5, src7); 482 LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
488 src += (4 * src_stride); 483 src += (4 * src_stride);
489 484
490 hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); 485 hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
491 hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); 486 hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
492 ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); 487 ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
493 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); 488 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
494 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); 489 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
495 SAT_UH2_UH(tmp1, tmp2, 7);
496 PCKEV_ST_SB(tmp1, tmp2, dst); 490 PCKEV_ST_SB(tmp1, tmp2, dst);
497 dst += dst_stride; 491 dst += dst_stride;
498 492
499 hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); 493 hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
500 hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); 494 hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
501 ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); 495 ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
502 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); 496 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
503 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); 497 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
504 SAT_UH2_UH(tmp1, tmp2, 7);
505 PCKEV_ST_SB(tmp1, tmp2, dst); 498 PCKEV_ST_SB(tmp1, tmp2, dst);
506 dst += dst_stride; 499 dst += dst_stride;
507 500
508 hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); 501 hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
509 hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS); 502 hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS);
510 ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); 503 ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
511 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); 504 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
512 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); 505 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
513 SAT_UH2_UH(tmp1, tmp2, 7);
514 PCKEV_ST_SB(tmp1, tmp2, dst); 506 PCKEV_ST_SB(tmp1, tmp2, dst);
515 dst += dst_stride; 507 dst += dst_stride;
516 508
517 hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS); 509 hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS);
518 hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS); 510 hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS);
519 ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); 511 ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
520 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); 512 DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
521 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); 513 SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
522 SAT_UH2_UH(tmp1, tmp2, 7);
523 PCKEV_ST_SB(tmp1, tmp2, dst); 514 PCKEV_ST_SB(tmp1, tmp2, dst);
524 dst += dst_stride; 515 dst += dst_stride;
525 } 516 }
526 } 517 }
527 518
528 static void common_hv_2ht_2vt_32w_msa(const uint8_t *src, int32_t src_stride, 519 static void common_hv_2ht_2vt_32w_msa(const uint8_t *src, int32_t src_stride,
529 uint8_t *dst, int32_t dst_stride, 520 uint8_t *dst, int32_t dst_stride,
530 int8_t *filter_horiz, int8_t *filter_vert, 521 int8_t *filter_horiz, int8_t *filter_vert,
531 int32_t height) { 522 int32_t height) {
532 int32_t multiple8_cnt; 523 int32_t multiple8_cnt;
(...skipping 18 matching lines...) Expand all
551 } 542 }
552 } 543 }
553 544
554 void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, 545 void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
555 uint8_t *dst, ptrdiff_t dst_stride, 546 uint8_t *dst, ptrdiff_t dst_stride,
556 const int16_t *filter_x, int32_t x_step_q4, 547 const int16_t *filter_x, int32_t x_step_q4,
557 const int16_t *filter_y, int32_t y_step_q4, 548 const int16_t *filter_y, int32_t y_step_q4,
558 int32_t w, int32_t h) { 549 int32_t w, int32_t h) {
559 int8_t cnt, filt_hor[8], filt_ver[8]; 550 int8_t cnt, filt_hor[8], filt_ver[8];
560 551
561 if (16 != x_step_q4 || 16 != y_step_q4) { 552 assert(x_step_q4 == 16);
562 vpx_convolve8_c(src, src_stride, dst, dst_stride, 553 assert(y_step_q4 == 16);
563 filter_x, x_step_q4, filter_y, y_step_q4, 554 assert(((const int32_t *)filter_x)[1] != 0x800000);
564 w, h); 555 assert(((const int32_t *)filter_y)[1] != 0x800000);
565 return;
566 }
567
568 if (((const int32_t *)filter_x)[1] == 0x800000 &&
569 ((const int32_t *)filter_y)[1] == 0x800000) {
570 vpx_convolve_copy(src, src_stride, dst, dst_stride,
571 filter_x, x_step_q4, filter_y, y_step_q4,
572 w, h);
573 return;
574 }
575 556
576 for (cnt = 0; cnt < 8; ++cnt) { 557 for (cnt = 0; cnt < 8; ++cnt) {
577 filt_hor[cnt] = filter_x[cnt]; 558 filt_hor[cnt] = filter_x[cnt];
578 filt_ver[cnt] = filter_y[cnt]; 559 filt_ver[cnt] = filter_y[cnt];
579 } 560 }
580 561
581 if (((const int32_t *)filter_x)[0] == 0 && 562 if (((const int32_t *)filter_x)[0] == 0 &&
582 ((const int32_t *)filter_y)[0] == 0) { 563 ((const int32_t *)filter_y)[0] == 0) {
583 switch (w) { 564 switch (w) {
584 case 4: 565 case 4:
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
645 filt_hor, filt_ver, (int32_t)h); 626 filt_hor, filt_ver, (int32_t)h);
646 break; 627 break;
647 default: 628 default:
648 vpx_convolve8_c(src, src_stride, dst, dst_stride, 629 vpx_convolve8_c(src, src_stride, dst, dst_stride,
649 filter_x, x_step_q4, filter_y, y_step_q4, 630 filter_x, x_step_q4, filter_y, y_step_q4,
650 w, h); 631 w, h);
651 break; 632 break;
652 } 633 }
653 } 634 }
654 } 635 }
OLD | NEW
« no previous file with comments | « source/libvpx/vpx_dsp/mips/vpx_convolve8_horiz_msa.c ('k') | source/libvpx/vpx_dsp/mips/vpx_convolve8_vert_dspr2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698