Index: source/libvpx/third_party/libyuv/source/rotate_mips.cc
===================================================================
--- source/libvpx/third_party/libyuv/source/rotate_mips.cc (revision 0)
+++ source/libvpx/third_party/libyuv/source/rotate_mips.cc (revision 0)
@@ -0,0 +1,497 @@
+/*
+ * Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_MIPS) && \
+    defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
+    (_MIPS_SIM == _MIPS_SIM_ABI32)
+
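+// Transposes a width x 8 tile one byte-column at a time: each iteration
+// gathers one byte from each of 8 consecutive source rows, packs them with
+// precr.qb.ph, and stores the 8 bytes as one destination row (sw when dst
+// and dst_stride are word aligned, swr/swl pairs otherwise).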
+void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+                             uint8* dst, int dst_stride,
+                             int width) {
+  __asm__ __volatile__ (
+    ".set push \n"
+    ".set noreorder \n"
+    "sll $t2, %[src_stride], 0x1 \n"  // src_stride x 2
+    "sll $t4, %[src_stride], 0x2 \n"  // src_stride x 4
+    "sll $t9, %[src_stride], 0x3 \n"  // src_stride x 8
+    "addu $t3, $t2, %[src_stride] \n"
+    "addu $t5, $t4, %[src_stride] \n"
+    "addu $t6, $t2, $t4 \n"
+    "andi $t0, %[dst], 0x3 \n"
+    "andi $t1, %[dst_stride], 0x3 \n"
+    "or $t0, $t0, $t1 \n"
+    "bnez $t0, 11f \n"
+    " subu $t7, $t9, %[src_stride] \n"
+    // dst + dst_stride word aligned
+    "1: \n"
+    "lbu $t0, 0(%[src]) \n"
+    "lbux $t1, %[src_stride](%[src]) \n"
+    "lbux $t8, $t2(%[src]) \n"
+    "lbux $t9, $t3(%[src]) \n"
+    "sll $t1, $t1, 16 \n"
+    "sll $t9, $t9, 16 \n"
+    "or $t0, $t0, $t1 \n"
+    "or $t8, $t8, $t9 \n"
+    "precr.qb.ph $s0, $t8, $t0 \n"
+    "lbux $t0, $t4(%[src]) \n"
+    "lbux $t1, $t5(%[src]) \n"
+    "lbux $t8, $t6(%[src]) \n"
+    "lbux $t9, $t7(%[src]) \n"
+    "sll $t1, $t1, 16 \n"
+    "sll $t9, $t9, 16 \n"
+    "or $t0, $t0, $t1 \n"
+    "or $t8, $t8, $t9 \n"
+    "precr.qb.ph $s1, $t8, $t0 \n"
+    "sw $s0, 0(%[dst]) \n"
+    "addiu %[width], -1 \n"
+    "addiu %[src], 1 \n"
+    "sw $s1, 4(%[dst]) \n"
+    "bnez %[width], 1b \n"
+    " addu %[dst], %[dst], %[dst_stride] \n"
+    "b 2f \n"
+    // dst + dst_stride unaligned
+    "11: \n"
+    "lbu $t0, 0(%[src]) \n"
+    "lbux $t1, %[src_stride](%[src]) \n"
+    "lbux $t8, $t2(%[src]) \n"
+    "lbux $t9, $t3(%[src]) \n"
+    "sll $t1, $t1, 16 \n"
+    "sll $t9, $t9, 16 \n"
+    "or $t0, $t0, $t1 \n"
+    "or $t8, $t8, $t9 \n"
+    "precr.qb.ph $s0, $t8, $t0 \n"
+    "lbux $t0, $t4(%[src]) \n"
+    "lbux $t1, $t5(%[src]) \n"
+    "lbux $t8, $t6(%[src]) \n"
+    "lbux $t9, $t7(%[src]) \n"
+    "sll $t1, $t1, 16 \n"
+    "sll $t9, $t9, 16 \n"
+    "or $t0, $t0, $t1 \n"
+    "or $t8, $t8, $t9 \n"
+    "precr.qb.ph $s1, $t8, $t0 \n"
+    "swr $s0, 0(%[dst]) \n"
+    "swl $s0, 3(%[dst]) \n"
+    "addiu %[width], -1 \n"
+    "addiu %[src], 1 \n"
+    "swr $s1, 4(%[dst]) \n"
+    "swl $s1, 7(%[dst]) \n"
+    "bnez %[width], 11b \n"
+    " addu %[dst], %[dst], %[dst_stride] \n"
+    "2: \n"
+    ".set pop \n"
+    : [src] "+r" (src),
+      [dst] "+r" (dst),
+      [width] "+r" (width)
+    : [src_stride] "r" (src_stride),
+      [dst_stride] "r" (dst_stride)
+    : "t0", "t1", "t2", "t3", "t4", "t5",
+      "t6", "t7", "t8", "t9",
+      "s0", "s1"
+  );
+}
+
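+// Word-at-a-time variant: the loop counter is width / 4, kept in $at (hence
+// .set noat). Each iteration loads one word from each of 8 source rows,
+// transposes the resulting 8x4 byte tile with precr/precrq pairs, and stores
+// 8 bytes into each of 4 destination rows.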
+void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
+                                  uint8* dst, int dst_stride,
+                                  int width) {
+  __asm__ __volatile__ (
+    ".set noat \n"
+    ".set push \n"
+    ".set noreorder \n"
+    "beqz %[width], 2f \n"
+    " sll $t2, %[src_stride], 0x1 \n"  // src_stride x 2
+    "sll $t4, %[src_stride], 0x2 \n"  // src_stride x 4
+    "sll $t9, %[src_stride], 0x3 \n"  // src_stride x 8
+    "addu $t3, $t2, %[src_stride] \n"
+    "addu $t5, $t4, %[src_stride] \n"
+    "addu $t6, $t2, $t4 \n"
+
+    "srl $AT, %[width], 0x2 \n"
+    "andi $t0, %[dst], 0x3 \n"
+    "andi $t1, %[dst_stride], 0x3 \n"
+    "or $t0, $t0, $t1 \n"
+    "bnez $t0, 11f \n"
+    " subu $t7, $t9, %[src_stride] \n"
+    // dst + dst_stride word aligned
+    "1: \n"
+    "lw $t0, 0(%[src]) \n"
+    "lwx $t1, %[src_stride](%[src]) \n"
+    "lwx $t8, $t2(%[src]) \n"
+    "lwx $t9, $t3(%[src]) \n"
+
+    // t0 = | 30 | 20 | 10 | 00 |
+    // t1 = | 31 | 21 | 11 | 01 |
+    // t8 = | 32 | 22 | 12 | 02 |
+    // t9 = | 33 | 23 | 13 | 03 |
+
+    "precr.qb.ph $s0, $t1, $t0 \n"
+    "precr.qb.ph $s1, $t9, $t8 \n"
+    "precrq.qb.ph $s2, $t1, $t0 \n"
+    "precrq.qb.ph $s3, $t9, $t8 \n"
+
+    // s0 = | 21 | 01 | 20 | 00 |
+    // s1 = | 23 | 03 | 22 | 02 |
+    // s2 = | 31 | 11 | 30 | 10 |
+    // s3 = | 33 | 13 | 32 | 12 |
+
+    "precr.qb.ph $s4, $s1, $s0 \n"
+    "precrq.qb.ph $s5, $s1, $s0 \n"
+    "precr.qb.ph $s6, $s3, $s2 \n"
+    "precrq.qb.ph $s7, $s3, $s2 \n"
+
+    // s4 = | 03 | 02 | 01 | 00 |
+    // s5 = | 23 | 22 | 21 | 20 |
+    // s6 = | 13 | 12 | 11 | 10 |
+    // s7 = | 33 | 32 | 31 | 30 |
+
+    "lwx $t0, $t4(%[src]) \n"
+    "lwx $t1, $t5(%[src]) \n"
+    "lwx $t8, $t6(%[src]) \n"
+    "lwx $t9, $t7(%[src]) \n"
+
+    // t0 = | 34 | 24 | 14 | 04 |
+    // t1 = | 35 | 25 | 15 | 05 |
+    // t8 = | 36 | 26 | 16 | 06 |
+    // t9 = | 37 | 27 | 17 | 07 |
+
+    "precr.qb.ph $s0, $t1, $t0 \n"
+    "precr.qb.ph $s1, $t9, $t8 \n"
+    "precrq.qb.ph $s2, $t1, $t0 \n"
+    "precrq.qb.ph $s3, $t9, $t8 \n"
+
+    // s0 = | 25 | 05 | 24 | 04 |
+    // s1 = | 27 | 07 | 26 | 06 |
+    // s2 = | 35 | 15 | 34 | 14 |
+    // s3 = | 37 | 17 | 36 | 16 |
+
+    "precr.qb.ph $t0, $s1, $s0 \n"
+    "precrq.qb.ph $t1, $s1, $s0 \n"
+    "precr.qb.ph $t8, $s3, $s2 \n"
+    "precrq.qb.ph $t9, $s3, $s2 \n"
+
+    // t0 = | 07 | 06 | 05 | 04 |
+    // t1 = | 27 | 26 | 25 | 24 |
+    // t8 = | 17 | 16 | 15 | 14 |
+    // t9 = | 37 | 36 | 35 | 34 |
+
+    "addu $s0, %[dst], %[dst_stride] \n"
+    "addu $s1, $s0, %[dst_stride] \n"
+    "addu $s2, $s1, %[dst_stride] \n"
+
+    "sw $s4, 0(%[dst]) \n"
+    "sw $t0, 4(%[dst]) \n"
+    "sw $s6, 0($s0) \n"
+    "sw $t8, 4($s0) \n"
+    "sw $s5, 0($s1) \n"
+    "sw $t1, 4($s1) \n"
+    "sw $s7, 0($s2) \n"
+    "sw $t9, 4($s2) \n"
+
+    "addiu $AT, -1 \n"
+    "addiu %[src], 4 \n"
+
+    "bnez $AT, 1b \n"
+    " addu %[dst], $s2, %[dst_stride] \n"
+    "b 2f \n"
+    // dst + dst_stride unaligned
+    "11: \n"
+    "lw $t0, 0(%[src]) \n"
+    "lwx $t1, %[src_stride](%[src]) \n"
+    "lwx $t8, $t2(%[src]) \n"
+    "lwx $t9, $t3(%[src]) \n"
+
+    // t0 = | 30 | 20 | 10 | 00 |
+    // t1 = | 31 | 21 | 11 | 01 |
+    // t8 = | 32 | 22 | 12 | 02 |
+    // t9 = | 33 | 23 | 13 | 03 |
+
+    "precr.qb.ph $s0, $t1, $t0 \n"
+    "precr.qb.ph $s1, $t9, $t8 \n"
+    "precrq.qb.ph $s2, $t1, $t0 \n"
+    "precrq.qb.ph $s3, $t9, $t8 \n"
+
+    // s0 = | 21 | 01 | 20 | 00 |
+    // s1 = | 23 | 03 | 22 | 02 |
+    // s2 = | 31 | 11 | 30 | 10 |
+    // s3 = | 33 | 13 | 32 | 12 |
+
+    "precr.qb.ph $s4, $s1, $s0 \n"
+    "precrq.qb.ph $s5, $s1, $s0 \n"
+    "precr.qb.ph $s6, $s3, $s2 \n"
+    "precrq.qb.ph $s7, $s3, $s2 \n"
+
+    // s4 = | 03 | 02 | 01 | 00 |
+    // s5 = | 23 | 22 | 21 | 20 |
+    // s6 = | 13 | 12 | 11 | 10 |
+    // s7 = | 33 | 32 | 31 | 30 |
+
+    "lwx $t0, $t4(%[src]) \n"
+    "lwx $t1, $t5(%[src]) \n"
+    "lwx $t8, $t6(%[src]) \n"
+    "lwx $t9, $t7(%[src]) \n"
+
+    // t0 = | 34 | 24 | 14 | 04 |
+    // t1 = | 35 | 25 | 15 | 05 |
+    // t8 = | 36 | 26 | 16 | 06 |
+    // t9 = | 37 | 27 | 17 | 07 |
+
+    "precr.qb.ph $s0, $t1, $t0 \n"
+    "precr.qb.ph $s1, $t9, $t8 \n"
+    "precrq.qb.ph $s2, $t1, $t0 \n"
+    "precrq.qb.ph $s3, $t9, $t8 \n"
+
+    // s0 = | 25 | 05 | 24 | 04 |
+    // s1 = | 27 | 07 | 26 | 06 |
+    // s2 = | 35 | 15 | 34 | 14 |
+    // s3 = | 37 | 17 | 36 | 16 |
+
+    "precr.qb.ph $t0, $s1, $s0 \n"
+    "precrq.qb.ph $t1, $s1, $s0 \n"
+    "precr.qb.ph $t8, $s3, $s2 \n"
+    "precrq.qb.ph $t9, $s3, $s2 \n"
+
+    // t0 = | 07 | 06 | 05 | 04 |
+    // t1 = | 27 | 26 | 25 | 24 |
+    // t8 = | 17 | 16 | 15 | 14 |
+    // t9 = | 37 | 36 | 35 | 34 |
+
+    "addu $s0, %[dst], %[dst_stride] \n"
+    "addu $s1, $s0, %[dst_stride] \n"
+    "addu $s2, $s1, %[dst_stride] \n"
+
+    "swr $s4, 0(%[dst]) \n"
+    "swl $s4, 3(%[dst]) \n"
+    "swr $t0, 4(%[dst]) \n"
+    "swl $t0, 7(%[dst]) \n"
+    "swr $s6, 0($s0) \n"
+    "swl $s6, 3($s0) \n"
+    "swr $t8, 4($s0) \n"
+    "swl $t8, 7($s0) \n"
+    "swr $s5, 0($s1) \n"
+    "swl $s5, 3($s1) \n"
+    "swr $t1, 4($s1) \n"
+    "swl $t1, 7($s1) \n"
+    "swr $s7, 0($s2) \n"
+    "swl $s7, 3($s2) \n"
+    "swr $t9, 4($s2) \n"
+    "swl $t9, 7($s2) \n"
+
+    "addiu $AT, -1 \n"
+    "addiu %[src], 4 \n"
+
+    "bnez $AT, 11b \n"
+    " addu %[dst], $s2, %[dst_stride] \n"
+    "2: \n"
+    ".set pop \n"
+    ".set at \n"
+    : [src] "+r" (src),
+      [dst] "+r" (dst),
+      [width] "+r" (width)
+    : [src_stride] "r" (src_stride),
+      [dst_stride] "r" (dst_stride)
+    : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
+      "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
+  );
+}
+
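+// De-interleaving transpose: each source row holds interleaved A/B (e.g. UV)
+// samples. Per iteration, two pixel pairs are read from each of 8 rows and
+// two 8-byte rows are written to each of dst_a (A plane) and dst_b (B
+// plane); the loop counter is width / 2.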
+void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+                               uint8* dst_a, int dst_stride_a,
+                               uint8* dst_b, int dst_stride_b,
+                               int width) {
+  __asm__ __volatile__ (
+    ".set push \n"
+    ".set noreorder \n"
+    "beqz %[width], 2f \n"
+    " sll $t2, %[src_stride], 0x1 \n"  // src_stride x 2
+    "sll $t4, %[src_stride], 0x2 \n"  // src_stride x 4
+    "sll $t9, %[src_stride], 0x3 \n"  // src_stride x 8
+    "addu $t3, $t2, %[src_stride] \n"
+    "addu $t5, $t4, %[src_stride] \n"
+    "addu $t6, $t2, $t4 \n"
+    "subu $t7, $t9, %[src_stride] \n"
+    "srl $t1, %[width], 1 \n"
+
+    // check word alignment for dst_a, dst_b, dst_stride_a and dst_stride_b
+    "andi $t0, %[dst_a], 0x3 \n"
+    "andi $t8, %[dst_b], 0x3 \n"
+    "or $t0, $t0, $t8 \n"
+    "andi $t8, %[dst_stride_a], 0x3 \n"
+    "andi $s5, %[dst_stride_b], 0x3 \n"
+    "or $t8, $t8, $s5 \n"
+    "or $t0, $t0, $t8 \n"
+    "bnez $t0, 11f \n"
+    " nop \n"
+    // dst + dst_stride word aligned (both a and b dst addresses)
+    "1: \n"
+    "lw $t0, 0(%[src]) \n"  // |B0|A0|b0|a0|
+    "lwx $t8, %[src_stride](%[src]) \n"  // |B1|A1|b1|a1|
+    "addu $s5, %[dst_a], %[dst_stride_a] \n"
+    "lwx $t9, $t2(%[src]) \n"  // |B2|A2|b2|a2|
+    "lwx $s0, $t3(%[src]) \n"  // |B3|A3|b3|a3|
+    "addu $s6, %[dst_b], %[dst_stride_b] \n"
+
+    "precrq.ph.w $s1, $t8, $t0 \n"  // |B1|A1|B0|A0|
+    "precrq.ph.w $s2, $s0, $t9 \n"  // |B3|A3|B2|A2|
+    "precr.qb.ph $s3, $s2, $s1 \n"  // |A3|A2|A1|A0|
+    "precrq.qb.ph $s4, $s2, $s1 \n"  // |B3|B2|B1|B0|
+
+    "sll $t0, $t0, 16 \n"
+    "packrl.ph $s1, $t8, $t0 \n"  // |b1|a1|b0|a0|
+    "sll $t9, $t9, 16 \n"
+    "packrl.ph $s2, $s0, $t9 \n"  // |b3|a3|b2|a2|
+
+    "sw $s3, 0($s5) \n"
+    "sw $s4, 0($s6) \n"
+
+    "precr.qb.ph $s3, $s2, $s1 \n"  // |a3|a2|a1|a0|
+    "precrq.qb.ph $s4, $s2, $s1 \n"  // |b3|b2|b1|b0|
+
+    "lwx $t0, $t4(%[src]) \n"  // |B4|A4|b4|a4|
+    "lwx $t8, $t5(%[src]) \n"  // |B5|A5|b5|a5|
+    "lwx $t9, $t6(%[src]) \n"  // |B6|A6|b6|a6|
+    "lwx $s0, $t7(%[src]) \n"  // |B7|A7|b7|a7|
+    "sw $s3, 0(%[dst_a]) \n"
+    "sw $s4, 0(%[dst_b]) \n"
+
+    "precrq.ph.w $s1, $t8, $t0 \n"  // |B5|A5|B4|A4|
+    "precrq.ph.w $s2, $s0, $t9 \n"  // |B7|A7|B6|A6|
+    "precr.qb.ph $s3, $s2, $s1 \n"  // |A7|A6|A5|A4|
+    "precrq.qb.ph $s4, $s2, $s1 \n"  // |B7|B6|B5|B4|
+
+    "sll $t0, $t0, 16 \n"
+    "packrl.ph $s1, $t8, $t0 \n"  // |b5|a5|b4|a4|
+    "sll $t9, $t9, 16 \n"
+    "packrl.ph $s2, $s0, $t9 \n"  // |b7|a7|b6|a6|
+    "sw $s3, 4($s5) \n"
+    "sw $s4, 4($s6) \n"
+
+    "precr.qb.ph $s3, $s2, $s1 \n"  // |a7|a6|a5|a4|
+    "precrq.qb.ph $s4, $s2, $s1 \n"  // |b7|b6|b5|b4|
+
+    "addiu %[src], 4 \n"
+    "addiu $t1, -1 \n"
+    "sll $t0, %[dst_stride_a], 1 \n"
+    "sll $t8, %[dst_stride_b], 1 \n"
+    "sw $s3, 4(%[dst_a]) \n"
+    "sw $s4, 4(%[dst_b]) \n"
+    "addu %[dst_a], %[dst_a], $t0 \n"
+    "bnez $t1, 1b \n"
+    " addu %[dst_b], %[dst_b], $t8 \n"
+    "b 2f \n"
+    " nop \n"
+
+    // dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
+    "11: \n"
+    "lw $t0, 0(%[src]) \n"  // |B0|A0|b0|a0|
+    "lwx $t8, %[src_stride](%[src]) \n"  // |B1|A1|b1|a1|
+    "addu $s5, %[dst_a], %[dst_stride_a] \n"
+    "lwx $t9, $t2(%[src]) \n"  // |B2|A2|b2|a2|
+    "lwx $s0, $t3(%[src]) \n"  // |B3|A3|b3|a3|
+    "addu $s6, %[dst_b], %[dst_stride_b] \n"
+
+    "precrq.ph.w $s1, $t8, $t0 \n"  // |B1|A1|B0|A0|
+    "precrq.ph.w $s2, $s0, $t9 \n"  // |B3|A3|B2|A2|
+    "precr.qb.ph $s3, $s2, $s1 \n"  // |A3|A2|A1|A0|
+    "precrq.qb.ph $s4, $s2, $s1 \n"  // |B3|B2|B1|B0|
+
+    "sll $t0, $t0, 16 \n"
+    "packrl.ph $s1, $t8, $t0 \n"  // |b1|a1|b0|a0|
+    "sll $t9, $t9, 16 \n"
+    "packrl.ph $s2, $s0, $t9 \n"  // |b3|a3|b2|a2|
+
+    "swr $s3, 0($s5) \n"
+    "swl $s3, 3($s5) \n"
+    "swr $s4, 0($s6) \n"
+    "swl $s4, 3($s6) \n"
+
+    "precr.qb.ph $s3, $s2, $s1 \n"  // |a3|a2|a1|a0|
+    "precrq.qb.ph $s4, $s2, $s1 \n"  // |b3|b2|b1|b0|
+
+    "lwx $t0, $t4(%[src]) \n"  // |B4|A4|b4|a4|
+    "lwx $t8, $t5(%[src]) \n"  // |B5|A5|b5|a5|
+    "lwx $t9, $t6(%[src]) \n"  // |B6|A6|b6|a6|
+    "lwx $s0, $t7(%[src]) \n"  // |B7|A7|b7|a7|
+    "swr $s3, 0(%[dst_a]) \n"
+    "swl $s3, 3(%[dst_a]) \n"
+    "swr $s4, 0(%[dst_b]) \n"
+    "swl $s4, 3(%[dst_b]) \n"
+
+    "precrq.ph.w $s1, $t8, $t0 \n"  // |B5|A5|B4|A4|
+    "precrq.ph.w $s2, $s0, $t9 \n"  // |B7|A7|B6|A6|
+    "precr.qb.ph $s3, $s2, $s1 \n"  // |A7|A6|A5|A4|
+    "precrq.qb.ph $s4, $s2, $s1 \n"  // |B7|B6|B5|B4|
+
+    "sll $t0, $t0, 16 \n"
+    "packrl.ph $s1, $t8, $t0 \n"  // |b5|a5|b4|a4|
+    "sll $t9, $t9, 16 \n"
+    "packrl.ph $s2, $s0, $t9 \n"  // |b7|a7|b6|a6|
+
+    "swr $s3, 4($s5) \n"
+    "swl $s3, 7($s5) \n"
+    "swr $s4, 4($s6) \n"
+    "swl $s4, 7($s6) \n"
+
+    "precr.qb.ph $s3, $s2, $s1 \n"  // |a7|a6|a5|a4|
+    "precrq.qb.ph $s4, $s2, $s1 \n"  // |b7|b6|b5|b4|
+
+    "addiu %[src], 4 \n"
+    "addiu $t1, -1 \n"
+    "sll $t0, %[dst_stride_a], 1 \n"
+    "sll $t8, %[dst_stride_b], 1 \n"
+    "swr $s3, 4(%[dst_a]) \n"
+    "swl $s3, 7(%[dst_a]) \n"
+    "swr $s4, 4(%[dst_b]) \n"
+    "swl $s4, 7(%[dst_b]) \n"
+    "addu %[dst_a], %[dst_a], $t0 \n"
+    "bnez $t1, 11b \n"
+    " addu %[dst_b], %[dst_b], $t8 \n"
+
+    "2: \n"
+    ".set pop \n"
+    : [src] "+r" (src),
+      [dst_a] "+r" (dst_a),
+      [dst_b] "+r" (dst_b),
+      [width] "+r" (width),
+      [src_stride] "+r" (src_stride)
+    : [dst_stride_a] "r" (dst_stride_a),
+      [dst_stride_b] "r" (dst_stride_b)
+    : "t0", "t1", "t2", "t3", "t4", "t5",
+      "t6", "t7", "t8", "t9",
+      "s0", "s1", "s2", "s3",
+      "s4", "s5", "s6"
+  );
+}
+
+#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
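
A note for readers without a DSPR2 toolchain: the following is a minimal
portable C sketch of the mapping TransposeWx8_MIPS_DSPR2 implements
(hypothetical helper, not part of the patch; assumes little-endian byte
order, which is what the packing order above reflects):

  typedef unsigned char uint8;  // matches libyuv's uint8 typedef

  static void TransposeWx8_C_Sketch(const uint8* src, int src_stride,
                                    uint8* dst, int dst_stride,
                                    int width) {
    // For each source column x, gather one byte from each of 8 rows and
    // store the 8 bytes as one contiguous destination row.
    for (int x = 0; x < width; ++x) {
      for (int r = 0; r < 8; ++r) {
        dst[r] = src[x + r * src_stride];
      }
      dst += dst_stride;
    }
  }

The assembly computes the same mapping; the precr/precrq packing merely builds
each 8-byte output row in registers (one row per pass on the byte path, four
rows per pass on the word path) so each row can be stored with word stores.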