Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(617)

Side by Side Diff: source/scale_mips.cc

Issue 2626123003: Libyuv MIPS DSPR2 optimizations. (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/scale_any.cc ('k') | unit_test/convert_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 24 matching lines...) Expand all
35 "1: \n" 35 "1: \n"
36 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| 36 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
37 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| 37 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
38 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| 38 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
39 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| 39 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
40 "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| 40 "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
41 "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| 41 "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
42 "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| 42 "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
43 "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| 43 "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
44 // TODO(fbarchard): Use odd pixels instead of even. 44 // TODO(fbarchard): Use odd pixels instead of even.
45 "precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0| 45 "precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1|
46 "precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8| 46 "precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9|
47 "precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16| 47 "precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17|
48 "precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24| 48 "precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25|
49 "addiu %[src_ptr], %[src_ptr], 32 \n" 49 "addiu %[src_ptr], %[src_ptr], 32 \n"
50 "addiu $t9, $t9, -1 \n" 50 "addiu $t9, $t9, -1 \n"
51 "sw $t8, 0(%[dst]) \n" 51 "sw $t8, 0(%[dst]) \n"
52 "sw $t0, 4(%[dst]) \n" 52 "sw $t0, 4(%[dst]) \n"
53 "sw $t1, 8(%[dst]) \n" 53 "sw $t1, 8(%[dst]) \n"
54 "sw $t2, 12(%[dst]) \n" 54 "sw $t2, 12(%[dst]) \n"
55 "bgtz $t9, 1b \n" 55 "bgtz $t9, 1b \n"
56 " addiu %[dst], %[dst], 16 \n" 56 " addiu %[dst], %[dst], 16 \n"
57 57
58 "2: \n" 58 "2: \n"
59 "andi $t9, %[dst_width], 0xf \n" // residue 59 "andi $t9, %[dst_width], 0xf \n" // residue
60 "beqz $t9, 3f \n" 60 "beqz $t9, 3f \n"
61 " nop \n" 61 " nop \n"
62 62
63 "21: \n" 63 "21: \n"
64 "lbu $t0, 0(%[src_ptr]) \n" 64 "lbu $t0, 1(%[src_ptr]) \n"
65 "addiu %[src_ptr], %[src_ptr], 2 \n" 65 "addiu %[src_ptr], %[src_ptr], 2 \n"
66 "addiu $t9, $t9, -1 \n" 66 "addiu $t9, $t9, -1 \n"
67 "sb $t0, 0(%[dst]) \n" 67 "sb $t0, 0(%[dst]) \n"
68 "bgtz $t9, 21b \n" 68 "bgtz $t9, 21b \n"
69 " addiu %[dst], %[dst], 1 \n" 69 " addiu %[dst], %[dst], 1 \n"
70 70
71 "3: \n" 71 "3: \n"
72 ".set pop \n" 72 ".set pop \n"
73 : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst) 73 : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst)
74 : [dst_width] "r"(dst_width) 74 : [dst_width] "r"(dst_width)
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
191 "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| 191 "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
192 "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| 192 "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
193 "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| 193 "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
194 "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| 194 "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
195 "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| 195 "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
196 "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| 196 "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
197 "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0| 197 "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
198 "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8| 198 "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
199 "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16| 199 "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
200 "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24| 200 "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
201 "precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0| 201 "precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2|
202 "precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16| 202 "precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18|
203 "addiu %[src_ptr], %[src_ptr], 32 \n" 203 "addiu %[src_ptr], %[src_ptr], 32 \n"
204 "addiu $t9, $t9, -1 \n" 204 "addiu $t9, $t9, -1 \n"
205 "sw $t1, 0(%[dst]) \n" 205 "sw $t1, 0(%[dst]) \n"
206 "sw $t5, 4(%[dst]) \n" 206 "sw $t5, 4(%[dst]) \n"
207 "bgtz $t9, 1b \n" 207 "bgtz $t9, 1b \n"
208 " addiu %[dst], %[dst], 8 \n" 208 " addiu %[dst], %[dst], 8 \n"
209 209
210 "2: \n" 210 "2: \n"
211 "andi $t9, %[dst_width], 7 \n" // residue 211 "andi $t9, %[dst_width], 7 \n" // residue
212 "beqz $t9, 3f \n" 212 "beqz $t9, 3f \n"
213 " nop \n" 213 " nop \n"
214 214
215 "21: \n" 215 "21: \n"
216 "lbu $t1, 0(%[src_ptr]) \n" 216 "lbu $t1, 2(%[src_ptr]) \n"
217 "addiu %[src_ptr], %[src_ptr], 4 \n" 217 "addiu %[src_ptr], %[src_ptr], 4 \n"
218 "addiu $t9, $t9, -1 \n" 218 "addiu $t9, $t9, -1 \n"
219 "sb $t1, 0(%[dst]) \n" 219 "sb $t1, 0(%[dst]) \n"
220 "bgtz $t9, 21b \n" 220 "bgtz $t9, 21b \n"
221 " addiu %[dst], %[dst], 1 \n" 221 " addiu %[dst], %[dst], 1 \n"
222 222
223 "3: \n" 223 "3: \n"
224 ".set pop \n" 224 ".set pop \n"
225 : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst) 225 : [src_ptr] "+r"(src_ptr), [dst] "+r"(dst)
226 : [dst_width] "r"(dst_width) 226 : [dst_width] "r"(dst_width)
(...skipping 381 matching lines...) Expand 10 before | Expand all | Expand 10 after
608 "sb $t7, -2(%[dst_ptr]) \n" 608 "sb $t7, -2(%[dst_ptr]) \n"
609 "bgtz %[dst_width], 1b \n" 609 "bgtz %[dst_width], 1b \n"
610 " sb $t0, -3(%[dst_ptr]) \n" 610 " sb $t0, -3(%[dst_ptr]) \n"
611 ".set pop \n" 611 ".set pop \n"
612 : [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [s1] "+r"(s1), 612 : [src_ptr] "+r"(src_ptr), [dst_ptr] "+r"(dst_ptr), [s1] "+r"(s1),
613 [s2] "+r"(s2), [dst_width] "+r"(dst_width) 613 [s2] "+r"(s2), [dst_width] "+r"(dst_width)
614 : [c1] "r"(c1), [c2] "r"(c2) 614 : [c1] "r"(c1), [c2] "r"(c2)
615 : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"); 615 : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
616 } 616 }
617 617
618 void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
619 int x;
620 for (x = 0; x < ((src_width - 1)); x += 8) {
621 uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4;
622 uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8;
623 __asm__ __volatile__(
624 ".set push \n"
625 ".set noreorder \n"
626 "lw %[tmp_t5], 0(%[src_ptr]) \n"
627 "lw %[tmp_t6], 4(%[src_ptr]) \n"
628 "lw %[tmp_t1], 0(%[dst_ptr]) \n"
629 "lw %[tmp_t2], 4(%[dst_ptr]) \n"
630 "lw %[tmp_t3], 8(%[dst_ptr]) \n"
631 "lw %[tmp_t4], 12(%[dst_ptr]) \n"
632 "preceu.ph.qbr %[tmp_t7], %[tmp_t5] \n"
633 "preceu.ph.qbl %[tmp_t8], %[tmp_t5] \n"
634 "addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t7] \n"
635 "addu.ph %[tmp_t2], %[tmp_t2], %[tmp_t8] \n"
636 "preceu.ph.qbr %[tmp_t7], %[tmp_t6] \n"
637 "preceu.ph.qbl %[tmp_t8], %[tmp_t6] \n"
638 "addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t7] \n"
639 "addu.ph %[tmp_t4], %[tmp_t4], %[tmp_t8] \n"
640 "sw %[tmp_t1], 0(%[dst_ptr]) \n"
641 "sw %[tmp_t2], 4(%[dst_ptr]) \n"
642 "sw %[tmp_t3], 8(%[dst_ptr]) \n"
643 "sw %[tmp_t4], 12(%[dst_ptr]) \n"
644 ".set pop \n"
645 :
646 [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), [tmp_t3] "=&r"(tmp_t3),
647 [tmp_t4] "=&r"(tmp_t4), [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
648 [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [src_ptr] "+r"(src_ptr)
649 : [dst_ptr] "r"(dst_ptr));
650 src_ptr += 8;
651 dst_ptr += 8;
652 }
653
654 if ((src_width)&7) {
655 for (x = 0; x < ((src_width - 1) & 7); x += 1) {
656 dst_ptr[0] += src_ptr[0];
657 src_ptr += 1;
658 dst_ptr += 1;
659 }
660 }
661 }
662
618 #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) 663 #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
619 664
620 #ifdef __cplusplus 665 #ifdef __cplusplus
621 } // extern "C" 666 } // extern "C"
622 } // namespace libyuv 667 } // namespace libyuv
623 #endif 668 #endif
OLDNEW
« no previous file with comments | « source/scale_any.cc ('k') | unit_test/convert_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698