OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "libyuv/basic_types.h" | 11 #include "libyuv/basic_types.h" |
12 #include "libyuv/row.h" | 12 #include "libyuv/row.h" |
13 | 13 |
14 #ifdef __cplusplus | 14 #ifdef __cplusplus |
15 namespace libyuv { | 15 namespace libyuv { |
16 extern "C" { | 16 extern "C" { |
17 #endif | 17 #endif |
18 | 18 |
19 // This module is for GCC MIPS DSPR2 | 19 // This module is for GCC MIPS DSPR2 |
20 #if !defined(LIBYUV_DISABLE_MIPS) && \ | 20 #if !defined(LIBYUV_DISABLE_MIPS) && \ |
21 defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ | 21 defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ |
22 (_MIPS_SIM == _MIPS_SIM_ABI32) | 22 (_MIPS_SIM == _MIPS_SIM_ABI32) |
23 | 23 |
24 void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 24 void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
25 uint8* dst, int dst_width) { | 25 uint8* dst, int dst_width) { |
26 __asm__ __volatile__( | 26 __asm__ __volatile__( |
27 ".set push \n" | 27 ".set push \n" |
28 ".set noreorder \n" | 28 ".set noreorder \n" |
29 | 29 |
30 "srl $t9, %[dst_width], 4 \n" // iterations -> by 16 | 30 "srl $t9, %[dst_width], 4 \n" // iterations -> by 16 |
31 "beqz $t9, 2f \n" | 31 "beqz $t9, 2f \n" |
32 " nop \n" | 32 " nop \n" |
33 | 33 |
34 "1: \n" | 34 "1: \n" |
35 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| | 35 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
70 "3: \n" | 70 "3: \n" |
71 ".set pop \n" | 71 ".set pop \n" |
72 : [src_ptr] "+r" (src_ptr), | 72 : [src_ptr] "+r" (src_ptr), |
73 [dst] "+r" (dst) | 73 [dst] "+r" (dst) |
74 : [dst_width] "r" (dst_width) | 74 : [dst_width] "r" (dst_width) |
75 : "t0", "t1", "t2", "t3", "t4", "t5", | 75 : "t0", "t1", "t2", "t3", "t4", "t5", |
76 "t6", "t7", "t8", "t9" | 76 "t6", "t7", "t8", "t9" |
77 ); | 77 ); |
78 } | 78 } |
79 | 79 |
80 void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 80 void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
81 uint8* dst, int dst_width) { | 81 uint8* dst, int dst_width) { |
82 const uint8* t = src_ptr + src_stride; | 82 const uint8* t = src_ptr + src_stride; |
83 | 83 |
84 __asm__ __volatile__ ( | 84 __asm__ __volatile__ ( |
85 ".set push \n" | 85 ".set push \n" |
86 ".set noreorder \n" | 86 ".set noreorder \n" |
87 | 87 |
88 "srl $t9, %[dst_width], 3 \n" // iterations -> step 8 | 88 "srl $t9, %[dst_width], 3 \n" // iterations -> step 8 |
89 "bltz $t9, 2f \n" | 89 "bltz $t9, 2f \n" |
90 " nop \n" | 90 " nop \n" |
91 | 91 |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
169 ".set pop \n" | 169 ".set pop \n" |
170 | 170 |
171 : [src_ptr] "+r" (src_ptr), | 171 : [src_ptr] "+r" (src_ptr), |
172 [dst] "+r" (dst), [t] "+r" (t) | 172 [dst] "+r" (dst), [t] "+r" (t) |
173 : [dst_width] "r" (dst_width) | 173 : [dst_width] "r" (dst_width) |
174 : "t0", "t1", "t2", "t3", "t4", "t5", | 174 : "t0", "t1", "t2", "t3", "t4", "t5", |
175 "t6", "t7", "t8", "t9" | 175 "t6", "t7", "t8", "t9" |
176 ); | 176 ); |
177 } | 177 } |
178 | 178 |
179 void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 179 void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
180 uint8* dst, int dst_width) { | 180 uint8* dst, int dst_width) { |
181 __asm__ __volatile__ ( | 181 __asm__ __volatile__ ( |
182 ".set push \n" | 182 ".set push \n" |
183 ".set noreorder \n" | 183 ".set noreorder \n" |
184 | 184 |
185 "srl $t9, %[dst_width], 3 \n" | 185 "srl $t9, %[dst_width], 3 \n" |
186 "beqz $t9, 2f \n" | 186 "beqz $t9, 2f \n" |
187 " nop \n" | 187 " nop \n" |
188 | 188 |
189 "1: \n" | 189 "1: \n" |
190 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| | 190 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
224 "3: \n" | 224 "3: \n" |
225 ".set pop \n" | 225 ".set pop \n" |
226 : [src_ptr] "+r" (src_ptr), | 226 : [src_ptr] "+r" (src_ptr), |
227 [dst] "+r" (dst) | 227 [dst] "+r" (dst) |
228 : [dst_width] "r" (dst_width) | 228 : [dst_width] "r" (dst_width) |
229 : "t1", "t2", "t3", "t4", "t5", | 229 : "t1", "t2", "t3", "t4", "t5", |
230 "t6", "t7", "t8", "t9" | 230 "t6", "t7", "t8", "t9" |
231 ); | 231 ); |
232 } | 232 } |
233 | 233 |
234 void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 234 void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
235 uint8* dst, int dst_width) { | 235 uint8* dst, int dst_width) { |
236 intptr_t stride = src_stride; | 236 intptr_t stride = src_stride; |
237 const uint8* s1 = src_ptr + stride; | 237 const uint8* s1 = src_ptr + stride; |
238 const uint8* s2 = s1 + stride; | 238 const uint8* s2 = s1 + stride; |
239 const uint8* s3 = s2 + stride; | 239 const uint8* s3 = s2 + stride; |
240 | 240 |
241 __asm__ __volatile__ ( | 241 __asm__ __volatile__ ( |
242 ".set push \n" | 242 ".set push \n" |
243 ".set noreorder \n" | 243 ".set noreorder \n" |
244 | 244 |
245 "srl $t9, %[dst_width], 1 \n" | 245 "srl $t9, %[dst_width], 1 \n" |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
303 [dst] "+r" (dst), | 303 [dst] "+r" (dst), |
304 [s1] "+r" (s1), | 304 [s1] "+r" (s1), |
305 [s2] "+r" (s2), | 305 [s2] "+r" (s2), |
306 [s3] "+r" (s3) | 306 [s3] "+r" (s3) |
307 : [dst_width] "r" (dst_width) | 307 : [dst_width] "r" (dst_width) |
308 : "t0", "t1", "t2", "t3", "t4", "t5", | 308 : "t0", "t1", "t2", "t3", "t4", "t5", |
309 "t6","t7", "t8", "t9" | 309 "t6","t7", "t8", "t9" |
310 ); | 310 ); |
311 } | 311 } |
312 | 312 |
313 void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 313 void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
314 uint8* dst, int dst_width) { | 314 uint8* dst, int dst_width) { |
315 __asm__ __volatile__ ( | 315 __asm__ __volatile__ ( |
316 ".set push \n" | 316 ".set push \n" |
317 ".set noreorder \n" | 317 ".set noreorder \n" |
318 "1: \n" | 318 "1: \n" |
319 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| | 319 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| |
320 "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| | 320 "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| |
321 "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| | 321 "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| |
322 "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| | 322 "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| |
323 "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| | 323 "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| |
324 "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| | 324 "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| |
(...skipping 24 matching lines...) Expand all Loading... |
349 ".set pop \n" | 349 ".set pop \n" |
350 : [src_ptr] "+r" (src_ptr), | 350 : [src_ptr] "+r" (src_ptr), |
351 [dst] "+r" (dst), | 351 [dst] "+r" (dst), |
352 [dst_width] "+r" (dst_width) | 352 [dst_width] "+r" (dst_width) |
353 : | 353 : |
354 : "t0", "t1", "t2", "t3", "t4", "t5", | 354 : "t0", "t1", "t2", "t3", "t4", "t5", |
355 "t6","t7", "t8", "t9" | 355 "t6","t7", "t8", "t9" |
356 ); | 356 ); |
357 } | 357 } |
358 | 358 |
359 void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 359 void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
360 uint8* d, int dst_width) { | 360 uint8* d, int dst_width) { |
361 __asm__ __volatile__ ( | 361 __asm__ __volatile__ ( |
362 ".set push \n" | 362 ".set push \n" |
363 ".set noreorder \n" | 363 ".set noreorder \n" |
364 "repl.ph $t3, 3 \n" // 0x00030003 | 364 "repl.ph $t3, 3 \n" // 0x00030003 |
365 | 365 |
366 "1: \n" | 366 "1: \n" |
367 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| | 367 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| |
368 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| | 368 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| |
369 "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1| | 369 "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1| |
370 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| | 370 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
405 : [src_ptr] "+r" (src_ptr), | 405 : [src_ptr] "+r" (src_ptr), |
406 [src_stride] "+r" (src_stride), | 406 [src_stride] "+r" (src_stride), |
407 [d] "+r" (d), | 407 [d] "+r" (d), |
408 [dst_width] "+r" (dst_width) | 408 [dst_width] "+r" (dst_width) |
409 : | 409 : |
410 : "t0", "t1", "t2", "t3", | 410 : "t0", "t1", "t2", "t3", |
411 "t4", "t5", "t6" | 411 "t4", "t5", "t6" |
412 ); | 412 ); |
413 } | 413 } |
414 | 414 |
415 void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 415 void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
416 uint8* d, int dst_width) { | 416 uint8* d, int dst_width) { |
417 __asm__ __volatile__ ( | 417 __asm__ __volatile__ ( |
418 ".set push \n" | 418 ".set push \n" |
419 ".set noreorder \n" | 419 ".set noreorder \n" |
420 "repl.ph $t2, 3 \n" // 0x00030003 | 420 "repl.ph $t2, 3 \n" // 0x00030003 |
421 | 421 |
422 "1: \n" | 422 "1: \n" |
423 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| | 423 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| |
424 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| | 424 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| |
425 "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1| | 425 "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1| |
426 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| | 426 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| |
(...skipping 30 matching lines...) Expand all Loading... |
457 : [src_ptr] "+r" (src_ptr), | 457 : [src_ptr] "+r" (src_ptr), |
458 [src_stride] "+r" (src_stride), | 458 [src_stride] "+r" (src_stride), |
459 [d] "+r" (d), | 459 [d] "+r" (d), |
460 [dst_width] "+r" (dst_width) | 460 [dst_width] "+r" (dst_width) |
461 : | 461 : |
462 : "t0", "t1", "t2", "t3", | 462 : "t0", "t1", "t2", "t3", |
463 "t4", "t5", "t6" | 463 "t4", "t5", "t6" |
464 ); | 464 ); |
465 } | 465 } |
466 | 466 |
467 void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 467 void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
468 uint8* dst, int dst_width) { | 468 uint8* dst, int dst_width) { |
469 __asm__ __volatile__ ( | 469 __asm__ __volatile__ ( |
470 ".set push \n" | 470 ".set push \n" |
471 ".set noreorder \n" | 471 ".set noreorder \n" |
472 | 472 |
473 "1: \n" | 473 "1: \n" |
474 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| | 474 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| |
475 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| | 475 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| |
476 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| | 476 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| |
477 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| | 477 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| |
478 "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| | 478 "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| |
(...skipping 24 matching lines...) Expand all Loading... |
503 ".set pop \n" | 503 ".set pop \n" |
504 : [src_ptr] "+r" (src_ptr), | 504 : [src_ptr] "+r" (src_ptr), |
505 [dst] "+r" (dst), | 505 [dst] "+r" (dst), |
506 [dst_width] "+r" (dst_width) | 506 [dst_width] "+r" (dst_width) |
507 : | 507 : |
508 : "t0", "t1", "t2", "t3", "t4", | 508 : "t0", "t1", "t2", "t3", "t4", |
509 "t5", "t6", "t7", "t8" | 509 "t5", "t6", "t7", "t8" |
510 ); | 510 ); |
511 } | 511 } |
512 | 512 |
513 void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 513 void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
514 uint8* dst_ptr, int dst_width) { | 514 uint8* dst_ptr, int dst_width) { |
515 intptr_t stride = src_stride; | 515 intptr_t stride = src_stride; |
516 const uint8* t = src_ptr + stride; | 516 const uint8* t = src_ptr + stride; |
517 const int c = 0x2AAA; | 517 const int c = 0x2AAA; |
518 | 518 |
519 __asm__ __volatile__ ( | 519 __asm__ __volatile__ ( |
520 ".set push \n" | 520 ".set push \n" |
521 ".set noreorder \n" | 521 ".set noreorder \n" |
522 | 522 |
523 "1: \n" | 523 "1: \n" |
524 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| | 524 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
556 ".set pop \n" | 556 ".set pop \n" |
557 : [src_ptr] "+r" (src_ptr), | 557 : [src_ptr] "+r" (src_ptr), |
558 [dst_ptr] "+r" (dst_ptr), | 558 [dst_ptr] "+r" (dst_ptr), |
559 [t] "+r" (t), | 559 [t] "+r" (t), |
560 [dst_width] "+r" (dst_width) | 560 [dst_width] "+r" (dst_width) |
561 : [c] "r" (c) | 561 : [c] "r" (c) |
562 : "t0", "t1", "t2", "t3", "t4", "t5", "t6" | 562 : "t0", "t1", "t2", "t3", "t4", "t5", "t6" |
563 ); | 563 ); |
564 } | 564 } |
565 | 565 |
566 void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, | 566 void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, |
567 ptrdiff_t src_stride, | 567 ptrdiff_t src_stride, |
568 uint8* dst_ptr, int dst_width) { | 568 uint8* dst_ptr, int dst_width) { |
569 intptr_t stride = src_stride; | 569 intptr_t stride = src_stride; |
570 const uint8* s1 = src_ptr + stride; | 570 const uint8* s1 = src_ptr + stride; |
571 stride += stride; | 571 stride += stride; |
572 const uint8* s2 = src_ptr + stride; | 572 const uint8* s2 = src_ptr + stride; |
573 const int c1 = 0x1C71; | 573 const int c1 = 0x1C71; |
574 const int c2 = 0x2AAA; | 574 const int c2 = 0x2AAA; |
575 | 575 |
576 __asm__ __volatile__ ( | 576 __asm__ __volatile__ ( |
577 ".set push \n" | 577 ".set push \n" |
578 ".set noreorder \n" | 578 ".set noreorder \n" |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
635 ); | 635 ); |
636 } | 636 } |
637 | 637 |
638 #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) | 638 #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) |
639 | 639 |
640 #ifdef __cplusplus | 640 #ifdef __cplusplus |
641 } // extern "C" | 641 } // extern "C" |
642 } // namespace libyuv | 642 } // namespace libyuv |
643 #endif | 643 #endif |
644 | 644 |
OLD | NEW |