| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/basic_types.h" | 11 #include "libyuv/basic_types.h" |
| 12 #include "libyuv/row.h" | 12 #include "libyuv/row.h" |
| 13 | 13 |
| 14 #ifdef __cplusplus | 14 #ifdef __cplusplus |
| 15 namespace libyuv { | 15 namespace libyuv { |
| 16 extern "C" { | 16 extern "C" { |
| 17 #endif | 17 #endif |
| 18 | 18 |
| 19 // This module is for GCC MIPS DSPR2 | 19 // This module is for GCC MIPS DSPR2 |
| 20 #if !defined(LIBYUV_DISABLE_MIPS) && \ | 20 #if !defined(LIBYUV_DISABLE_MIPS) && \ |
| 21 defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ | 21 defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \ |
| 22 (_MIPS_SIM == _MIPS_SIM_ABI32) | 22 (_MIPS_SIM == _MIPS_SIM_ABI32) |
| 23 | 23 |
| 24 void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 24 void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 25 uint8* dst, int dst_width) { | 25 uint8* dst, int dst_width) { |
| 26 __asm__ __volatile__( | 26 __asm__ __volatile__( |
| 27 ".set push \n" | 27 ".set push \n" |
| 28 ".set noreorder \n" | 28 ".set noreorder \n" |
| 29 | 29 |
| 30 "srl $t9, %[dst_width], 4 \n" // iterations -> by 16 | 30 "srl $t9, %[dst_width], 4 \n" // iterations -> by 16 |
| 31 "beqz $t9, 2f \n" | 31 "beqz $t9, 2f \n" |
| 32 " nop \n" | 32 " nop \n" |
| 33 | 33 |
| 34 "1: \n" | 34 "1: \n" |
| 35 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| | 35 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 70 "3: \n" | 70 "3: \n" |
| 71 ".set pop \n" | 71 ".set pop \n" |
| 72 : [src_ptr] "+r" (src_ptr), | 72 : [src_ptr] "+r" (src_ptr), |
| 73 [dst] "+r" (dst) | 73 [dst] "+r" (dst) |
| 74 : [dst_width] "r" (dst_width) | 74 : [dst_width] "r" (dst_width) |
| 75 : "t0", "t1", "t2", "t3", "t4", "t5", | 75 : "t0", "t1", "t2", "t3", "t4", "t5", |
| 76 "t6", "t7", "t8", "t9" | 76 "t6", "t7", "t8", "t9" |
| 77 ); | 77 ); |
| 78 } | 78 } |
| 79 | 79 |
| 80 void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 80 void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 81 uint8* dst, int dst_width) { | 81 uint8* dst, int dst_width) { |
| 82 const uint8* t = src_ptr + src_stride; | 82 const uint8* t = src_ptr + src_stride; |
| 83 | 83 |
| 84 __asm__ __volatile__ ( | 84 __asm__ __volatile__ ( |
| 85 ".set push \n" | 85 ".set push \n" |
| 86 ".set noreorder \n" | 86 ".set noreorder \n" |
| 87 | 87 |
| 88 "srl $t9, %[dst_width], 3 \n" // iterations -> step 8 | 88 "srl $t9, %[dst_width], 3 \n" // iterations -> step 8 |
| 89 "bltz $t9, 2f \n" | 89 "bltz $t9, 2f \n" |
| 90 " nop \n" | 90 " nop \n" |
| 91 | 91 |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 169 ".set pop \n" | 169 ".set pop \n" |
| 170 | 170 |
| 171 : [src_ptr] "+r" (src_ptr), | 171 : [src_ptr] "+r" (src_ptr), |
| 172 [dst] "+r" (dst), [t] "+r" (t) | 172 [dst] "+r" (dst), [t] "+r" (t) |
| 173 : [dst_width] "r" (dst_width) | 173 : [dst_width] "r" (dst_width) |
| 174 : "t0", "t1", "t2", "t3", "t4", "t5", | 174 : "t0", "t1", "t2", "t3", "t4", "t5", |
| 175 "t6", "t7", "t8", "t9" | 175 "t6", "t7", "t8", "t9" |
| 176 ); | 176 ); |
| 177 } | 177 } |
| 178 | 178 |
| 179 void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 179 void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 180 uint8* dst, int dst_width) { | 180 uint8* dst, int dst_width) { |
| 181 __asm__ __volatile__ ( | 181 __asm__ __volatile__ ( |
| 182 ".set push \n" | 182 ".set push \n" |
| 183 ".set noreorder \n" | 183 ".set noreorder \n" |
| 184 | 184 |
| 185 "srl $t9, %[dst_width], 3 \n" | 185 "srl $t9, %[dst_width], 3 \n" |
| 186 "beqz $t9, 2f \n" | 186 "beqz $t9, 2f \n" |
| 187 " nop \n" | 187 " nop \n" |
| 188 | 188 |
| 189 "1: \n" | 189 "1: \n" |
| 190 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| | 190 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 224 "3: \n" | 224 "3: \n" |
| 225 ".set pop \n" | 225 ".set pop \n" |
| 226 : [src_ptr] "+r" (src_ptr), | 226 : [src_ptr] "+r" (src_ptr), |
| 227 [dst] "+r" (dst) | 227 [dst] "+r" (dst) |
| 228 : [dst_width] "r" (dst_width) | 228 : [dst_width] "r" (dst_width) |
| 229 : "t1", "t2", "t3", "t4", "t5", | 229 : "t1", "t2", "t3", "t4", "t5", |
| 230 "t6", "t7", "t8", "t9" | 230 "t6", "t7", "t8", "t9" |
| 231 ); | 231 ); |
| 232 } | 232 } |
| 233 | 233 |
| 234 void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 234 void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 235 uint8* dst, int dst_width) { | 235 uint8* dst, int dst_width) { |
| 236 intptr_t stride = src_stride; | 236 intptr_t stride = src_stride; |
| 237 const uint8* s1 = src_ptr + stride; | 237 const uint8* s1 = src_ptr + stride; |
| 238 const uint8* s2 = s1 + stride; | 238 const uint8* s2 = s1 + stride; |
| 239 const uint8* s3 = s2 + stride; | 239 const uint8* s3 = s2 + stride; |
| 240 | 240 |
| 241 __asm__ __volatile__ ( | 241 __asm__ __volatile__ ( |
| 242 ".set push \n" | 242 ".set push \n" |
| 243 ".set noreorder \n" | 243 ".set noreorder \n" |
| 244 | 244 |
| 245 "srl $t9, %[dst_width], 1 \n" | 245 "srl $t9, %[dst_width], 1 \n" |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 303 [dst] "+r" (dst), | 303 [dst] "+r" (dst), |
| 304 [s1] "+r" (s1), | 304 [s1] "+r" (s1), |
| 305 [s2] "+r" (s2), | 305 [s2] "+r" (s2), |
| 306 [s3] "+r" (s3) | 306 [s3] "+r" (s3) |
| 307 : [dst_width] "r" (dst_width) | 307 : [dst_width] "r" (dst_width) |
| 308 : "t0", "t1", "t2", "t3", "t4", "t5", | 308 : "t0", "t1", "t2", "t3", "t4", "t5", |
| 309 "t6","t7", "t8", "t9" | 309 "t6","t7", "t8", "t9" |
| 310 ); | 310 ); |
| 311 } | 311 } |
| 312 | 312 |
| 313 void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 313 void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 314 uint8* dst, int dst_width) { | 314 uint8* dst, int dst_width) { |
| 315 __asm__ __volatile__ ( | 315 __asm__ __volatile__ ( |
| 316 ".set push \n" | 316 ".set push \n" |
| 317 ".set noreorder \n" | 317 ".set noreorder \n" |
| 318 "1: \n" | 318 "1: \n" |
| 319 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| | 319 "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| |
| 320 "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| | 320 "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| |
| 321 "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| | 321 "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| |
| 322 "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| | 322 "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| |
| 323 "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| | 323 "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| |
| 324 "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| | 324 "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| |
| (...skipping 24 matching lines...) Expand all Loading... |
| 349 ".set pop \n" | 349 ".set pop \n" |
| 350 : [src_ptr] "+r" (src_ptr), | 350 : [src_ptr] "+r" (src_ptr), |
| 351 [dst] "+r" (dst), | 351 [dst] "+r" (dst), |
| 352 [dst_width] "+r" (dst_width) | 352 [dst_width] "+r" (dst_width) |
| 353 : | 353 : |
| 354 : "t0", "t1", "t2", "t3", "t4", "t5", | 354 : "t0", "t1", "t2", "t3", "t4", "t5", |
| 355 "t6","t7", "t8", "t9" | 355 "t6","t7", "t8", "t9" |
| 356 ); | 356 ); |
| 357 } | 357 } |
| 358 | 358 |
| 359 void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 359 void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 360 uint8* d, int dst_width) { | 360 uint8* d, int dst_width) { |
| 361 __asm__ __volatile__ ( | 361 __asm__ __volatile__ ( |
| 362 ".set push \n" | 362 ".set push \n" |
| 363 ".set noreorder \n" | 363 ".set noreorder \n" |
| 364 "repl.ph $t3, 3 \n" // 0x00030003 | 364 "repl.ph $t3, 3 \n" // 0x00030003 |
| 365 | 365 |
| 366 "1: \n" | 366 "1: \n" |
| 367 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| | 367 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| |
| 368 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| | 368 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| |
| 369 "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1| | 369 "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1| |
| 370 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| | 370 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 405 : [src_ptr] "+r" (src_ptr), | 405 : [src_ptr] "+r" (src_ptr), |
| 406 [src_stride] "+r" (src_stride), | 406 [src_stride] "+r" (src_stride), |
| 407 [d] "+r" (d), | 407 [d] "+r" (d), |
| 408 [dst_width] "+r" (dst_width) | 408 [dst_width] "+r" (dst_width) |
| 409 : | 409 : |
| 410 : "t0", "t1", "t2", "t3", | 410 : "t0", "t1", "t2", "t3", |
| 411 "t4", "t5", "t6" | 411 "t4", "t5", "t6" |
| 412 ); | 412 ); |
| 413 } | 413 } |
| 414 | 414 |
| 415 void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 415 void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 416 uint8* d, int dst_width) { | 416 uint8* d, int dst_width) { |
| 417 __asm__ __volatile__ ( | 417 __asm__ __volatile__ ( |
| 418 ".set push \n" | 418 ".set push \n" |
| 419 ".set noreorder \n" | 419 ".set noreorder \n" |
| 420 "repl.ph $t2, 3 \n" // 0x00030003 | 420 "repl.ph $t2, 3 \n" // 0x00030003 |
| 421 | 421 |
| 422 "1: \n" | 422 "1: \n" |
| 423 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| | 423 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| |
| 424 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| | 424 "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| |
| 425 "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1| | 425 "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1| |
| 426 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| | 426 "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| |
| (...skipping 30 matching lines...) Expand all Loading... |
| 457 : [src_ptr] "+r" (src_ptr), | 457 : [src_ptr] "+r" (src_ptr), |
| 458 [src_stride] "+r" (src_stride), | 458 [src_stride] "+r" (src_stride), |
| 459 [d] "+r" (d), | 459 [d] "+r" (d), |
| 460 [dst_width] "+r" (dst_width) | 460 [dst_width] "+r" (dst_width) |
| 461 : | 461 : |
| 462 : "t0", "t1", "t2", "t3", | 462 : "t0", "t1", "t2", "t3", |
| 463 "t4", "t5", "t6" | 463 "t4", "t5", "t6" |
| 464 ); | 464 ); |
| 465 } | 465 } |
| 466 | 466 |
| 467 void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 467 void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 468 uint8* dst, int dst_width) { | 468 uint8* dst, int dst_width) { |
| 469 __asm__ __volatile__ ( | 469 __asm__ __volatile__ ( |
| 470 ".set push \n" | 470 ".set push \n" |
| 471 ".set noreorder \n" | 471 ".set noreorder \n" |
| 472 | 472 |
| 473 "1: \n" | 473 "1: \n" |
| 474 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| | 474 "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| |
| 475 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| | 475 "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| |
| 476 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| | 476 "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| |
| 477 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| | 477 "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| |
| 478 "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| | 478 "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| |
| (...skipping 24 matching lines...) Expand all Loading... |
| 503 ".set pop \n" | 503 ".set pop \n" |
| 504 : [src_ptr] "+r" (src_ptr), | 504 : [src_ptr] "+r" (src_ptr), |
| 505 [dst] "+r" (dst), | 505 [dst] "+r" (dst), |
| 506 [dst_width] "+r" (dst_width) | 506 [dst_width] "+r" (dst_width) |
| 507 : | 507 : |
| 508 : "t0", "t1", "t2", "t3", "t4", | 508 : "t0", "t1", "t2", "t3", "t4", |
| 509 "t5", "t6", "t7", "t8" | 509 "t5", "t6", "t7", "t8" |
| 510 ); | 510 ); |
| 511 } | 511 } |
| 512 | 512 |
| 513 void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, | 513 void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 514 uint8* dst_ptr, int dst_width) { | 514 uint8* dst_ptr, int dst_width) { |
| 515 intptr_t stride = src_stride; | 515 intptr_t stride = src_stride; |
| 516 const uint8* t = src_ptr + stride; | 516 const uint8* t = src_ptr + stride; |
| 517 const int c = 0x2AAA; | 517 const int c = 0x2AAA; |
| 518 | 518 |
| 519 __asm__ __volatile__ ( | 519 __asm__ __volatile__ ( |
| 520 ".set push \n" | 520 ".set push \n" |
| 521 ".set noreorder \n" | 521 ".set noreorder \n" |
| 522 | 522 |
| 523 "1: \n" | 523 "1: \n" |
| 524 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| | 524 "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 556 ".set pop \n" | 556 ".set pop \n" |
| 557 : [src_ptr] "+r" (src_ptr), | 557 : [src_ptr] "+r" (src_ptr), |
| 558 [dst_ptr] "+r" (dst_ptr), | 558 [dst_ptr] "+r" (dst_ptr), |
| 559 [t] "+r" (t), | 559 [t] "+r" (t), |
| 560 [dst_width] "+r" (dst_width) | 560 [dst_width] "+r" (dst_width) |
| 561 : [c] "r" (c) | 561 : [c] "r" (c) |
| 562 : "t0", "t1", "t2", "t3", "t4", "t5", "t6" | 562 : "t0", "t1", "t2", "t3", "t4", "t5", "t6" |
| 563 ); | 563 ); |
| 564 } | 564 } |
| 565 | 565 |
| 566 void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, | 566 void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, |
| 567 ptrdiff_t src_stride, | 567 ptrdiff_t src_stride, |
| 568 uint8* dst_ptr, int dst_width) { | 568 uint8* dst_ptr, int dst_width) { |
| 569 intptr_t stride = src_stride; | 569 intptr_t stride = src_stride; |
| 570 const uint8* s1 = src_ptr + stride; | 570 const uint8* s1 = src_ptr + stride; |
| 571 stride += stride; | 571 stride += stride; |
| 572 const uint8* s2 = src_ptr + stride; | 572 const uint8* s2 = src_ptr + stride; |
| 573 const int c1 = 0x1C71; | 573 const int c1 = 0x1C71; |
| 574 const int c2 = 0x2AAA; | 574 const int c2 = 0x2AAA; |
| 575 | 575 |
| 576 __asm__ __volatile__ ( | 576 __asm__ __volatile__ ( |
| 577 ".set push \n" | 577 ".set push \n" |
| 578 ".set noreorder \n" | 578 ".set noreorder \n" |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 635 ); | 635 ); |
| 636 } | 636 } |
| 637 | 637 |
| 638 #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) | 638 #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) |
| 639 | 639 |
| 640 #ifdef __cplusplus | 640 #ifdef __cplusplus |
| 641 } // extern "C" | 641 } // extern "C" |
| 642 } // namespace libyuv | 642 } // namespace libyuv |
| 643 #endif | 643 #endif |
| 644 | 644 |
| OLD | NEW |