| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2  *  Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. | 2  *  Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. | 
| 3  * | 3  * | 
| 4  *  Use of this source code is governed by a BSD-style license | 4  *  Use of this source code is governed by a BSD-style license | 
| 5  *  that can be found in the LICENSE file in the root of the source | 5  *  that can be found in the LICENSE file in the root of the source | 
| 6  *  tree. An additional intellectual property rights grant can be found | 6  *  tree. An additional intellectual property rights grant can be found | 
| 7  *  in the file PATENTS. All contributing project authors may | 7  *  in the file PATENTS. All contributing project authors may | 
| 8  *  be found in the AUTHORS file in the root of the source tree. | 8  *  be found in the AUTHORS file in the root of the source tree. | 
| 9  */ | 9  */ | 
| 10 | 10 | 
| (...skipping 567 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 578       " addiu          %[dst_v], %[dst_v], 1        \n" | 578       " addiu          %[dst_v], %[dst_v], 1        \n" | 
| 579 | 579 | 
| 580       "3:                                            \n" | 580       "3:                                            \n" | 
| 581       ".set pop                                     \n" | 581       ".set pop                                     \n" | 
| 582       : [src_uv] "+r"(src_uv), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v), | 582       : [src_uv] "+r"(src_uv), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v), | 
| 583         [x] "=&r"(x), [y] "=&r"(y) | 583         [x] "=&r"(x), [y] "=&r"(y) | 
| 584       : [width] "r"(width) | 584       : [width] "r"(width) | 
| 585       : "t0", "t1", "t2", "t3", "t4", "t5", "t7", "t8", "t9"); | 585       : "t0", "t1", "t2", "t3", "t4", "t5", "t7", "t8", "t9"); | 
| 586 } | 586 } | 
| 587 | 587 | 
| 588 // Convert (4 Y and 2 VU) I422 and arrange RGB values into | 588 void I422ToARGBRow_DSPR2(const uint8* src_y, | 
| 589 // t5 = | 0 | B0 | 0 | b0 | | 589                          const uint8* src_u, | 
| 590 // t4 = | 0 | B1 | 0 | b1 | | 590                          const uint8* src_v, | 
| 591 // t9 = | 0 | G0 | 0 | g0 | |  | 
| 592 // t8 = | 0 | G1 | 0 | g1 | |  | 
| 593 // t2 = | 0 | R0 | 0 | r0 | |  | 
| 594 // t1 = | 0 | R1 | 0 | r1 | |  | 
| 595 #define YUVTORGB                                \ |  | 
| 596   "lw                $t0, 0(%[y_buf])       \n" \ |  | 
| 597   "lhu               $t1, 0(%[u_buf])       \n" \ |  | 
| 598   "lhu               $t2, 0(%[v_buf])       \n" \ |  | 
| 599   "preceu.ph.qbr     $t1, $t1               \n" \ |  | 
| 600   "preceu.ph.qbr     $t2, $t2               \n" \ |  | 
| 601   "preceu.ph.qbra    $t3, $t0               \n" \ |  | 
| 602   "preceu.ph.qbla    $t0, $t0               \n" \ |  | 
| 603   "subu.ph           $t1, $t1, $s5          \n" \ |  | 
| 604   "subu.ph           $t2, $t2, $s5          \n" \ |  | 
| 605   "subu.ph           $t3, $t3, $s4          \n" \ |  | 
| 606   "subu.ph           $t0, $t0, $s4          \n" \ |  | 
| 607   "mul.ph            $t3, $t3, $s0          \n" \ |  | 
| 608   "mul.ph            $t0, $t0, $s0          \n" \ |  | 
| 609   "shll.ph           $t4, $t1, 0x7          \n" \ |  | 
| 610   "subu.ph           $t4, $t4, $t1          \n" \ |  | 
| 611   "mul.ph            $t6, $t1, $s1          \n" \ |  | 
| 612   "mul.ph            $t1, $t2, $s2          \n" \ |  | 
| 613   "addq_s.ph         $t5, $t4, $t3          \n" \ |  | 
| 614   "addq_s.ph         $t4, $t4, $t0          \n" \ |  | 
| 615   "shra.ph           $t5, $t5, 6            \n" \ |  | 
| 616   "shra.ph           $t4, $t4, 6            \n" \ |  | 
| 617   "addiu             %[u_buf], 2            \n" \ |  | 
| 618   "addiu             %[v_buf], 2            \n" \ |  | 
| 619   "addu.ph           $t6, $t6, $t1          \n" \ |  | 
| 620   "mul.ph            $t1, $t2, $s3          \n" \ |  | 
| 621   "addu.ph           $t9, $t6, $t3          \n" \ |  | 
| 622   "addu.ph           $t8, $t6, $t0          \n" \ |  | 
| 623   "shra.ph           $t9, $t9, 6            \n" \ |  | 
| 624   "shra.ph           $t8, $t8, 6            \n" \ |  | 
| 625   "addu.ph           $t2, $t1, $t3          \n" \ |  | 
| 626   "addu.ph           $t1, $t1, $t0          \n" \ |  | 
| 627   "shra.ph           $t2, $t2, 6            \n" \ |  | 
| 628   "shra.ph           $t1, $t1, 6            \n" \ |  | 
| 629   "subu.ph           $t5, $t5, $s5          \n" \ |  | 
| 630   "subu.ph           $t4, $t4, $s5          \n" \ |  | 
| 631   "subu.ph           $t9, $t9, $s5          \n" \ |  | 
| 632   "subu.ph           $t8, $t8, $s5          \n" \ |  | 
| 633   "subu.ph           $t2, $t2, $s5          \n" \ |  | 
| 634   "subu.ph           $t1, $t1, $s5          \n" \ |  | 
| 635   "shll_s.ph         $t5, $t5, 8            \n" \ |  | 
| 636   "shll_s.ph         $t4, $t4, 8            \n" \ |  | 
| 637   "shll_s.ph         $t9, $t9, 8            \n" \ |  | 
| 638   "shll_s.ph         $t8, $t8, 8            \n" \ |  | 
| 639   "shll_s.ph         $t2, $t2, 8            \n" \ |  | 
| 640   "shll_s.ph         $t1, $t1, 8            \n" \ |  | 
| 641   "shra.ph           $t5, $t5, 8            \n" \ |  | 
| 642   "shra.ph           $t4, $t4, 8            \n" \ |  | 
| 643   "shra.ph           $t9, $t9, 8            \n" \ |  | 
| 644   "shra.ph           $t8, $t8, 8            \n" \ |  | 
| 645   "shra.ph           $t2, $t2, 8            \n" \ |  | 
| 646   "shra.ph           $t1, $t1, 8            \n" \ |  | 
| 647   "addu.ph           $t5, $t5, $s5          \n" \ |  | 
| 648   "addu.ph           $t4, $t4, $s5          \n" \ |  | 
| 649   "addu.ph           $t9, $t9, $s5          \n" \ |  | 
| 650   "addu.ph           $t8, $t8, $s5          \n" \ |  | 
| 651   "addu.ph           $t2, $t2, $s5          \n" \ |  | 
| 652   "addu.ph           $t1, $t1, $s5          \n" |  | 
| 653 |  | 
| 654 // TODO(fbarchard): accept yuv conversion constants. |  | 
| 655 void I422ToARGBRow_DSPR2(const uint8* y_buf, |  | 
| 656                          const uint8* u_buf, |  | 
| 657                          const uint8* v_buf, |  | 
| 658                          uint8* rgb_buf, | 591                          uint8* rgb_buf, | 
| 659                          const struct YuvConstants* yuvconstants, | 592                          const struct YuvConstants* yuvconstants, | 
| 660                          int width) { | 593                          int width) { | 
| 661   __asm__ __volatile__( | 594   int x; | 
| 662       ".set push                                \n" | 595   uint32 tmp_ub = yuvconstants->kUVToB[0]; | 
| 663       ".set noreorder                           \n" | 596   uint32 tmp_ug = yuvconstants->kUVToG[0]; | 
| 664       "beqz              %[width], 2f           \n" | 597   uint32 tmp_vg = yuvconstants->kUVToG[1]; | 
| 665       " repl.ph          $s0, 74                \n"  // |YG|YG| = |74|74| | 598   uint32 tmp_vr = yuvconstants->kUVToR[1]; | 
| 666       "repl.ph           $s1, -25               \n"  // |UG|UG| = |-25|-25| | 599   uint32 tmp_bb = yuvconstants->kUVBiasB[0]; | 
| 667       "repl.ph           $s2, -52               \n"  // |VG|VG| = |-52|-52| | 600   uint32 tmp_bg = yuvconstants->kUVBiasG[0]; | 
| 668       "repl.ph           $s3, 102               \n"  // |VR|VR| = |102|102| | 601   uint32 tmp_br = yuvconstants->kUVBiasR[0]; | 
| 669       "repl.ph           $s4, 16                \n"  // |0|16|0|16| | 602   uint32 yg = yuvconstants->kYToRgb[0]; | 
| 670       "repl.ph           $s5, 128               \n"  // |128|128| // clipping | 603   uint32 tmp_yg; | 
| 671       "lui               $s6, 0xff00            \n" | 604   uint32 tmp_mask = 0x7fff7fff; | 
| 672       "ori               $s6, 0xff00            \n"  // |ff|00|ff|00|ff| | 605   tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); | 
|  | 606   tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); | 
|  | 607   tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); | 
|  | 608   tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); | 
|  | 609   tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; | 
|  | 610   tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); | 
|  | 611   tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); | 
|  | 612   tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; | 
|  | 613   yg = yg * 0x0101; | 
| 673 | 614 | 
| 674       "1:                                        \n" YUVTORGB | 615   for (x = 0; x < width - 1; x += 2) { | 
| 675       // Arranging into argb format | 616     uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
| 676       "precr.qb.ph       $t4, $t8, $t4          \n"  // |G1|g1|B1|b1| | 617     uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; | 
| 677       "precr.qb.ph       $t5, $t9, $t5          \n"  // |G0|g0|B0|b0| | 618     __asm__ __volatile__( | 
| 678       "addiu             %[width], -4           \n" | 619         ".set push                                             \n" | 
| 679       "precrq.qb.ph      $t8, $t4, $t5          \n"  // |G1|B1|G0|B0| | 620         ".set noreorder                                        \n" | 
| 680       "precr.qb.ph       $t9, $t4, $t5          \n"  // |g1|b1|g0|b0| | 621         "lbu              %[tmp_t7], 0(%[src_y])               \n" | 
| 681       "precr.qb.ph       $t2, $t1, $t2          \n"  // |R1|r1|R0|r0| | 622         "lbu              %[tmp_t1], 1(%[src_y])               \n" | 
| 682 | 623         "mul              %[tmp_t7], %[tmp_t7],     %[yg]      \n" | 
| 683       "addiu             %[y_buf], 4            \n" | 624         "mul              %[tmp_t1], %[tmp_t1],     %[yg]      \n" | 
| 684       "preceu.ph.qbla    $t1, $t2               \n"  // |0 |R1|0 |R0| | 625         "lbu              %[tmp_t2], 0(%[src_u])               \n" | 
| 685       "preceu.ph.qbra    $t2, $t2               \n"  // |0 |r1|0 |r0| | 626         "lbu              %[tmp_t3], 0(%[src_v])               \n" | 
| 686       "or                $t1, $t1, $s6          \n"  // |ff|R1|ff|R0| | 627         "replv.ph         %[tmp_t2], %[tmp_t2]                 \n" | 
| 687       "or                $t2, $t2, $s6          \n"  // |ff|r1|ff|r0| | 628         "replv.ph         %[tmp_t3], %[tmp_t3]                 \n" | 
| 688       "precrq.ph.w       $t0, $t2, $t9          \n"  // |ff|r1|g1|b1| | 629         "mul.ph           %[tmp_t4], %[tmp_t2],     %[tmp_ub]  \n" | 
| 689       "precrq.ph.w       $t3, $t1, $t8          \n"  // |ff|R1|G1|B1| | 630         "mul.ph           %[tmp_t5], %[tmp_t2],     %[tmp_ug]  \n" | 
| 690       "sll               $t9, $t9, 16           \n" | 631         "mul.ph           %[tmp_t6], %[tmp_t3],     %[tmp_vr]  \n" | 
| 691       "sll               $t8, $t8, 16           \n" | 632         "mul.ph           %[tmp_t3], %[tmp_t3],     %[tmp_vg]  \n" | 
| 692       "packrl.ph         $t2, $t2, $t9          \n"  // |ff|r0|g0|b0| | 633         "srl              %[tmp_t7], %[tmp_t7],     16         \n" | 
| 693       "packrl.ph         $t1, $t1, $t8          \n"  // |ff|R0|G0|B0| | 634         "ins              %[tmp_t1], %[tmp_t7],     0,      16 \n" | 
| 694                                                      // Store results. | 635         "addq_s.ph        %[tmp_t7], %[tmp_t1],     %[tmp_bb]  \n" | 
| 695       "sw                $t2, 0(%[rgb_buf])     \n" | 636         "addq_s.ph        %[tmp_t8], %[tmp_t1],     %[tmp_bg]  \n" | 
| 696       "sw                $t0, 4(%[rgb_buf])     \n" | 637         "addq_s.ph        %[tmp_t9], %[tmp_t1],     %[tmp_br]  \n" | 
| 697       "sw                $t1, 8(%[rgb_buf])     \n" | 638         "addq_s.ph        %[tmp_t5], %[tmp_t5],     %[tmp_t3]  \n" | 
| 698       "sw                $t3, 12(%[rgb_buf])    \n" | 639         "addq_s.ph        %[tmp_t7], %[tmp_t7],     %[tmp_t4]  \n" | 
| 699       "bnez              %[width], 1b           \n" | 640         "subq_s.ph        %[tmp_t8], %[tmp_t8],     %[tmp_t5]  \n" | 
| 700       " addiu            %[rgb_buf], 16         \n" | 641         "addq_s.ph        %[tmp_t9], %[tmp_t9],     %[tmp_t6]  \n" | 
| 701       "2:                                        \n" | 642         "shra.ph          %[tmp_t7], %[tmp_t7],     6          \n" | 
| 702       ".set pop                                 \n" | 643         "shra.ph          %[tmp_t8], %[tmp_t8],     6          \n" | 
| 703       : [y_buf] "+r"(y_buf), [u_buf] "+r"(u_buf), [v_buf] "+r"(v_buf), | 644         "shra.ph          %[tmp_t9], %[tmp_t9],     6          \n" | 
| 704         [width] "+r"(width), [rgb_buf] "+r"(rgb_buf) | 645         "shll_s.ph        %[tmp_t7], %[tmp_t7],     7          \n" | 
| 705       : | 646         "shll_s.ph        %[tmp_t8], %[tmp_t8],     7          \n" | 
| 706       : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1", | 647         "shll_s.ph        %[tmp_t9], %[tmp_t9],     7          \n" | 
| 707         "s2", "s3", "s4", "s5", "s6"); | 648         "precrqu_s.qb.ph  %[tmp_t8], %[tmp_mask],   %[tmp_t8]  \n" | 
|  | 649         "precrqu_s.qb.ph  %[tmp_t7], %[tmp_t9],     %[tmp_t7]  \n" | 
|  | 650         "precrq.ph.w      %[tmp_t9], %[tmp_t8],     %[tmp_t7]  \n" | 
|  | 651         "ins              %[tmp_t7], %[tmp_t8],     16,     16 \n" | 
|  | 652         "precr.qb.ph      %[tmp_t8], %[tmp_t9],     %[tmp_t7]  \n" | 
|  | 653         "precrq.qb.ph     %[tmp_t7], %[tmp_t9],     %[tmp_t7]  \n" | 
|  | 654         "sw               %[tmp_t8], 0(%[rgb_buf])             \n" | 
|  | 655         "sw               %[tmp_t7], 4(%[rgb_buf])             \n" | 
|  | 656         ".set pop                                              \n" | 
|  | 657         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 658           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 659           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 660           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) | 
|  | 661         : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v), | 
|  | 662           [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [yg] "r"(yg), | 
|  | 663           [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), | 
|  | 664           [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), | 
|  | 665           [rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask)); | 
|  | 666     src_y += 2; | 
|  | 667     src_u += 1; | 
|  | 668     src_v += 1; | 
|  | 669     rgb_buf += 8;  // Advance 2 pixels (8 bytes of ARGB). | 
|  | 670   } | 
| 708 } | 671 } | 
| 709 | 672 | 
| 710 // Bilinear filter 8x2 -> 8x1 | 673 // Bilinear filter 8x2 -> 8x1 | 
| 711 void InterpolateRow_DSPR2(uint8* dst_ptr, | 674 void InterpolateRow_DSPR2(uint8* dst_ptr, | 
| 712                           const uint8* src_ptr, | 675                           const uint8* src_ptr, | 
| 713                           ptrdiff_t src_stride, | 676                           ptrdiff_t src_stride, | 
| 714                           int dst_width, | 677                           int dst_width, | 
| 715                           int source_y_fraction) { | 678                           int source_y_fraction) { | 
| 716   int y0_fraction = 256 - source_y_fraction; | 679   int y0_fraction = 256 - source_y_fraction; | 
| 717   const uint8* src_ptr1 = src_ptr + src_stride; | 680   const uint8* src_ptr1 = src_ptr + src_stride; | 
| (...skipping 15 matching lines...) Expand all  Loading... | 
| 733       "muleu_s.ph.qbl    $t8, $t3, $t1                     \n" | 696       "muleu_s.ph.qbl    $t8, $t3, $t1                     \n" | 
| 734       "muleu_s.ph.qbr    $t9, $t3, $t1                     \n" | 697       "muleu_s.ph.qbr    $t9, $t3, $t1                     \n" | 
| 735       "muleu_s.ph.qbl    $t2, $t4, $t0                     \n" | 698       "muleu_s.ph.qbl    $t2, $t4, $t0                     \n" | 
| 736       "muleu_s.ph.qbr    $t3, $t4, $t0                     \n" | 699       "muleu_s.ph.qbr    $t3, $t4, $t0                     \n" | 
| 737       "muleu_s.ph.qbl    $t4, $t5, $t1                     \n" | 700       "muleu_s.ph.qbl    $t4, $t5, $t1                     \n" | 
| 738       "muleu_s.ph.qbr    $t5, $t5, $t1                     \n" | 701       "muleu_s.ph.qbr    $t5, $t5, $t1                     \n" | 
| 739       "addq.ph           $t6, $t6, $t8                     \n" | 702       "addq.ph           $t6, $t6, $t8                     \n" | 
| 740       "addq.ph           $t7, $t7, $t9                     \n" | 703       "addq.ph           $t7, $t7, $t9                     \n" | 
| 741       "addq.ph           $t2, $t2, $t4                     \n" | 704       "addq.ph           $t2, $t2, $t4                     \n" | 
| 742       "addq.ph           $t3, $t3, $t5                     \n" | 705       "addq.ph           $t3, $t3, $t5                     \n" | 
| 743       "shra.ph           $t6, $t6, 8                       \n" | 706       "shra_r.ph         $t6, $t6, 8                       \n" | 
| 744       "shra.ph           $t7, $t7, 8                       \n" | 707       "shra_r.ph         $t7, $t7, 8                       \n" | 
| 745       "shra.ph           $t2, $t2, 8                       \n" | 708       "shra_r.ph         $t2, $t2, 8                       \n" | 
| 746       "shra.ph           $t3, $t3, 8                       \n" | 709       "shra_r.ph         $t3, $t3, 8                       \n" | 
| 747       "precr.qb.ph       $t6, $t6, $t7                     \n" | 710       "precr.qb.ph       $t6, $t6, $t7                     \n" | 
| 748       "precr.qb.ph       $t2, $t2, $t3                     \n" | 711       "precr.qb.ph       $t2, $t2, $t3                     \n" | 
| 749       "addiu             %[src_ptr], %[src_ptr], 8         \n" | 712       "addiu             %[src_ptr], %[src_ptr], 8         \n" | 
| 750       "addiu             %[src_ptr1], %[src_ptr1], 8       \n" | 713       "addiu             %[src_ptr1], %[src_ptr1], 8       \n" | 
| 751       "addiu             %[dst_width], %[dst_width], -8    \n" | 714       "addiu             %[dst_width], %[dst_width], -8    \n" | 
| 752       "sw                $t6, 0(%[dst_ptr])                \n" | 715       "sw                $t6, 0(%[dst_ptr])                \n" | 
| 753       "sw                $t2, 4(%[dst_ptr])                \n" | 716       "sw                $t2, 4(%[dst_ptr])                \n" | 
| 754       "bgtz              %[dst_width], 1b                  \n" | 717       "bgtz              %[dst_width], 1b                  \n" | 
| 755       " addiu            %[dst_ptr], %[dst_ptr], 8         \n" | 718       " addiu            %[dst_ptr], %[dst_ptr], 8         \n" | 
| 756 | 719 | 
| 757       ".set pop                                            \n" | 720       ".set pop                                            \n" | 
| 758       : [dst_ptr] "+r"(dst_ptr), [src_ptr1] "+r"(src_ptr1), | 721       : [dst_ptr] "+r"(dst_ptr), [src_ptr1] "+r"(src_ptr1), | 
| 759         [src_ptr] "+r"(src_ptr), [dst_width] "+r"(dst_width) | 722         [src_ptr] "+r"(src_ptr), [dst_width] "+r"(dst_width) | 
| 760       : [source_y_fraction] "r"(source_y_fraction), | 723       : [source_y_fraction] "r"(source_y_fraction), | 
| 761         [y0_fraction] "r"(y0_fraction), [src_stride] "r"(src_stride) | 724         [y0_fraction] "r"(y0_fraction), [src_stride] "r"(src_stride) | 
| 762       : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); | 725       : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"); | 
| 763 } | 726 } | 
|  | 727 #include <stdio.h> | 
|  | 728 void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width) { | 
|  | 729   int x; | 
|  | 730   uint32 tmp_mask = 0xff; | 
|  | 731   uint32 tmp_t1; | 
|  | 732   for (x = 0; x < (width - 1); ++x) { | 
|  | 733     __asm__ __volatile__( | 
|  | 734         ".set push                                                  \n" | 
|  | 735         ".set noreorder                                             \n" | 
|  | 736         "ulw             %[tmp_t1],    0(%[src_rgb24])              \n" | 
|  | 737         "addiu           %[dst_argb],  %[dst_argb],     4           \n" | 
|  | 738         "addiu           %[src_rgb24], %[src_rgb24],    3           \n" | 
|  | 739         "ins             %[tmp_t1],    %[tmp_mask],     24,    8    \n" | 
|  | 740         "sw              %[tmp_t1],    -4(%[dst_argb])              \n" | 
|  | 741         ".set pop                                                   \n" | 
|  | 742         : [src_rgb24] "+r"(src_rgb24), [dst_argb] "+r"(dst_argb), | 
|  | 743           [tmp_t1] "=&r"(tmp_t1) | 
|  | 744         : [tmp_mask] "r"(tmp_mask) | 
|  | 745         : "memory"); | 
|  | 746   } | 
|  | 747   uint8 b = src_rgb24[0]; | 
|  | 748   uint8 g = src_rgb24[1]; | 
|  | 749   uint8 r = src_rgb24[2]; | 
|  | 750   dst_argb[0] = b; | 
|  | 751   dst_argb[1] = g; | 
|  | 752   dst_argb[2] = r; | 
|  | 753   dst_argb[3] = 255u; | 
|  | 754 } | 
|  | 755 | 
|  | 756 void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width) { | 
|  | 757   int x; | 
|  | 758   uint32 tmp_mask = 0xff; | 
|  | 759   uint32 tmp_t1, tmp_t2; | 
|  | 760   for (x = 0; x < (width - 1); ++x) { | 
|  | 761     __asm__ __volatile__( | 
|  | 762         ".set push                                               \n" | 
|  | 763         ".set noreorder                                          \n" | 
|  | 764         "ulw               %[tmp_t1],   0(%[src_raw])            \n" | 
|  | 765         "addiu             %[dst_argb], %[dst_argb],      4      \n" | 
|  | 766         "addiu             %[src_raw],  %[src_raw],       3      \n" | 
|  | 767         "srl               %[tmp_t2],   %[tmp_t1],        16     \n" | 
|  | 768         "ins               %[tmp_t1],   %[tmp_mask],      24, 8  \n" | 
|  | 769         "ins               %[tmp_t1],   %[tmp_t1],        16, 8  \n" | 
|  | 770         "ins               %[tmp_t1],   %[tmp_t2],        0,  8  \n" | 
|  | 771         "sw                %[tmp_t1],   -4(%[dst_argb])          \n" | 
|  | 772         ".set pop                                                \n" | 
|  | 773         : [src_raw] "+r"(src_raw), [dst_argb] "+r"(dst_argb), | 
|  | 774           [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2) | 
|  | 775         : [tmp_mask] "r"(tmp_mask) | 
|  | 776         : "memory"); | 
|  | 777   } | 
|  | 778   uint8 r = src_raw[0]; | 
|  | 779   uint8 g = src_raw[1]; | 
|  | 780   uint8 b = src_raw[2]; | 
|  | 781   dst_argb[0] = b; | 
|  | 782   dst_argb[1] = g; | 
|  | 783   dst_argb[2] = r; | 
|  | 784   dst_argb[3] = 255u; | 
|  | 785 } | 
|  | 786 | 
|  | 787 void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, | 
|  | 788                            uint8* dst_argb, | 
|  | 789                            int width) { | 
|  | 790   int x; | 
|  | 791   uint32 tmp_mask = 0xff; | 
|  | 792   uint32 tmp_t1, tmp_t2, tmp_t3; | 
|  | 793   for (x = 0; x < width; ++x) { | 
|  | 794     __asm__ __volatile__( | 
|  | 795         ".set push                                                   \n" | 
|  | 796         ".set noreorder                                              \n" | 
|  | 797         "lhu               %[tmp_t1],     0(%[src_rgb565])           \n" | 
|  | 798         "addiu             %[dst_argb],   %[dst_argb],      4        \n" | 
|  | 799         "addiu             %[src_rgb565], %[src_rgb565],    2        \n" | 
|  | 800         "sll               %[tmp_t2],     %[tmp_t1],        8        \n" | 
|  | 801         "ins               %[tmp_t2],     %[tmp_mask],      24,8     \n" | 
|  | 802         "ins               %[tmp_t2],     %[tmp_t1],        3, 16    \n" | 
|  | 803         "ins               %[tmp_t2],     %[tmp_t1],        5, 11    \n" | 
|  | 804         "srl               %[tmp_t3],     %[tmp_t1],        9        \n" | 
|  | 805         "ins               %[tmp_t2],     %[tmp_t3],        8, 2     \n" | 
|  | 806         "ins               %[tmp_t2],     %[tmp_t1],        3, 5     \n" | 
|  | 807         "srl               %[tmp_t3],     %[tmp_t1],        2        \n" | 
|  | 808         "ins               %[tmp_t2],     %[tmp_t3],        0, 3     \n" | 
|  | 809         "sw                %[tmp_t2],     -4(%[dst_argb])            \n" | 
|  | 810         ".set pop                                                    \n" | 
|  | 811         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 812           [tmp_t3] "=&r"(tmp_t3), [src_rgb565] "+r"(src_rgb565), | 
|  | 813           [dst_argb] "+r"(dst_argb) | 
|  | 814         : [tmp_mask] "r"(tmp_mask)); | 
|  | 815   } | 
|  | 816 } | 
|  | 817 | 
|  | 818 void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555, | 
|  | 819                              uint8* dst_argb, | 
|  | 820                              int width) { | 
|  | 821   int x; | 
|  | 822   uint32 tmp_t1, tmp_t2, tmp_t3; | 
|  | 823   for (x = 0; x < width; ++x) { | 
|  | 824     __asm__ __volatile__( | 
|  | 825         ".set push                                                   \n" | 
|  | 826         ".set noreorder                                              \n" | 
|  | 827         "lh                %[tmp_t1],       0(%[src_argb1555])       \n" | 
|  | 828         "addiu             %[dst_argb],     %[dst_argb],      4      \n" | 
|  | 829         "addiu             %[src_argb1555], %[src_argb1555],  2      \n" | 
|  | 830         "sll               %[tmp_t2],       %[tmp_t1],        9      \n" | 
|  | 831         "ins               %[tmp_t2],       %[tmp_t1],        4, 15  \n" | 
|  | 832         "ins               %[tmp_t2],       %[tmp_t1],        6, 10  \n" | 
|  | 833         "srl               %[tmp_t3],       %[tmp_t1],        7      \n" | 
|  | 834         "ins               %[tmp_t2],       %[tmp_t3],        8, 3   \n" | 
|  | 835         "ins               %[tmp_t2],       %[tmp_t1],        3, 5   \n" | 
|  | 836         "srl               %[tmp_t3],       %[tmp_t1],        2      \n" | 
|  | 837         "ins               %[tmp_t2],       %[tmp_t3],        0, 3   \n" | 
|  | 838         "sw                %[tmp_t2],       -4(%[dst_argb])          \n" | 
|  | 839         ".set pop                                                    \n" | 
|  | 840         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 841           [tmp_t3] "=&r"(tmp_t3), [src_argb1555] "+r"(src_argb1555), | 
|  | 842           [dst_argb] "+r"(dst_argb) | 
|  | 843         :); | 
|  | 844   } | 
|  | 845 } | 
|  | 846 | 
|  | 847 void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444, | 
|  | 848                              uint8* dst_argb, | 
|  | 849                              int width) { | 
|  | 850   int x; | 
|  | 851   uint32 tmp_t1; | 
|  | 852   for (x = 0; x < width; ++x) { | 
|  | 853     __asm__ __volatile__( | 
|  | 854         ".set push                                                    \n" | 
|  | 855         ".set noreorder                                               \n" | 
|  | 856         "lh                %[tmp_t1],       0(%[src_argb4444])        \n" | 
|  | 857         "addiu             %[dst_argb],     %[dst_argb],       4      \n" | 
|  | 858         "addiu             %[src_argb4444], %[src_argb4444],   2      \n" | 
|  | 859         "ins               %[tmp_t1],       %[tmp_t1],         16, 16 \n" | 
|  | 860         "ins               %[tmp_t1],       %[tmp_t1],         12, 16 \n" | 
|  | 861         "ins               %[tmp_t1],       %[tmp_t1],         8,  12 \n" | 
|  | 862         "ins               %[tmp_t1],       %[tmp_t1],         4,  8  \n" | 
|  | 863         "sw                %[tmp_t1],       -4(%[dst_argb])           \n" | 
|  | 864         ".set pop                                                     \n" | 
|  | 865         : [src_argb4444] "+r"(src_argb4444), [dst_argb] "+r"(dst_argb), | 
|  | 866           [tmp_t1] "=&r"(tmp_t1)); | 
|  | 867   } | 
|  | 868 } | 
|  | 869 | 
|  | 870 void I444ToARGBRow_DSPR2(const uint8* y_buf, | 
|  | 871                          const uint8* u_buf, | 
|  | 872                          const uint8* v_buf, | 
|  | 873                          uint8* rgb_buf, | 
|  | 874                          const struct YuvConstants* yuvconstants, | 
|  | 875                          int width) { | 
|  | 876   int x; | 
|  | 877   uint32 tmp_ub = yuvconstants->kUVToB[0]; | 
|  | 878   uint32 tmp_ug = yuvconstants->kUVToG[0]; | 
|  | 879   uint32 tmp_vg = yuvconstants->kUVToG[1]; | 
|  | 880   uint32 tmp_vr = yuvconstants->kUVToR[1]; | 
|  | 881   uint32 tmp_bb = yuvconstants->kUVBiasB[0]; | 
|  | 882   uint32 tmp_bg = yuvconstants->kUVBiasG[0]; | 
|  | 883   uint32 tmp_br = yuvconstants->kUVBiasR[0]; | 
|  | 884   uint32 yg = yuvconstants->kYToRgb[0]; | 
|  | 885   uint32 tmp_mask = 0x7fff7fff; | 
|  | 886   uint32 tmp_yg; | 
|  | 887 | 
|  | 888   tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); | 
|  | 889   tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); | 
|  | 890   tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); | 
|  | 891   tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); | 
|  | 892   tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; | 
|  | 893   tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); | 
|  | 894   tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); | 
|  | 895   tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; | 
|  | 896   yg = yg * 0x0101; | 
|  | 897 | 
|  | 898   for (x = 0; x < width - 1; x += 2) { | 
|  | 899     uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 900     uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; | 
|  | 901     __asm__ __volatile__( | 
|  | 902         ".set push                                              \n" | 
|  | 903         ".set noreorder                                         \n" | 
|  | 904         "lbu              %[tmp_t7], 0(%[y_buf])               \n" | 
|  | 905         "lbu              %[tmp_t1], 1(%[y_buf])               \n" | 
|  | 906         "mul              %[tmp_t7], %[tmp_t7],     %[yg]      \n" | 
|  | 907         "mul              %[tmp_t1], %[tmp_t1],     %[yg]      \n" | 
|  | 908         "lh               %[tmp_t2], 0(%[u_buf])               \n" | 
|  | 909         "lh               %[tmp_t3], 0(%[v_buf])               \n" | 
|  | 910         "preceu.ph.qbr    %[tmp_t2], %[tmp_t2]                 \n" | 
|  | 911         "preceu.ph.qbr    %[tmp_t3], %[tmp_t3]                 \n" | 
|  | 912         "mul.ph           %[tmp_t4], %[tmp_t2],     %[tmp_ub]  \n" | 
|  | 913         "mul.ph           %[tmp_t5], %[tmp_t2],     %[tmp_ug]  \n" | 
|  | 914         "mul.ph           %[tmp_t6], %[tmp_t3],     %[tmp_vr]  \n" | 
|  | 915         "mul.ph           %[tmp_t3], %[tmp_t3],     %[tmp_vg]  \n" | 
|  | 916         "srl              %[tmp_t7], %[tmp_t7],     16         \n" | 
|  | 917         "ins              %[tmp_t1], %[tmp_t7],     0,      16 \n" | 
|  | 918         "addq_s.ph        %[tmp_t7], %[tmp_t1],     %[tmp_bb]  \n" | 
|  | 919         "addq_s.ph        %[tmp_t8], %[tmp_t1],     %[tmp_bg]  \n" | 
|  | 920         "addq_s.ph        %[tmp_t9], %[tmp_t1],     %[tmp_br]  \n" | 
|  | 921         "addq_s.ph        %[tmp_t5], %[tmp_t5],     %[tmp_t3]  \n" | 
|  | 922         "addq_s.ph        %[tmp_t7], %[tmp_t7],     %[tmp_t4]  \n" | 
|  | 923         "subq_s.ph        %[tmp_t8], %[tmp_t8],     %[tmp_t5]  \n" | 
|  | 924         "addq_s.ph        %[tmp_t9], %[tmp_t9],     %[tmp_t6]  \n" | 
|  | 925         "shra.ph          %[tmp_t7], %[tmp_t7],     6          \n" | 
|  | 926         "shra.ph          %[tmp_t8], %[tmp_t8],     6          \n" | 
|  | 927         "shra.ph          %[tmp_t9], %[tmp_t9],     6          \n" | 
|  | 928         "shll_s.ph        %[tmp_t7], %[tmp_t7],     7          \n" | 
|  | 929         "shll_s.ph        %[tmp_t8], %[tmp_t8],     7          \n" | 
|  | 930         "shll_s.ph        %[tmp_t9], %[tmp_t9],     7          \n" | 
|  | 931         "precrqu_s.qb.ph  %[tmp_t8], %[tmp_mask],   %[tmp_t8]  \n" | 
|  | 932         "precrqu_s.qb.ph  %[tmp_t7], %[tmp_t9],     %[tmp_t7]  \n" | 
|  | 933         "precrq.ph.w      %[tmp_t2], %[tmp_t8],     %[tmp_t7]  \n" | 
|  | 934         "ins              %[tmp_t7], %[tmp_t8],     16,     16 \n" | 
|  | 935         "precr.qb.ph      %[tmp_t8], %[tmp_t2],     %[tmp_t7]  \n" | 
|  | 936         "precrq.qb.ph     %[tmp_t7], %[tmp_t2],     %[tmp_t7]  \n" | 
|  | 937         "sw               %[tmp_t8], 0(%[rgb_buf])             \n" | 
|  | 938         "sw               %[tmp_t7], 4(%[rgb_buf])             \n" | 
|  | 939         ".set pop                                              \n" | 
|  | 940         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 941           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 942           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 943           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) | 
|  | 944         : [y_buf] "r"(y_buf), [yg] "r"(yg), [u_buf] "r"(u_buf), | 
|  | 945           [v_buf] "r"(v_buf), [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), | 
|  | 946           [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), | 
|  | 947           [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), | 
|  | 948           [rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask)); | 
|  | 949     y_buf += 2; | 
|  | 950     u_buf += 2; | 
|  | 951     v_buf += 2; | 
|  | 952     rgb_buf += 8;  // Advance 1 pixel. | 
|  | 953   } | 
|  | 954 } | 
|  | 955 | 
// Converts one row of planar YUV input (src_y, src_u, src_v) into packed
// 16-bit pixels stored at dst_argb4444, two pixels per loop iteration, using
// MIPS DSP inline assembly.
//
// Per iteration the code reads 2 bytes from src_y and 1 byte each from src_u
// and src_v, and stores one 32-bit word (two output pixels) to dst_argb4444.
// Coefficients and biases are taken from yuvconstants.
//
// The loop runs while x < width - 1 in steps of 2, so when width is odd the
// final pixel is not written by this function.
//
// NOTE(review): the asm stores through dst_argb4444, which is only listed as
// an input register operand, and no memory clobber is declared - confirm this
// is acceptable for the compilers in use.
|  | 956 void I422ToARGB4444Row_DSPR2(const uint8* src_y, | 
|  | 957                              const uint8* src_u, | 
|  | 958                              const uint8* src_v, | 
|  | 959                              uint8* dst_argb4444, | 
|  | 960                              const struct YuvConstants* yuvconstants, | 
|  | 961                              int width) { | 
|  | 962   int x; | 
  // Only one element of each constant table is read here.
|  | 963   uint32 tmp_ub = yuvconstants->kUVToB[0]; | 
|  | 964   uint32 tmp_ug = yuvconstants->kUVToG[0]; | 
|  | 965   uint32 tmp_vg = yuvconstants->kUVToG[1]; | 
|  | 966   uint32 tmp_vr = yuvconstants->kUVToR[1]; | 
|  | 967   uint32 tmp_bb = yuvconstants->kUVBiasB[0]; | 
|  | 968   uint32 tmp_bg = yuvconstants->kUVBiasG[0]; | 
|  | 969   uint32 tmp_br = yuvconstants->kUVBiasR[0]; | 
|  | 970   uint32 yg = yuvconstants->kYToRgb[0]; | 
  // tmp_yg is computed and passed to the asm below, but no instruction in the
  // asm template references it.
|  | 971   uint32 tmp_yg; | 
  // Source halfwords for the extra byte lanes produced by the first
  // precrqu_s.qb.ph (presumably alpha - TODO confirm).
|  | 972   uint32 tmp_mask = 0x7fff7fff; | 
  // Replicate the low 16 bits of each value into both halfwords so the paired
  // halfword (.ph) instructions apply the same constant to both pixels.
|  | 973   tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); | 
|  | 974   tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); | 
|  | 975   tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); | 
|  | 976   tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); | 
  // tmp_ub and tmp_vr: bitwise NOT plus 1 in each halfword, i.e. the
  // per-halfword negative as long as the low-half increment does not carry
  // into the high half.
|  | 977   tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; | 
|  | 978   tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); | 
|  | 979   tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); | 
|  | 980   tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; | 
  // Each Y byte is multiplied by yg * 0x0101; the asm keeps the upper 16 bits
  // of each 32-bit product.
|  | 981   yg = yg * 0x0101; | 
|  | 982 | 
|  | 983   for (x = 0; x < width - 1; x += 2) { | 
|  | 984     uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 985     uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; | 
|  | 986     __asm__ __volatile__( | 
|  | 987         ".set push                                             \n" | 
|  | 988         ".set noreorder                                        \n" | 
        // Load two Y bytes and scale them.
|  | 989         "lbu              %[tmp_t7], 0(%[src_y])               \n" | 
|  | 990         "lbu              %[tmp_t1], 1(%[src_y])               \n" | 
|  | 991         "mul              %[tmp_t7], %[tmp_t7],     %[yg]      \n" | 
|  | 992         "mul              %[tmp_t1], %[tmp_t1],     %[yg]      \n" | 
        // Load one U and one V byte, duplicate each into both halfwords and
        // multiply by the packed coefficients.
|  | 993         "lbu              %[tmp_t2], 0(%[src_u])               \n" | 
|  | 994         "lbu              %[tmp_t3], 0(%[src_v])               \n" | 
|  | 995         "replv.ph         %[tmp_t2], %[tmp_t2]                 \n" | 
|  | 996         "replv.ph         %[tmp_t3], %[tmp_t3]                 \n" | 
|  | 997         "mul.ph           %[tmp_t4], %[tmp_t2],     %[tmp_ub]  \n" | 
|  | 998         "mul.ph           %[tmp_t5], %[tmp_t2],     %[tmp_ug]  \n" | 
|  | 999         "mul.ph           %[tmp_t6], %[tmp_t3],     %[tmp_vr]  \n" | 
|  | 1000         "mul.ph           %[tmp_t3], %[tmp_t3],     %[tmp_vg]  \n" | 
        // Pack the two scaled Y values into one register as a halfword pair.
|  | 1001         "srl              %[tmp_t7], %[tmp_t7],     16         \n" | 
|  | 1002         "ins              %[tmp_t1], %[tmp_t7],     0,      16 \n" | 
        // Add the three biases and the U/V products with saturating halfword
        // arithmetic; results land in tmp_t7, tmp_t8 and tmp_t9.
|  | 1003         "addq_s.ph        %[tmp_t7], %[tmp_t1],     %[tmp_bb]  \n" | 
|  | 1004         "addq_s.ph        %[tmp_t8], %[tmp_t1],     %[tmp_bg]  \n" | 
|  | 1005         "addq_s.ph        %[tmp_t9], %[tmp_t1],     %[tmp_br]  \n" | 
|  | 1006         "addq_s.ph        %[tmp_t5], %[tmp_t5],     %[tmp_t3]  \n" | 
|  | 1007         "addq_s.ph        %[tmp_t7], %[tmp_t7],     %[tmp_t4]  \n" | 
|  | 1008         "subq_s.ph        %[tmp_t8], %[tmp_t8],     %[tmp_t5]  \n" | 
|  | 1009         "addq_s.ph        %[tmp_t9], %[tmp_t9],     %[tmp_t6]  \n" | 
        // Shift right by 6, then saturating shift left by 7, ahead of the
        // halfword-to-byte reduction.
|  | 1010         "shra.ph          %[tmp_t7], %[tmp_t7],     6          \n" | 
|  | 1011         "shra.ph          %[tmp_t8], %[tmp_t8],     6          \n" | 
|  | 1012         "shra.ph          %[tmp_t9], %[tmp_t9],     6          \n" | 
|  | 1013         "shll_s.ph        %[tmp_t7], %[tmp_t7],     7          \n" | 
|  | 1014         "shll_s.ph        %[tmp_t8], %[tmp_t8],     7          \n" | 
|  | 1015         "shll_s.ph        %[tmp_t9], %[tmp_t9],     7          \n" | 
        // Reduce halfwords to bytes and rearrange them into two 32-bit
        // pixels (tmp_t8 and tmp_t7).
|  | 1016         "precrqu_s.qb.ph  %[tmp_t8], %[tmp_mask],   %[tmp_t8]  \n" | 
|  | 1017         "precrqu_s.qb.ph  %[tmp_t7], %[tmp_t9],     %[tmp_t7]  \n" | 
|  | 1018         "precrq.ph.w      %[tmp_t2], %[tmp_t8],     %[tmp_t7]  \n" | 
|  | 1019         "ins              %[tmp_t7], %[tmp_t8],     16,     16 \n" | 
|  | 1020         "precr.qb.ph      %[tmp_t8], %[tmp_t2],     %[tmp_t7]  \n" | 
|  | 1021         "precrq.qb.ph     %[tmp_t7], %[tmp_t2],     %[tmp_t7]  \n" | 
        // Drop the low 4 bits of every byte and squeeze both pixels into a
        // single 32-bit word, which is then stored.
|  | 1022         "shrl.qb          %[tmp_t1], %[tmp_t8],     4          \n" | 
|  | 1023         "shrl.qb          %[tmp_t2], %[tmp_t7],     4          \n" | 
|  | 1024         "shrl.ph          %[tmp_t8], %[tmp_t1],     4          \n" | 
|  | 1025         "shrl.ph          %[tmp_t7], %[tmp_t2],     4          \n" | 
|  | 1026         "or               %[tmp_t8], %[tmp_t8],     %[tmp_t1]  \n" | 
|  | 1027         "or               %[tmp_t7], %[tmp_t7],     %[tmp_t2]  \n" | 
|  | 1028         "precr.qb.ph      %[tmp_t8], %[tmp_t7],     %[tmp_t8]  \n" | 
|  | 1029         "sw               %[tmp_t8], 0(%[dst_argb4444])        \n" | 
|  | 1030         ".set pop                                              \n" | 
        // Outputs: scratch registers only, all early-clobber.
|  | 1031         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1032           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1033           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1034           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) | 
        // Inputs: pointers and packed constants. The pointers are advanced in
        // C after the asm statement, not inside it.
|  | 1035         : [dst_argb4444] "r"(dst_argb4444), [yg] "r"(yg), [src_u] "r"(src_u), | 
|  | 1036           [src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub), | 
|  | 1037           [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), | 
|  | 1038           [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), | 
|  | 1039           [tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask)); | 
|  | 1040     src_y += 2; | 
|  | 1041     src_u += 1; | 
|  | 1042     src_v += 1; | 
|  | 1043     dst_argb4444 += 4;  // Advance 2 pixels. | 
|  | 1044   } | 
|  | 1045 } | 
|  | 1046 | 
// Converts one row of planar YUV input (src_y, src_u, src_v) into packed
// 16-bit pixels stored at dst_argb1555, two pixels per loop iteration, using
// MIPS DSP inline assembly.
//
// Per iteration the code reads 2 bytes from src_y and 1 byte each from src_u
// and src_v, and stores one 32-bit word (two output pixels) to dst_argb1555.
// Bit 15 of each output halfword is always set by the final OR with tmp_mask.
//
// The loop runs while x < width - 1 in steps of 2, so when width is odd the
// final pixel is not written by this function.
//
// NOTE(review): the asm stores through dst_argb1555, which is only listed as
// an input register operand, and no memory clobber is declared - confirm this
// is acceptable for the compilers in use.
|  | 1047 void I422ToARGB1555Row_DSPR2(const uint8* src_y, | 
|  | 1048                              const uint8* src_u, | 
|  | 1049                              const uint8* src_v, | 
|  | 1050                              uint8* dst_argb1555, | 
|  | 1051                              const struct YuvConstants* yuvconstants, | 
|  | 1052                              int width) { | 
|  | 1053   int x; | 
  // Only one element of each constant table is read here.
|  | 1054   uint32 tmp_ub = yuvconstants->kUVToB[0]; | 
|  | 1055   uint32 tmp_ug = yuvconstants->kUVToG[0]; | 
|  | 1056   uint32 tmp_vg = yuvconstants->kUVToG[1]; | 
|  | 1057   uint32 tmp_vr = yuvconstants->kUVToR[1]; | 
|  | 1058   uint32 tmp_bb = yuvconstants->kUVBiasB[0]; | 
|  | 1059   uint32 tmp_bg = yuvconstants->kUVBiasG[0]; | 
|  | 1060   uint32 tmp_br = yuvconstants->kUVBiasR[0]; | 
|  | 1061   uint32 yg = yuvconstants->kYToRgb[0]; | 
  // tmp_yg is computed and passed to the asm below, but no instruction in the
  // asm template references it.
|  | 1062   uint32 tmp_yg; | 
  // Used twice in the asm: as an operand of the first precrqu_s.qb.ph and as
  // the OR mask that sets bit 15 of both output halfwords.
|  | 1063   uint32 tmp_mask = 0x80008000; | 
  // Replicate the low 16 bits of each value into both halfwords so the paired
  // halfword (.ph) instructions apply the same constant to both pixels.
|  | 1064   tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); | 
|  | 1065   tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); | 
|  | 1066   tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); | 
|  | 1067   tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); | 
  // tmp_ub and tmp_vr: bitwise NOT plus 1 in each halfword, i.e. the
  // per-halfword negative as long as the low-half increment does not carry
  // into the high half.
|  | 1068   tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; | 
|  | 1069   tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); | 
|  | 1070   tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); | 
|  | 1071   tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; | 
  // Each Y byte is multiplied by yg * 0x0101; the asm keeps the upper 16 bits
  // of each 32-bit product.
|  | 1072   yg = yg * 0x0101; | 
|  | 1073 | 
|  | 1074   for (x = 0; x < width - 1; x += 2) { | 
|  | 1075     uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1076     uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; | 
|  | 1077     __asm__ __volatile__( | 
|  | 1078         ".set push                                             \n" | 
|  | 1079         ".set noreorder                                        \n" | 
        // Load two Y bytes and scale them.
|  | 1080         "lbu              %[tmp_t7], 0(%[src_y])               \n" | 
|  | 1081         "lbu              %[tmp_t1], 1(%[src_y])               \n" | 
|  | 1082         "mul              %[tmp_t7], %[tmp_t7],     %[yg]      \n" | 
|  | 1083         "mul              %[tmp_t1], %[tmp_t1],     %[yg]      \n" | 
        // Load one U and one V byte, duplicate each into both halfwords and
        // multiply by the packed coefficients.
|  | 1084         "lbu              %[tmp_t2], 0(%[src_u])               \n" | 
|  | 1085         "lbu              %[tmp_t3], 0(%[src_v])               \n" | 
|  | 1086         "replv.ph         %[tmp_t2], %[tmp_t2]                 \n" | 
|  | 1087         "replv.ph         %[tmp_t3], %[tmp_t3]                 \n" | 
|  | 1088         "mul.ph           %[tmp_t4], %[tmp_t2],     %[tmp_ub]  \n" | 
|  | 1089         "mul.ph           %[tmp_t5], %[tmp_t2],     %[tmp_ug]  \n" | 
|  | 1090         "mul.ph           %[tmp_t6], %[tmp_t3],     %[tmp_vr]  \n" | 
|  | 1091         "mul.ph           %[tmp_t3], %[tmp_t3],     %[tmp_vg]  \n" | 
        // Pack the two scaled Y values into one register as a halfword pair.
|  | 1092         "srl              %[tmp_t7], %[tmp_t7],     16         \n" | 
|  | 1093         "ins              %[tmp_t1], %[tmp_t7],     0,      16 \n" | 
        // Add the three biases and the U/V products with saturating halfword
        // arithmetic; results land in tmp_t7, tmp_t8 and tmp_t9.
|  | 1094         "addq_s.ph        %[tmp_t7], %[tmp_t1],     %[tmp_bb]  \n" | 
|  | 1095         "addq_s.ph        %[tmp_t8], %[tmp_t1],     %[tmp_bg]  \n" | 
|  | 1096         "addq_s.ph        %[tmp_t9], %[tmp_t1],     %[tmp_br]  \n" | 
|  | 1097         "addq_s.ph        %[tmp_t5], %[tmp_t5],     %[tmp_t3]  \n" | 
|  | 1098         "addq_s.ph        %[tmp_t7], %[tmp_t7],     %[tmp_t4]  \n" | 
|  | 1099         "subq_s.ph        %[tmp_t8], %[tmp_t8],     %[tmp_t5]  \n" | 
|  | 1100         "addq_s.ph        %[tmp_t9], %[tmp_t9],     %[tmp_t6]  \n" | 
        // Shift right by 6, then saturating shift left by 7, ahead of the
        // halfword-to-byte reduction.
|  | 1101         "shra.ph          %[tmp_t7], %[tmp_t7],     6          \n" | 
|  | 1102         "shra.ph          %[tmp_t8], %[tmp_t8],     6          \n" | 
|  | 1103         "shra.ph          %[tmp_t9], %[tmp_t9],     6          \n" | 
|  | 1104         "shll_s.ph        %[tmp_t7], %[tmp_t7],     7          \n" | 
|  | 1105         "shll_s.ph        %[tmp_t8], %[tmp_t8],     7          \n" | 
|  | 1106         "shll_s.ph        %[tmp_t9], %[tmp_t9],     7          \n" | 
        // Reduce halfwords to bytes and rearrange them into two 32-bit
        // pixels (tmp_t8 and tmp_t7).
|  | 1107         "precrqu_s.qb.ph  %[tmp_t8], %[tmp_mask],   %[tmp_t8]  \n" | 
|  | 1108         "precrqu_s.qb.ph  %[tmp_t7], %[tmp_t9],     %[tmp_t7]  \n" | 
|  | 1109         "precrq.ph.w      %[tmp_t2], %[tmp_t8],     %[tmp_t7]  \n" | 
|  | 1110         "ins              %[tmp_t7], %[tmp_t8],     16,     16 \n" | 
|  | 1111         "precr.qb.ph      %[tmp_t8], %[tmp_t2],     %[tmp_t7]  \n" | 
|  | 1112         "precrq.qb.ph     %[tmp_t7], %[tmp_t2],     %[tmp_t7]  \n" | 
        // Three overlapping bit-field inserts per pixel build the narrowed
        // value in the upper halfword of tmp_t3 / tmp_t4; the order of these
        // inserts matters because each one overwrites part of the previous.
|  | 1113         "ins              %[tmp_t3], %[tmp_t8],     7,      24 \n" | 
|  | 1114         "ins              %[tmp_t3], %[tmp_t8],     10,     16 \n" | 
|  | 1115         "ins              %[tmp_t3], %[tmp_t8],     13,     8  \n" | 
|  | 1116         "ins              %[tmp_t4], %[tmp_t7],     7,      24 \n" | 
|  | 1117         "ins              %[tmp_t4], %[tmp_t7],     10,     16 \n" | 
|  | 1118         "ins              %[tmp_t4], %[tmp_t7],     13,     8  \n" | 
        // Combine the two upper halfwords, set bit 15 of each, and store.
|  | 1119         "precrq.ph.w      %[tmp_t8], %[tmp_t4],     %[tmp_t3]  \n" | 
|  | 1120         "or               %[tmp_t8], %[tmp_t8],     %[tmp_mask]\n" | 
|  | 1121         "sw               %[tmp_t8], 0(%[dst_argb1555])        \n" | 
|  | 1122         ".set pop                                              \n" | 
        // Outputs: scratch registers only, all early-clobber.
|  | 1123         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1124           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1125           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1126           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) | 
        // Inputs: pointers and packed constants. The pointers are advanced in
        // C after the asm statement, not inside it.
|  | 1127         : [dst_argb1555] "r"(dst_argb1555), [yg] "r"(yg), [src_u] "r"(src_u), | 
|  | 1128           [src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub), | 
|  | 1129           [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), | 
|  | 1130           [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), | 
|  | 1131           [tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask)); | 
|  | 1132     src_y += 2; | 
|  | 1133     src_u += 1; | 
|  | 1134     src_v += 1; | 
|  | 1135     dst_argb1555 += 4;  // Advance 2 pixels. | 
|  | 1136   } | 
|  | 1137 } | 
|  | 1138 | 
// Converts one row of Y samples (src_y) plus interleaved chroma bytes
// (src_uv) into 32-bit pixels stored at rgb_buf, two pixels per loop
// iteration, using MIPS DSP inline assembly.
//
// Per iteration the code reads 2 bytes from src_y and 2 bytes from src_uv
// (offset 0 feeds the U coefficients, offset 1 the V coefficients), and
// stores two 32-bit words to rgb_buf.
//
// The loop runs while x < width - 1 in steps of 2, so when width is odd the
// final pixel is not written by this function.
//
// NOTE(review): the asm stores through rgb_buf, which is only listed as an
// input register operand, and no memory clobber is declared - confirm this is
// acceptable for the compilers in use.
|  | 1139 void NV12ToARGBRow_DSPR2(const uint8* src_y, | 
|  | 1140                          const uint8* src_uv, | 
|  | 1141                          uint8* rgb_buf, | 
|  | 1142                          const struct YuvConstants* yuvconstants, | 
|  | 1143                          int width) { | 
|  | 1144   int x; | 
  // Only one element of each constant table is read here.
|  | 1145   uint32 tmp_ub = yuvconstants->kUVToB[0]; | 
|  | 1146   uint32 tmp_ug = yuvconstants->kUVToG[0]; | 
|  | 1147   uint32 tmp_vg = yuvconstants->kUVToG[1]; | 
|  | 1148   uint32 tmp_vr = yuvconstants->kUVToR[1]; | 
|  | 1149   uint32 tmp_bb = yuvconstants->kUVBiasB[0]; | 
|  | 1150   uint32 tmp_bg = yuvconstants->kUVBiasG[0]; | 
|  | 1151   uint32 tmp_br = yuvconstants->kUVBiasR[0]; | 
|  | 1152   uint32 yg = yuvconstants->kYToRgb[0]; | 
  // Source halfwords for the extra byte lanes produced by the first
  // precrqu_s.qb.ph (presumably alpha - TODO confirm).
|  | 1153   uint32 tmp_mask = 0x7fff7fff; | 
  // tmp_yg is computed and passed to the asm below, but no instruction in the
  // asm template references it.
|  | 1154   uint32 tmp_yg; | 
  // Replicate the low 16 bits of each value into both halfwords so the paired
  // halfword (.ph) instructions apply the same constant to both pixels.
|  | 1155   tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff); | 
|  | 1156   tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff); | 
|  | 1157   tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff); | 
|  | 1158   tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff); | 
  // tmp_ub and tmp_vr: bitwise NOT plus 1 in each halfword, i.e. the
  // per-halfword negative as long as the low-half increment does not carry
  // into the high half.
|  | 1159   tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001; | 
|  | 1160   tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff); | 
|  | 1161   tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff); | 
|  | 1162   tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001; | 
  // Each Y byte is multiplied by yg * 0x0101; the asm keeps the upper 16 bits
  // of each 32-bit product.
|  | 1163   yg = yg * 0x0101; | 
|  | 1164 | 
|  | 1165   for (x = 0; x < width - 1; x += 2) { | 
|  | 1166     uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1167     uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9; | 
|  | 1168     __asm__ __volatile__( | 
|  | 1169         ".set push                                             \n" | 
|  | 1170         ".set noreorder                                        \n" | 
        // Load two Y bytes and scale them.
|  | 1171         "lbu              %[tmp_t7], 0(%[src_y])               \n" | 
|  | 1172         "lbu              %[tmp_t1], 1(%[src_y])               \n" | 
|  | 1173         "mul              %[tmp_t7], %[tmp_t7],     %[yg]      \n" | 
|  | 1174         "mul              %[tmp_t1], %[tmp_t1],     %[yg]      \n" | 
        // Load the two interleaved chroma bytes, duplicate each into both
        // halfwords and multiply by the packed coefficients.
|  | 1175         "lbu              %[tmp_t2], 0(%[src_uv])              \n" | 
|  | 1176         "lbu              %[tmp_t3], 1(%[src_uv])              \n" | 
|  | 1177         "replv.ph         %[tmp_t2], %[tmp_t2]                 \n" | 
|  | 1178         "replv.ph         %[tmp_t3], %[tmp_t3]                 \n" | 
|  | 1179         "mul.ph           %[tmp_t4], %[tmp_t2],     %[tmp_ub]  \n" | 
|  | 1180         "mul.ph           %[tmp_t5], %[tmp_t2],     %[tmp_ug]  \n" | 
|  | 1181         "mul.ph           %[tmp_t6], %[tmp_t3],     %[tmp_vr]  \n" | 
|  | 1182         "mul.ph           %[tmp_t3], %[tmp_t3],     %[tmp_vg]  \n" | 
        // Pack the two scaled Y values into one register as a halfword pair.
|  | 1183         "srl              %[tmp_t7], %[tmp_t7],     16         \n" | 
|  | 1184         "ins              %[tmp_t1], %[tmp_t7],     0,      16 \n" | 
        // Add the three biases and the chroma products with saturating
        // halfword arithmetic; results land in tmp_t7, tmp_t8 and tmp_t9.
|  | 1185         "addq_s.ph        %[tmp_t7], %[tmp_t1],     %[tmp_bb]  \n" | 
|  | 1186         "addq_s.ph        %[tmp_t8], %[tmp_t1],     %[tmp_bg]  \n" | 
|  | 1187         "addq_s.ph        %[tmp_t9], %[tmp_t1],     %[tmp_br]  \n" | 
|  | 1188         "addq_s.ph        %[tmp_t5], %[tmp_t5],     %[tmp_t3]  \n" | 
|  | 1189         "addq_s.ph        %[tmp_t7], %[tmp_t7],     %[tmp_t4]  \n" | 
|  | 1190         "subq_s.ph        %[tmp_t8], %[tmp_t8],     %[tmp_t5]  \n" | 
|  | 1191         "addq_s.ph        %[tmp_t9], %[tmp_t9],     %[tmp_t6]  \n" | 
        // Shift right by 6, then saturating shift left by 7, ahead of the
        // halfword-to-byte reduction.
|  | 1192         "shra.ph          %[tmp_t7], %[tmp_t7],     6          \n" | 
|  | 1193         "shra.ph          %[tmp_t8], %[tmp_t8],     6          \n" | 
|  | 1194         "shra.ph          %[tmp_t9], %[tmp_t9],     6          \n" | 
|  | 1195         "shll_s.ph        %[tmp_t7], %[tmp_t7],     7          \n" | 
|  | 1196         "shll_s.ph        %[tmp_t8], %[tmp_t8],     7          \n" | 
|  | 1197         "shll_s.ph        %[tmp_t9], %[tmp_t9],     7          \n" | 
        // Reduce halfwords to bytes, rearrange them into two 32-bit pixels
        // and store both.
|  | 1198         "precrqu_s.qb.ph  %[tmp_t8], %[tmp_mask],   %[tmp_t8]  \n" | 
|  | 1199         "precrqu_s.qb.ph  %[tmp_t7], %[tmp_t9],     %[tmp_t7]  \n" | 
|  | 1200         "precrq.ph.w      %[tmp_t2], %[tmp_t8],     %[tmp_t7]  \n" | 
|  | 1201         "ins              %[tmp_t7], %[tmp_t8],     16,     16 \n" | 
|  | 1202         "precr.qb.ph      %[tmp_t8], %[tmp_t2],     %[tmp_t7]  \n" | 
|  | 1203         "precrq.qb.ph     %[tmp_t7], %[tmp_t2],     %[tmp_t7]  \n" | 
|  | 1204         "sw               %[tmp_t8], 0(%[rgb_buf])             \n" | 
|  | 1205         "sw               %[tmp_t7], 4(%[rgb_buf])             \n" | 
|  | 1206         ".set pop                                              \n" | 
        // Outputs: scratch registers only, all early-clobber.
|  | 1207         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1208           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1209           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1210           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9) | 
        // Inputs: pointers and packed constants. The pointers are advanced in
        // C after the asm statement, not inside it.
|  | 1211         : [src_y] "r"(src_y), [src_uv] "r"(src_uv), [yg] "r"(yg), | 
|  | 1212           [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), | 
|  | 1213           [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), | 
|  | 1214           [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), [rgb_buf] "r"(rgb_buf), | 
|  | 1215           [tmp_mask] "r"(tmp_mask)); | 
|  | 1216 | 
|  | 1217     src_y += 2; | 
|  | 1218     src_uv += 2; | 
|  | 1219     rgb_buf += 8;  // Advance 2 pixels. | 
|  | 1220   } | 
|  | 1221 } | 
|  | 1222 | 
// Produces one U byte and one V byte per 2x2 block of 32-bit source pixels,
// using MIPS DSP inline assembly.
//
// src_rgb0 is the first source row; the second row is src_rgb0 plus
// src_stride_rgb bytes. Each iteration loads two 32-bit words from each row,
// sums the matching byte lanes of the four pixels, shifts the sums right by 2
// and feeds them to two dot-product accumulators: $ac0 yields the byte stored
// to dst_u and $ac1 the byte stored to dst_v.
//
// All four pointers are advanced inside the asm (8 source bytes, 1 byte per
// destination). The loop runs while x < width - 1 in steps of 2, so an odd
// trailing pixel column is not processed.
//
// NOTE(review): the asm writes memory via sb but declares no memory clobber;
// lw presumably also requires 4-byte-aligned source rows - confirm both
// against the callers and toolchain.
|  | 1223 void BGRAToUVRow_DSPR2(const uint8* src_rgb0, | 
|  | 1224                        int src_stride_rgb, | 
|  | 1225                        uint8* dst_u, | 
|  | 1226                        uint8* dst_v, | 
|  | 1227                        int width) { | 
|  | 1228   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; | 
|  | 1229   int x; | 
  // Packed pairs of signed 16-bit coefficients for the dot products:
  // const1/const2 feed $ac0 (U), const3/const4 feed $ac1 (V).
  // NOTE(review): several literals exceed INT_MAX, so storing them in int
  // relies on the implementation-defined unsigned-to-int conversion.
|  | 1230   int const1 = 0xffda0000; | 
|  | 1231   int const2 = 0x0070ffb6; | 
|  | 1232   int const3 = 0x00700000; | 
|  | 1233   int const4 = 0xffeeffa2; | 
  // Multiplied by itself (0x100 * 0x100 = 0x10000) to preload each
  // accumulator before the dot products are added.
|  | 1234   int const5 = 0x100; | 
|  | 1235   for (x = 0; x < width - 1; x += 2) { | 
|  | 1236     int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1237     int tmp_t6, tmp_t7, tmp_t8; | 
|  | 1238     __asm__ __volatile__( | 
|  | 1239         ".set push                                                 \n" | 
|  | 1240         ".set noreorder                                            \n" | 
        // Load two pixels from each of the two rows.
|  | 1241         "lw                %[tmp_t1],   0(%[src_rgb0])             \n" | 
|  | 1242         "lw                %[tmp_t2],   4(%[src_rgb0])             \n" | 
|  | 1243         "lw                %[tmp_t3],   0(%[src_rgb1])             \n" | 
|  | 1244         "lw                %[tmp_t4],   4(%[src_rgb1])             \n" | 
        // Widen each pixel into two halfword pairs (right bytes, left bytes).
|  | 1245         "preceu.ph.qbr     %[tmp_t5],   %[tmp_t1]                  \n" | 
|  | 1246         "preceu.ph.qbl     %[tmp_t1],   %[tmp_t1]                  \n" | 
|  | 1247         "preceu.ph.qbr     %[tmp_t6],   %[tmp_t2]                  \n" | 
|  | 1248         "preceu.ph.qbl     %[tmp_t2],   %[tmp_t2]                  \n" | 
|  | 1249         "preceu.ph.qbr     %[tmp_t7],   %[tmp_t3]                  \n" | 
|  | 1250         "preceu.ph.qbl     %[tmp_t3],   %[tmp_t3]                  \n" | 
|  | 1251         "preceu.ph.qbr     %[tmp_t8],   %[tmp_t4]                  \n" | 
|  | 1252         "preceu.ph.qbl     %[tmp_t4],   %[tmp_t4]                  \n" | 
        // Sum the four pixels lane by lane, then divide by 4 via the shift.
|  | 1253         "addu.ph           %[tmp_t5],   %[tmp_t5],     %[tmp_t6]   \n" | 
|  | 1254         "addu.ph           %[tmp_t7],   %[tmp_t7],     %[tmp_t8]   \n" | 
|  | 1255         "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t2]   \n" | 
|  | 1256         "addu.ph           %[tmp_t3],   %[tmp_t3],     %[tmp_t4]   \n" | 
|  | 1257         "addu.ph           %[tmp_t5],   %[tmp_t5],     %[tmp_t7]   \n" | 
|  | 1258         "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t3]   \n" | 
|  | 1259         "shrl.ph           %[tmp_t5],   %[tmp_t5],     2           \n" | 
|  | 1260         "shrl.ph           %[tmp_t1],   %[tmp_t1],     2           \n" | 
        // Preload both accumulators, accumulate the weighted sums, then
        // extract each result with a rounding right shift of 9.
|  | 1261         "mult              $ac0,        %[const5],     %[const5]   \n" | 
|  | 1262         "mult              $ac1,        %[const5],     %[const5]   \n" | 
|  | 1263         "dpaq_s.w.ph       $ac0,        %[tmp_t5],     %[const1]   \n" | 
|  | 1264         "dpaq_s.w.ph       $ac1,        %[tmp_t5],     %[const3]   \n" | 
|  | 1265         "dpaq_s.w.ph       $ac0,        %[tmp_t1],     %[const2]   \n" | 
|  | 1266         "dpaq_s.w.ph       $ac1,        %[tmp_t1],     %[const4]   \n" | 
|  | 1267         "extr_r.w          %[tmp_t7],   $ac0,          9           \n" | 
|  | 1268         "extr_r.w          %[tmp_t8],   $ac1,          9           \n" | 
        // Pointers are bumped first, so the stores use offset -1.
|  | 1269         "addiu             %[dst_u],    %[dst_u],    1             \n" | 
|  | 1270         "addiu             %[dst_v],    %[dst_v],    1             \n" | 
|  | 1271         "addiu             %[src_rgb0], %[src_rgb0], 8             \n" | 
|  | 1272         "addiu             %[src_rgb1], %[src_rgb1], 8             \n" | 
|  | 1273         "sb                %[tmp_t7],   -1(%[dst_u])               \n" | 
|  | 1274         "sb                %[tmp_t8],   -1(%[dst_v])               \n" | 
|  | 1275         ".set pop                                                  \n" | 
        // Scratch registers are early-clobber outputs; the four pointers are
        // read-write operands because the asm advances them.
|  | 1276         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1277           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1278           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1279           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), | 
|  | 1280           [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), | 
|  | 1281           [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) | 
|  | 1282         : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), | 
|  | 1283           [const4] "r"(const4), [const5] "r"(const5) | 
        // Clobbers cover the two accumulators written above.
|  | 1284         : "hi", "lo", "$ac1lo", "$ac1hi"); | 
|  | 1285   } | 
|  | 1286 } | 
|  | 1287 | 
// Produces one Y byte per 32-bit source pixel, four pixels per loop
// iteration, using MIPS DSP inline assembly.
//
// Each iteration loads four 32-bit words from src_argb0, widens the bytes to
// halfwords, accumulates two weighted dot products per pixel in accumulators
// $ac0..$ac3 (each preloaded with const5 * const5), extracts each result with
// a rounding right shift of 8 and stores four bytes to dst_y. Both pointers
// are advanced inside the asm.
//
// The loop condition is x < width with a step of 4, and every iteration
// always reads 16 source bytes and writes 4 destination bytes, so a width
// that is not a multiple of 4 is effectively rounded up.
// NOTE(review): confirm callers only pass suitable widths or padded buffers,
// and that the missing memory clobber on the sb stores is acceptable.
|  | 1288 void BGRAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { | 
|  | 1289   int x; | 
  // Packed pairs of 16-bit weights: const1 is applied to the right two bytes
  // of each pixel word, const2 to the left two bytes.
|  | 1290   int const1 = 0x00420000; | 
|  | 1291   int const2 = 0x00190081; | 
  // 0x40 * 0x40 = 0x1000 is the starting value of every accumulator.
|  | 1292   int const5 = 0x40; | 
|  | 1293   for (x = 0; x < width; x += 4) { | 
|  | 1294     int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1295     int tmp_t6, tmp_t7, tmp_t8; | 
|  | 1296     __asm__ __volatile__( | 
|  | 1297         ".set push                                                \n" | 
|  | 1298         ".set noreorder                                           \n" | 
        // Load four pixels.
|  | 1299         "lw                %[tmp_t1],   0(%[src_argb0])           \n" | 
|  | 1300         "lw                %[tmp_t2],   4(%[src_argb0])           \n" | 
|  | 1301         "lw                %[tmp_t3],   8(%[src_argb0])           \n" | 
|  | 1302         "lw                %[tmp_t4],   12(%[src_argb0])          \n" | 
        // Widen each pixel into two halfword pairs (right bytes, left bytes).
|  | 1303         "preceu.ph.qbr     %[tmp_t5],   %[tmp_t1]                 \n" | 
|  | 1304         "preceu.ph.qbl     %[tmp_t1],   %[tmp_t1]                 \n" | 
|  | 1305         "preceu.ph.qbr     %[tmp_t6],   %[tmp_t2]                 \n" | 
|  | 1306         "preceu.ph.qbl     %[tmp_t2],   %[tmp_t2]                 \n" | 
|  | 1307         "preceu.ph.qbr     %[tmp_t7],   %[tmp_t3]                 \n" | 
|  | 1308         "preceu.ph.qbl     %[tmp_t3],   %[tmp_t3]                 \n" | 
|  | 1309         "preceu.ph.qbr     %[tmp_t8],   %[tmp_t4]                 \n" | 
|  | 1310         "preceu.ph.qbl     %[tmp_t4],   %[tmp_t4]                 \n" | 
        // One accumulator per pixel: preload, then add both dot products.
|  | 1311         "mult              $ac0,        %[const5],     %[const5]  \n" | 
|  | 1312         "mult              $ac1,        %[const5],     %[const5]  \n" | 
|  | 1313         "mult              $ac2,        %[const5],     %[const5]  \n" | 
|  | 1314         "mult              $ac3,        %[const5],     %[const5]  \n" | 
|  | 1315         "dpa.w.ph          $ac0,        %[tmp_t5],     %[const1]  \n" | 
|  | 1316         "dpa.w.ph          $ac1,        %[tmp_t6],     %[const1]  \n" | 
|  | 1317         "dpa.w.ph          $ac2,        %[tmp_t7],     %[const1]  \n" | 
|  | 1318         "dpa.w.ph          $ac3,        %[tmp_t8],     %[const1]  \n" | 
|  | 1319         "dpa.w.ph          $ac0,        %[tmp_t1],     %[const2]  \n" | 
|  | 1320         "dpa.w.ph          $ac1,        %[tmp_t2],     %[const2]  \n" | 
|  | 1321         "dpa.w.ph          $ac2,        %[tmp_t3],     %[const2]  \n" | 
|  | 1322         "dpa.w.ph          $ac3,        %[tmp_t4],     %[const2]  \n" | 
|  | 1323         "extr_r.w          %[tmp_t1],   $ac0,          8          \n" | 
|  | 1324         "extr_r.w          %[tmp_t2],   $ac1,          8          \n" | 
|  | 1325         "extr_r.w          %[tmp_t3],   $ac2,          8          \n" | 
|  | 1326         "extr_r.w          %[tmp_t4],   $ac3,          8          \n" | 
        // Pointers are bumped first, so the stores use negative offsets.
|  | 1327         "addiu             %[src_argb0],%[src_argb0],  16         \n" | 
|  | 1328         "addiu             %[dst_y],    %[dst_y],      4          \n" | 
|  | 1329         "sb                %[tmp_t1],   -4(%[dst_y])              \n" | 
|  | 1330         "sb                %[tmp_t2],   -3(%[dst_y])              \n" | 
|  | 1331         "sb                %[tmp_t3],   -2(%[dst_y])              \n" | 
|  | 1332         "sb                %[tmp_t4],   -1(%[dst_y])              \n" | 
|  | 1333         ".set pop                                                 \n" | 
        // Scratch registers are early-clobber outputs; both pointers are
        // read-write operands because the asm advances them.
|  | 1334         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1335           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1336           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1337           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), | 
|  | 1338           [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) | 
|  | 1339         : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) | 
        // Clobbers cover all four accumulators written above.
|  | 1340         : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", | 
|  | 1341           "$ac3hi"); | 
|  | 1342   } | 
|  | 1343 } | 
|  | 1344 | 
// Produces one U byte and one V byte per 2x2 block of 32-bit source pixels,
// using MIPS DSP inline assembly. The instruction sequence matches
// BGRAToUVRow_DSPR2; only the packed coefficient constants differ.
//
// src_rgb0 is the first source row; the second row is src_rgb0 plus
// src_stride_rgb bytes. Each iteration loads two 32-bit words from each row,
// sums the matching byte lanes of the four pixels, shifts the sums right by 2
// and feeds them to two dot-product accumulators: $ac0 yields the byte stored
// to dst_u and $ac1 the byte stored to dst_v.
//
// All four pointers are advanced inside the asm (8 source bytes, 1 byte per
// destination). The loop runs while x < width - 1 in steps of 2, so an odd
// trailing pixel column is not processed.
//
// NOTE(review): the asm writes memory via sb but declares no memory clobber;
// lw presumably also requires 4-byte-aligned source rows - confirm both
// against the callers and toolchain.
|  | 1345 void ABGRToUVRow_DSPR2(const uint8* src_rgb0, | 
|  | 1346                        int src_stride_rgb, | 
|  | 1347                        uint8* dst_u, | 
|  | 1348                        uint8* dst_v, | 
|  | 1349                        int width) { | 
|  | 1350   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; | 
|  | 1351   int x; | 
  // Packed pairs of signed 16-bit coefficients for the dot products:
  // const1/const2 feed $ac0 (U), const3/const4 feed $ac1 (V).
  // NOTE(review): several literals exceed INT_MAX, so storing them in int
  // relies on the implementation-defined unsigned-to-int conversion.
|  | 1352   int const1 = 0xffb6ffda; | 
|  | 1353   int const2 = 0x00000070; | 
|  | 1354   int const3 = 0xffa20070; | 
|  | 1355   int const4 = 0x0000ffee; | 
  // Multiplied by itself (0x100 * 0x100 = 0x10000) to preload each
  // accumulator before the dot products are added.
|  | 1356   int const5 = 0x100; | 
|  | 1357 | 
|  | 1358   for (x = 0; x < width - 1; x += 2) { | 
|  | 1359     int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1360     int tmp_t6, tmp_t7, tmp_t8; | 
|  | 1361     __asm__ __volatile__( | 
|  | 1362         ".set push                                                \n" | 
|  | 1363         ".set noreorder                                           \n" | 
        // Load two pixels from each of the two rows.
|  | 1364         "lw                %[tmp_t1],   0(%[src_rgb0])            \n" | 
|  | 1365         "lw                %[tmp_t2],   4(%[src_rgb0])            \n" | 
|  | 1366         "lw                %[tmp_t3],   0(%[src_rgb1])            \n" | 
|  | 1367         "lw                %[tmp_t4],   4(%[src_rgb1])            \n" | 
        // Widen each pixel into two halfword pairs (right bytes, left bytes).
|  | 1368         "preceu.ph.qbr     %[tmp_t5],   %[tmp_t1]                 \n" | 
|  | 1369         "preceu.ph.qbl     %[tmp_t1],   %[tmp_t1]                 \n" | 
|  | 1370         "preceu.ph.qbr     %[tmp_t6],   %[tmp_t2]                 \n" | 
|  | 1371         "preceu.ph.qbl     %[tmp_t2],   %[tmp_t2]                 \n" | 
|  | 1372         "preceu.ph.qbr     %[tmp_t7],   %[tmp_t3]                 \n" | 
|  | 1373         "preceu.ph.qbl     %[tmp_t3],   %[tmp_t3]                 \n" | 
|  | 1374         "preceu.ph.qbr     %[tmp_t8],   %[tmp_t4]                 \n" | 
|  | 1375         "preceu.ph.qbl     %[tmp_t4],   %[tmp_t4]                 \n" | 
        // Sum the four pixels lane by lane, then divide by 4 via the shift.
|  | 1376         "addu.ph           %[tmp_t5],   %[tmp_t5],     %[tmp_t6]  \n" | 
|  | 1377         "addu.ph           %[tmp_t7],   %[tmp_t7],     %[tmp_t8]  \n" | 
|  | 1378         "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t2]  \n" | 
|  | 1379         "addu.ph           %[tmp_t3],   %[tmp_t3],     %[tmp_t4]  \n" | 
|  | 1380         "addu.ph           %[tmp_t5],   %[tmp_t5],     %[tmp_t7]  \n" | 
|  | 1381         "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t3]  \n" | 
|  | 1382         "shrl.ph           %[tmp_t5],   %[tmp_t5],     2          \n" | 
|  | 1383         "shrl.ph           %[tmp_t1],   %[tmp_t1],     2          \n" | 
        // Preload both accumulators, accumulate the weighted sums, then
        // extract each result with a rounding right shift of 9.
|  | 1384         "mult              $ac0,        %[const5],     %[const5]  \n" | 
|  | 1385         "mult              $ac1,        %[const5],     %[const5]  \n" | 
|  | 1386         "dpaq_s.w.ph       $ac0,        %[tmp_t5],     %[const1]  \n" | 
|  | 1387         "dpaq_s.w.ph       $ac1,        %[tmp_t5],     %[const3]  \n" | 
|  | 1388         "dpaq_s.w.ph       $ac0,        %[tmp_t1],     %[const2]  \n" | 
|  | 1389         "dpaq_s.w.ph       $ac1,        %[tmp_t1],     %[const4]  \n" | 
|  | 1390         "extr_r.w          %[tmp_t7],   $ac0,          9          \n" | 
|  | 1391         "extr_r.w          %[tmp_t8],   $ac1,          9          \n" | 
        // Pointers are bumped first, so the stores use offset -1.
|  | 1392         "addiu             %[dst_u],    %[dst_u],    1            \n" | 
|  | 1393         "addiu             %[dst_v],    %[dst_v],    1            \n" | 
|  | 1394         "addiu             %[src_rgb0], %[src_rgb0], 8            \n" | 
|  | 1395         "addiu             %[src_rgb1], %[src_rgb1], 8            \n" | 
|  | 1396         "sb                %[tmp_t7],   -1(%[dst_u])              \n" | 
|  | 1397         "sb                %[tmp_t8],   -1(%[dst_v])              \n" | 
|  | 1398         ".set pop                                                 \n" | 
        // Scratch registers are early-clobber outputs; the four pointers are
        // read-write operands because the asm advances them.
|  | 1399         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1400           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1401           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1402           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), | 
|  | 1403           [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), | 
|  | 1404           [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) | 
|  | 1405         : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), | 
|  | 1406           [const4] "r"(const4), [const5] "r"(const5) | 
        // Clobbers cover the two accumulators written above.
|  | 1407         : "hi", "lo", "$ac1lo", "$ac1hi"); | 
|  | 1408   } | 
|  | 1409 } | 
|  | 1410 | 
|  | 1411 void ARGBToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { /* Writes one Y byte per 4-byte source pixel: Y = (25*b0 + 129*b1 + 66*b2 + 0x1080) >> 8, b3 unused; b0..b3 are the pixel's bytes in memory order (numbering assumes a little-endian target - TODO confirm). */ | 
|  | 1412   int x; | 
|  | 1413   int const1 = 0x00810019; /* packed halfword weights: 129 for byte 1, 25 for byte 0 */ | 
|  | 1414   int const2 = 0x00000042; /* packed halfword weights: 0 for byte 3, 66 for byte 2 */ | 
|  | 1415   int const5 = 0x40; /* squared by mult below, seeding each accumulator with 0x1000 */ | 
|  | 1416   for (x = 0; x < width; x += 4) { /* every pass converts 4 pixels, even when fewer than 4 remain */ | 
|  | 1417     int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1418     int tmp_t6, tmp_t7, tmp_t8; | 
|  | 1419     __asm__ __volatile__( | 
|  | 1420         ".set push                                                \n" | 
|  | 1421         ".set noreorder                                           \n" | 
|  | 1422         "lw                %[tmp_t1],   0(%[src_argb0])           \n" /* pixel 0; lw expects a 4-byte-aligned src_argb0 */ | 
|  | 1423         "lw                %[tmp_t2],   4(%[src_argb0])           \n" /* pixel 1 */ | 
|  | 1424         "lw                %[tmp_t3],   8(%[src_argb0])           \n" /* pixel 2 */ | 
|  | 1425         "lw                %[tmp_t4],   12(%[src_argb0])          \n" /* pixel 3 */ | 
|  | 1426         "preceu.ph.qbr     %[tmp_t5],   %[tmp_t1]                 \n" /* tmp_t5..t8 = bytes 1,0 of each pixel widened to halfwords */ | 
|  | 1427         "preceu.ph.qbl     %[tmp_t1],   %[tmp_t1]                 \n" /* tmp_t1..t4 = bytes 3,2 of each pixel widened to halfwords */ | 
|  | 1428         "preceu.ph.qbr     %[tmp_t6],   %[tmp_t2]                 \n" | 
|  | 1429         "preceu.ph.qbl     %[tmp_t2],   %[tmp_t2]                 \n" | 
|  | 1430         "preceu.ph.qbr     %[tmp_t7],   %[tmp_t3]                 \n" | 
|  | 1431         "preceu.ph.qbl     %[tmp_t3],   %[tmp_t3]                 \n" | 
|  | 1432         "preceu.ph.qbr     %[tmp_t8],   %[tmp_t4]                 \n" | 
|  | 1433         "preceu.ph.qbl     %[tmp_t4],   %[tmp_t4]                 \n" | 
|  | 1434         "mult              $ac0,        %[const5],     %[const5]  \n" /* ac0..ac3 = 0x40 * 0x40 = 0x1000, one accumulator per pixel */ | 
|  | 1435         "mult              $ac1,        %[const5],     %[const5]  \n" | 
|  | 1436         "mult              $ac2,        %[const5],     %[const5]  \n" | 
|  | 1437         "mult              $ac3,        %[const5],     %[const5]  \n" | 
|  | 1438         "dpa.w.ph          $ac0,        %[tmp_t5],     %[const1]  \n" /* acN += 129*b1 + 25*b0 */ | 
|  | 1439         "dpa.w.ph          $ac1,        %[tmp_t6],     %[const1]  \n" | 
|  | 1440         "dpa.w.ph          $ac2,        %[tmp_t7],     %[const1]  \n" | 
|  | 1441         "dpa.w.ph          $ac3,        %[tmp_t8],     %[const1]  \n" | 
|  | 1442         "dpa.w.ph          $ac0,        %[tmp_t1],     %[const2]  \n" /* acN += 0*b3 + 66*b2 */ | 
|  | 1443         "dpa.w.ph          $ac1,        %[tmp_t2],     %[const2]  \n" | 
|  | 1444         "dpa.w.ph          $ac2,        %[tmp_t3],     %[const2]  \n" | 
|  | 1445         "dpa.w.ph          $ac3,        %[tmp_t4],     %[const2]  \n" | 
|  | 1446         "extr_r.w          %[tmp_t1],   $ac0,          8          \n" /* rounded right shift by 8 (the rounding supplies the extra 0x80) */ | 
|  | 1447         "extr_r.w          %[tmp_t2],   $ac1,          8          \n" | 
|  | 1448         "extr_r.w          %[tmp_t3],   $ac2,          8          \n" | 
|  | 1449         "extr_r.w          %[tmp_t4],   $ac3,          8          \n" | 
|  | 1450         "addiu             %[dst_y],    %[dst_y],      4          \n" /* pointers are advanced first, so the stores use negative offsets */ | 
|  | 1451         "addiu             %[src_argb0],%[src_argb0],  16         \n" | 
|  | 1452         "sb                %[tmp_t1],   -4(%[dst_y])              \n" | 
|  | 1453         "sb                %[tmp_t2],   -3(%[dst_y])              \n" | 
|  | 1454         "sb                %[tmp_t3],   -2(%[dst_y])              \n" | 
|  | 1455         "sb                %[tmp_t4],   -1(%[dst_y])              \n" | 
|  | 1456         ".set pop                                                 \n" | 
|  | 1457         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1458           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1459           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1460           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), | 
|  | 1461           [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) | 
|  | 1462         : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) | 
|  | 1463         : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", | 
|  | 1464           "$ac3hi", "memory"); /* "memory": the asm loads from src_argb0 and stores through dst_y, which the register-only operands do not reveal to the compiler */ | 
|  | 1465   } | 
|  | 1466 } | 
|  | 1467 | 
|  | 1468 void ABGRToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { /* Writes one Y byte per 4-byte source pixel: Y = (66*b0 + 129*b1 + 25*b2 + 0x1080) >> 8, b3 unused; b0..b3 are the pixel's bytes in memory order (numbering assumes a little-endian target - TODO confirm). */ | 
|  | 1469   int x; | 
|  | 1470   int const1 = 0x00810042; /* packed halfword weights: 129 for byte 1, 66 for byte 0 */ | 
|  | 1471   int const2 = 0x00000019; /* packed halfword weights: 0 for byte 3, 25 for byte 2 */ | 
|  | 1472   int const5 = 0x40; /* squared by mult below, seeding each accumulator with 0x1000 */ | 
|  | 1473   for (x = 0; x < width; x += 4) { /* every pass converts 4 pixels, even when fewer than 4 remain */ | 
|  | 1474     int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1475     int tmp_t6, tmp_t7, tmp_t8; | 
|  | 1476     __asm__ __volatile__( | 
|  | 1477         ".set push                                                \n" | 
|  | 1478         ".set noreorder                                           \n" | 
|  | 1479         "lw                %[tmp_t1],   0(%[src_argb0])           \n" /* pixel 0; lw expects a 4-byte-aligned src_argb0 */ | 
|  | 1480         "lw                %[tmp_t2],   4(%[src_argb0])           \n" /* pixel 1 */ | 
|  | 1481         "lw                %[tmp_t3],   8(%[src_argb0])           \n" /* pixel 2 */ | 
|  | 1482         "lw                %[tmp_t4],   12(%[src_argb0])          \n" /* pixel 3 */ | 
|  | 1483         "preceu.ph.qbr     %[tmp_t5],   %[tmp_t1]                 \n" /* tmp_t5..t8 = bytes 1,0 of each pixel widened to halfwords */ | 
|  | 1484         "preceu.ph.qbl     %[tmp_t1],   %[tmp_t1]                 \n" /* tmp_t1..t4 = bytes 3,2 of each pixel widened to halfwords */ | 
|  | 1485         "preceu.ph.qbr     %[tmp_t6],   %[tmp_t2]                 \n" | 
|  | 1486         "preceu.ph.qbl     %[tmp_t2],   %[tmp_t2]                 \n" | 
|  | 1487         "preceu.ph.qbr     %[tmp_t7],   %[tmp_t3]                 \n" | 
|  | 1488         "preceu.ph.qbl     %[tmp_t3],   %[tmp_t3]                 \n" | 
|  | 1489         "preceu.ph.qbr     %[tmp_t8],   %[tmp_t4]                 \n" | 
|  | 1490         "preceu.ph.qbl     %[tmp_t4],   %[tmp_t4]                 \n" | 
|  | 1491         "mult              $ac0,        %[const5],     %[const5]  \n" /* ac0..ac3 = 0x40 * 0x40 = 0x1000, one accumulator per pixel */ | 
|  | 1492         "mult              $ac1,        %[const5],     %[const5]  \n" | 
|  | 1493         "mult              $ac2,        %[const5],     %[const5]  \n" | 
|  | 1494         "mult              $ac3,        %[const5],     %[const5]  \n" | 
|  | 1495         "dpa.w.ph          $ac0,        %[tmp_t5],     %[const1]  \n" /* acN += 129*b1 + 66*b0 */ | 
|  | 1496         "dpa.w.ph          $ac1,        %[tmp_t6],     %[const1]  \n" | 
|  | 1497         "dpa.w.ph          $ac2,        %[tmp_t7],     %[const1]  \n" | 
|  | 1498         "dpa.w.ph          $ac3,        %[tmp_t8],     %[const1]  \n" | 
|  | 1499         "dpa.w.ph          $ac0,        %[tmp_t1],     %[const2]  \n" /* acN += 0*b3 + 25*b2 */ | 
|  | 1500         "dpa.w.ph          $ac1,        %[tmp_t2],     %[const2]  \n" | 
|  | 1501         "dpa.w.ph          $ac2,        %[tmp_t3],     %[const2]  \n" | 
|  | 1502         "dpa.w.ph          $ac3,        %[tmp_t4],     %[const2]  \n" | 
|  | 1503         "extr_r.w          %[tmp_t1],   $ac0,          8          \n" /* rounded right shift by 8 (the rounding supplies the extra 0x80) */ | 
|  | 1504         "extr_r.w          %[tmp_t2],   $ac1,          8          \n" | 
|  | 1505         "extr_r.w          %[tmp_t3],   $ac2,          8          \n" | 
|  | 1506         "extr_r.w          %[tmp_t4],   $ac3,          8          \n" | 
|  | 1507         "addiu             %[src_argb0],%[src_argb0],  16         \n" /* pointers are advanced first, so the stores use negative offsets */ | 
|  | 1508         "addiu             %[dst_y],    %[dst_y],      4          \n" | 
|  | 1509         "sb                %[tmp_t1],   -4(%[dst_y])              \n" | 
|  | 1510         "sb                %[tmp_t2],   -3(%[dst_y])              \n" | 
|  | 1511         "sb                %[tmp_t3],   -2(%[dst_y])              \n" | 
|  | 1512         "sb                %[tmp_t4],   -1(%[dst_y])              \n" | 
|  | 1513         ".set pop                                                 \n" | 
|  | 1514         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1515           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1516           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1517           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), | 
|  | 1518           [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) | 
|  | 1519         : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) | 
|  | 1520         : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", | 
|  | 1521           "$ac3hi", "memory"); /* "memory": the asm loads from src_argb0 and stores through dst_y, which the register-only operands do not reveal to the compiler */ | 
|  | 1522   } | 
|  | 1523 } | 
|  | 1524 | 
|  | 1525 void RGBAToUVRow_DSPR2(const uint8* src_rgb0, /* For each 2x2 pixel block (2 pixels from this row, 2 from the row src_stride_rgb bytes later) averages bytes 1..3 of the 4-byte pixels and writes one U and one V byte. */ | 
|  | 1526                        int src_stride_rgb, | 
|  | 1527                        uint8* dst_u, | 
|  | 1528                        uint8* dst_v, | 
|  | 1529                        int width) { /* U = (112*c0 - 74*c1 - 38*c2 + 0x8080) >> 8, V = (-18*c0 - 94*c1 + 112*c2 + 0x8080) >> 8, with c0..c2 the averaged pixel bytes 1..3 (byte numbering assumes a little-endian target - TODO confirm) */ | 
|  | 1530   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; /* second source row */ | 
|  | 1531   int x; | 
|  | 1532   int const1 = 0xffb60070; /* U weights as packed halfwords: -74 (high), 112 (low) */ | 
|  | 1533   int const2 = 0x0000ffda; /* U weights as packed halfwords: 0 (high), -38 (low) */ | 
|  | 1534   int const3 = 0xffa2ffee; /* V weights as packed halfwords: -94 (high), -18 (low) */ | 
|  | 1535   int const4 = 0x00000070; /* V weights as packed halfwords: 0 (high), 112 (low) */ | 
|  | 1536   int const5 = 0x100; /* squared by mult below, seeding each accumulator with 0x10000 */ | 
|  | 1537 | 
|  | 1538   for (x = 0; x < width - 1; x += 2) { /* two pixels per row per pass; a trailing odd pixel is not processed */ | 
|  | 1539     int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1540     int tmp_t6, tmp_t7, tmp_t8; | 
|  | 1541     __asm__ __volatile__( | 
|  | 1542         ".set push                                                \n" | 
|  | 1543         ".set noreorder                                           \n" | 
|  | 1544         "ulw               %[tmp_t1],   0+1(%[src_rgb0])          \n" /* unaligned load starting at byte 1, skipping the first byte of the pixel */ | 
|  | 1545         "ulw               %[tmp_t2],   4+1(%[src_rgb0])          \n" /* NOTE(review): covers bytes 5..8, i.e. one byte past this pass's two pixels; that byte is weighted 0 but is still read - confirm the row has a readable byte after its last pixel */ | 
|  | 1546         "ulw               %[tmp_t3],   0+1(%[src_rgb1])          \n" | 
|  | 1547         "ulw               %[tmp_t4],   4+1(%[src_rgb1])          \n" | 
|  | 1548         "preceu.ph.qbr     %[tmp_t5],   %[tmp_t1]                 \n" /* tmp_t5..t8 = low two bytes of each loaded word widened to halfwords */ | 
|  | 1549         "preceu.ph.qbl     %[tmp_t1],   %[tmp_t1]                 \n" /* tmp_t1..t4 = high two bytes of each loaded word widened to halfwords */ | 
|  | 1550         "preceu.ph.qbr     %[tmp_t6],   %[tmp_t2]                 \n" | 
|  | 1551         "preceu.ph.qbl     %[tmp_t2],   %[tmp_t2]                 \n" | 
|  | 1552         "preceu.ph.qbr     %[tmp_t7],   %[tmp_t3]                 \n" | 
|  | 1553         "preceu.ph.qbl     %[tmp_t3],   %[tmp_t3]                 \n" | 
|  | 1554         "preceu.ph.qbr     %[tmp_t8],   %[tmp_t4]                 \n" | 
|  | 1555         "preceu.ph.qbl     %[tmp_t4],   %[tmp_t4]                 \n" | 
|  | 1556         "addu.ph           %[tmp_t5],   %[tmp_t5],     %[tmp_t6]  \n" /* per-channel sums over the four pixels of the 2x2 block */ | 
|  | 1557         "addu.ph           %[tmp_t7],   %[tmp_t7],     %[tmp_t8]  \n" | 
|  | 1558         "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t2]  \n" | 
|  | 1559         "addu.ph           %[tmp_t3],   %[tmp_t3],     %[tmp_t4]  \n" | 
|  | 1560         "addu.ph           %[tmp_t5],   %[tmp_t5],     %[tmp_t7]  \n" | 
|  | 1561         "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t3]  \n" | 
|  | 1562         "shrl.ph           %[tmp_t5],   %[tmp_t5],     2          \n" /* sum / 4 = truncated average per channel */ | 
|  | 1563         "shrl.ph           %[tmp_t1],   %[tmp_t1],     2          \n" | 
|  | 1564         "mult              $ac0,        %[const5],     %[const5]  \n" /* ac0 (U) and ac1 (V) = 0x100 * 0x100 = 0x10000 */ | 
|  | 1565         "mult              $ac1,        %[const5],     %[const5]  \n" | 
|  | 1566         "dpaq_s.w.ph       $ac0,        %[tmp_t5],     %[const1]  \n" /* fractional multiply-accumulate: each product is doubled, hence the 0x10000 seed and the shift by 9 below */ | 
|  | 1567         "dpaq_s.w.ph       $ac1,        %[tmp_t5],     %[const3]  \n" | 
|  | 1568         "dpaq_s.w.ph       $ac0,        %[tmp_t1],     %[const2]  \n" | 
|  | 1569         "dpaq_s.w.ph       $ac1,        %[tmp_t1],     %[const4]  \n" | 
|  | 1570         "extr_r.w          %[tmp_t7],   $ac0,          9          \n" /* rounded right shift by 9 -> U */ | 
|  | 1571         "extr_r.w          %[tmp_t8],   $ac1,          9          \n" /* rounded right shift by 9 -> V */ | 
|  | 1572         "addiu             %[src_rgb0], %[src_rgb0], 8            \n" /* pointers are advanced first, so the stores use offset -1 */ | 
|  | 1573         "addiu             %[src_rgb1], %[src_rgb1], 8            \n" | 
|  | 1574         "addiu             %[dst_u],    %[dst_u],    1            \n" | 
|  | 1575         "addiu             %[dst_v],    %[dst_v],    1            \n" | 
|  | 1576         "sb                %[tmp_t7],   -1(%[dst_u])              \n" | 
|  | 1577         "sb                %[tmp_t8],   -1(%[dst_v])              \n" | 
|  | 1578         ".set pop                                                 \n" | 
|  | 1579         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1580           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1581           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1582           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), | 
|  | 1583           [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), | 
|  | 1584           [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) | 
|  | 1585         : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), | 
|  | 1586           [const4] "r"(const4), [const5] "r"(const5) | 
|  | 1587         : "hi", "lo", "$ac1lo", "$ac1hi", "memory"); /* "memory": the asm loads from both source rows and stores through dst_u/dst_v, which the register-only operands do not reveal to the compiler */ | 
|  | 1588   } | 
|  | 1589 } | 
|  | 1590 | 
|  | 1591 void RGBAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) { /* Writes one Y byte per 4-byte source pixel: Y = (25*b1 + 129*b2 + 66*b3 + 0x1080) >> 8, b0 unused; b0..b3 are the pixel's bytes in memory order (numbering assumes a little-endian target - TODO confirm). */ | 
|  | 1592   int x; | 
|  | 1593   int const1 = 0x00420081; /* packed halfword weights: 66 for byte 3, 129 for byte 2 */ | 
|  | 1594   int const2 = 0x00190000; /* packed halfword weights: 25 for byte 1, 0 for byte 0 */ | 
|  | 1595   int const5 = 0x40; /* squared by mult below, seeding each accumulator with 0x1000 */ | 
|  | 1596   for (x = 0; x < width; x += 4) { /* every pass converts 4 pixels, even when fewer than 4 remain */ | 
|  | 1597     int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1598     int tmp_t6, tmp_t7, tmp_t8; | 
|  | 1599     __asm__ __volatile__( | 
|  | 1600         ".set push                                                \n" | 
|  | 1601         ".set noreorder                                           \n" | 
|  | 1602         "lw                %[tmp_t1],   0(%[src_argb0])           \n" /* pixel 0; lw expects a 4-byte-aligned src_argb0 */ | 
|  | 1603         "lw                %[tmp_t2],   4(%[src_argb0])           \n" /* pixel 1 */ | 
|  | 1604         "lw                %[tmp_t3],   8(%[src_argb0])           \n" /* pixel 2 */ | 
|  | 1605         "lw                %[tmp_t4],   12(%[src_argb0])          \n" /* pixel 3 */ | 
|  | 1606         "preceu.ph.qbl     %[tmp_t5],   %[tmp_t1]                 \n" /* tmp_t5..t8 = bytes 3,2 of each pixel widened to halfwords */ | 
|  | 1607         "preceu.ph.qbr     %[tmp_t1],   %[tmp_t1]                 \n" /* tmp_t1..t4 = bytes 1,0 of each pixel widened to halfwords */ | 
|  | 1608         "preceu.ph.qbl     %[tmp_t6],   %[tmp_t2]                 \n" | 
|  | 1609         "preceu.ph.qbr     %[tmp_t2],   %[tmp_t2]                 \n" | 
|  | 1610         "preceu.ph.qbl     %[tmp_t7],   %[tmp_t3]                 \n" | 
|  | 1611         "preceu.ph.qbr     %[tmp_t3],   %[tmp_t3]                 \n" | 
|  | 1612         "preceu.ph.qbl     %[tmp_t8],   %[tmp_t4]                 \n" | 
|  | 1613         "preceu.ph.qbr     %[tmp_t4],   %[tmp_t4]                 \n" | 
|  | 1614         "mult              $ac0,        %[const5],     %[const5]  \n" /* ac0..ac3 = 0x40 * 0x40 = 0x1000, one accumulator per pixel */ | 
|  | 1615         "mult              $ac1,        %[const5],     %[const5]  \n" | 
|  | 1616         "mult              $ac2,        %[const5],     %[const5]  \n" | 
|  | 1617         "mult              $ac3,        %[const5],     %[const5]  \n" | 
|  | 1618         "dpa.w.ph          $ac0,        %[tmp_t5],     %[const1]  \n" /* acN += 66*b3 + 129*b2 */ | 
|  | 1619         "dpa.w.ph          $ac1,        %[tmp_t6],     %[const1]  \n" | 
|  | 1620         "dpa.w.ph          $ac2,        %[tmp_t7],     %[const1]  \n" | 
|  | 1621         "dpa.w.ph          $ac3,        %[tmp_t8],     %[const1]  \n" | 
|  | 1622         "dpa.w.ph          $ac0,        %[tmp_t1],     %[const2]  \n" /* acN += 25*b1 + 0*b0 */ | 
|  | 1623         "dpa.w.ph          $ac1,        %[tmp_t2],     %[const2]  \n" | 
|  | 1624         "dpa.w.ph          $ac2,        %[tmp_t3],     %[const2]  \n" | 
|  | 1625         "dpa.w.ph          $ac3,        %[tmp_t4],     %[const2]  \n" | 
|  | 1626         "extr_r.w          %[tmp_t1],   $ac0,          8          \n" /* rounded right shift by 8 (the rounding supplies the extra 0x80) */ | 
|  | 1627         "extr_r.w          %[tmp_t2],   $ac1,          8          \n" | 
|  | 1628         "extr_r.w          %[tmp_t3],   $ac2,          8          \n" | 
|  | 1629         "extr_r.w          %[tmp_t4],   $ac3,          8          \n" | 
|  | 1630         "addiu             %[dst_y],    %[dst_y],      4          \n" /* pointers are advanced first, so the stores use negative offsets */ | 
|  | 1631         "addiu             %[src_argb0],%[src_argb0],  16         \n" | 
|  | 1632         "sb                %[tmp_t1],   -4(%[dst_y])              \n" | 
|  | 1633         "sb                %[tmp_t2],   -3(%[dst_y])              \n" | 
|  | 1634         "sb                %[tmp_t3],   -2(%[dst_y])              \n" | 
|  | 1635         "sb                %[tmp_t4],   -1(%[dst_y])              \n" | 
|  | 1636         ".set pop                                                 \n" | 
|  | 1637         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1638           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1639           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1640           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), | 
|  | 1641           [src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y) | 
|  | 1642         : [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5) | 
|  | 1643         : "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", | 
|  | 1644           "$ac3hi", "memory"); /* "memory": the asm loads from src_argb0 and stores through dst_y, which the register-only operands do not reveal to the compiler */ | 
|  | 1645   } | 
|  | 1646 } | 
|  | 1647 | 
|  | 1648 void ARGBToUVRow_DSPR2(const uint8* src_rgb0, /* For each 2x2 pixel block (2 pixels from this row, 2 from the row src_stride_rgb bytes later) averages bytes 0..2 of the 4-byte pixels and writes one U and one V byte. */ | 
|  | 1649                        int src_stride_rgb, | 
|  | 1650                        uint8* dst_u, | 
|  | 1651                        uint8* dst_v, | 
|  | 1652                        int width) { /* U = (112*c0 - 74*c1 - 38*c2 + 0x8080) >> 8, V = (-18*c0 - 94*c1 + 112*c2 + 0x8080) >> 8, with c0..c2 the averaged pixel bytes 0..2 (byte numbering assumes a little-endian target - TODO confirm) */ | 
|  | 1653   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; /* second source row */ | 
|  | 1654   int x; | 
|  | 1655   int const1 = 0xffb60070; /* U weights as packed halfwords: -74 (high), 112 (low) */ | 
|  | 1656   int const2 = 0x0000ffda; /* U weights as packed halfwords: 0 (high), -38 (low) */ | 
|  | 1657   int const3 = 0xffa2ffee; /* V weights as packed halfwords: -94 (high), -18 (low) */ | 
|  | 1658   int const4 = 0x00000070; /* V weights as packed halfwords: 0 (high), 112 (low) */ | 
|  | 1659   int const5 = 0x100; /* squared by mult below, seeding each accumulator with 0x10000 */ | 
|  | 1660 | 
|  | 1661   for (x = 0; x < width - 1; x += 2) { /* two pixels per row per pass; a trailing odd pixel is not processed */ | 
|  | 1662     int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5; | 
|  | 1663     int tmp_t6, tmp_t7, tmp_t8; | 
|  | 1664     __asm__ __volatile__( | 
|  | 1665         ".set push                                                \n" | 
|  | 1666         ".set noreorder                                           \n" | 
|  | 1667         "lw                %[tmp_t1],   0(%[src_rgb0])            \n" /* row 0, pixel 0; lw expects 4-byte-aligned row pointers */ | 
|  | 1668         "lw                %[tmp_t2],   4(%[src_rgb0])            \n" /* row 0, pixel 1 */ | 
|  | 1669         "lw                %[tmp_t3],   0(%[src_rgb1])            \n" /* row 1, pixel 0 */ | 
|  | 1670         "lw                %[tmp_t4],   4(%[src_rgb1])            \n" /* row 1, pixel 1 */ | 
|  | 1671         "preceu.ph.qbr     %[tmp_t5],   %[tmp_t1]                 \n" /* tmp_t5..t8 = bytes 1,0 of each pixel widened to halfwords */ | 
|  | 1672         "preceu.ph.qbl     %[tmp_t1],   %[tmp_t1]                 \n" /* tmp_t1..t4 = bytes 3,2 of each pixel widened to halfwords */ | 
|  | 1673         "preceu.ph.qbr     %[tmp_t6],   %[tmp_t2]                 \n" | 
|  | 1674         "preceu.ph.qbl     %[tmp_t2],   %[tmp_t2]                 \n" | 
|  | 1675         "preceu.ph.qbr     %[tmp_t7],   %[tmp_t3]                 \n" | 
|  | 1676         "preceu.ph.qbl     %[tmp_t3],   %[tmp_t3]                 \n" | 
|  | 1677         "preceu.ph.qbr     %[tmp_t8],   %[tmp_t4]                 \n" | 
|  | 1678         "preceu.ph.qbl     %[tmp_t4],   %[tmp_t4]                 \n" | 
|  | 1679         "addu.ph           %[tmp_t5],   %[tmp_t5],     %[tmp_t6]  \n" /* per-channel sums over the four pixels of the 2x2 block */ | 
|  | 1680         "addu.ph           %[tmp_t7],   %[tmp_t7],     %[tmp_t8]  \n" | 
|  | 1681         "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t2]  \n" | 
|  | 1682         "addu.ph           %[tmp_t3],   %[tmp_t3],     %[tmp_t4]  \n" | 
|  | 1683         "addu.ph           %[tmp_t5],   %[tmp_t5],     %[tmp_t7]  \n" | 
|  | 1684         "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t3]  \n" | 
|  | 1685         "shrl.ph           %[tmp_t5],   %[tmp_t5],     2          \n" /* sum / 4 = truncated average per channel */ | 
|  | 1686         "shrl.ph           %[tmp_t1],   %[tmp_t1],     2          \n" | 
|  | 1687         "mult              $ac0,        %[const5],     %[const5]  \n" /* ac0 (U) and ac1 (V) = 0x100 * 0x100 = 0x10000 */ | 
|  | 1688         "mult              $ac1,        %[const5],     %[const5]  \n" | 
|  | 1689         "dpaq_s.w.ph       $ac0,        %[tmp_t5],     %[const1]  \n" /* fractional multiply-accumulate: each product is doubled, hence the 0x10000 seed and the shift by 9 below */ | 
|  | 1690         "dpaq_s.w.ph       $ac1,        %[tmp_t5],     %[const3]  \n" | 
|  | 1691         "dpaq_s.w.ph       $ac0,        %[tmp_t1],     %[const2]  \n" | 
|  | 1692         "dpaq_s.w.ph       $ac1,        %[tmp_t1],     %[const4]  \n" | 
|  | 1693         "extr_r.w          %[tmp_t7],   $ac0,          9          \n" /* rounded right shift by 9 -> U */ | 
|  | 1694         "extr_r.w          %[tmp_t8],   $ac1,          9          \n" /* rounded right shift by 9 -> V */ | 
|  | 1695         "addiu             %[src_rgb0], %[src_rgb0], 8            \n" /* pointers are advanced first, so the stores use offset -1 */ | 
|  | 1696         "addiu             %[src_rgb1], %[src_rgb1], 8            \n" | 
|  | 1697         "addiu             %[dst_u],    %[dst_u],    1            \n" | 
|  | 1698         "addiu             %[dst_v],    %[dst_v],    1            \n" | 
|  | 1699         "sb                %[tmp_t7],   -1(%[dst_u])              \n" | 
|  | 1700         "sb                %[tmp_t8],   -1(%[dst_v])              \n" | 
|  | 1701         ".set pop                                                 \n" | 
|  | 1702         : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), | 
|  | 1703           [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4), | 
|  | 1704           [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6), | 
|  | 1705           [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), | 
|  | 1706           [src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1), | 
|  | 1707           [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v) | 
|  | 1708         : [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3), | 
|  | 1709           [const4] "r"(const4), [const5] "r"(const5) | 
|  | 1710         : "hi", "lo", "$ac1lo", "$ac1hi", "memory"); /* "memory": the asm loads from both source rows and stores through dst_u/dst_v, which the register-only operands do not reveal to the compiler */ | 
|  | 1711   } | 
|  | 1712 } | 
|  | 1713 | 
| 764 #endif  // __mips_dsp_rev >= 2 | 1714 #endif  // __mips_dsp_rev >= 2 | 
| 765 | 1715 | 
| 766 #endif  // defined(__mips__) | 1716 #endif  // defined(__mips__) | 
| 767 | 1717 | 
| 768 #ifdef __cplusplus | 1718 #ifdef __cplusplus | 
| 769 }  // extern "C" | 1719 }  // extern "C" | 
| 770 }  // namespace libyuv | 1720 }  // namespace libyuv | 
| 771 #endif | 1721 #endif | 
| OLD | NEW | 
|---|