| OLD | NEW | 
|---|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| 11 | 11 | 
| 12     EXPORT  |vp8_yv12_extend_frame_borders_neon| | 12     EXPORT  |vp8_yv12_extend_frame_borders_neon| | 
| 13     ARM | 13     ARM | 
| 14     REQUIRE8 | 14     REQUIRE8 | 
| 15     PRESERVE8 | 15     PRESERVE8 | 
| 16 | 16 | 
| 17     INCLUDE asm_com_offsets.asm | 17     INCLUDE asm_com_offsets.asm | 
| 18 | 18 | 
| 19     AREA ||.text||, CODE, READONLY, ALIGN=2 | 19     AREA ||.text||, CODE, READONLY, ALIGN=2 | 
| 20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf); | 20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf); | 
| 21 ;Note: this is VP8 function, which has border=32 and 16. Internal y_width and y_
     height | 21 ; we depend on VP8BORDERINPIXELS being 32 | 
| 22 ; are always multiples of 16. |  | 
| 23 | 22 | 
| 24 |vp8_yv12_extend_frame_borders_neon| PROC | 23 |vp8_yv12_extend_frame_borders_neon| PROC | 
| 25     push            {r4 - r10, lr} | 24     push            {r4 - r10, lr} | 
| 26     vpush           {d8 - d15} | 25     vpush           {d8 - d15} | 
| 27 | 26 | 
| 28     ;Not need to load y_width, since: y_width = y_stride - 2*border | 27     ; Border = 32 | 
| 29     ldr             r3, [r0, #yv12_buffer_config_border] | 28     ldr             r3, [r0, #yv12_buffer_config_y_width]  ; plane_width | 
| 30     ldr             r1, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1 | 29     ldr             r1, [r0, #yv12_buffer_config_y_buffer] ; src_ptr1 | 
| 31     ldr             r4, [r0, #yv12_buffer_config_y_height] | 30     ldr             r4, [r0, #yv12_buffer_config_y_height] ; plane_height | 
| 32     ldr             lr, [r0, #yv12_buffer_config_y_stride] | 31     ldr             lr, [r0, #yv12_buffer_config_y_stride] ; plane_stride | 
| 33 | 32 | 
| 34     cmp             r3, #16 | 33 ; Border copy for Y plane | 
| 35     beq             b16_extend_frame_borders | 34 ; copy the left and right most columns out | 
|  | 35     add             r6, r1, r3              ; dest_ptr2 = src_ptr2 + 1 (src_ptr1
      + plane_width) | 
|  | 36     sub             r2, r6, #1              ; src_ptr2 = src_ptr1 + plane_width 
     - 1 | 
|  | 37     sub             r5, r1, #32             ; dest_ptr1 = src_ptr1 - Border | 
| 36 | 38 | 
| 37 ;======================= | 39     mov             r12, r4, lsr #2         ; plane_height / 4 | 
| 38 b32_extend_frame_borders |  | 
| 39 ;border = 32 |  | 
| 40 ;======================= |  | 
| 41 ;Border copy for Y plane |  | 
| 42 ;copy the left and right most columns out |  | 
| 43     sub             r5, r1, r3              ;destptr1 |  | 
| 44     add             r6, r1, lr |  | 
| 45     sub             r6, r6, r3, lsl #1      ;destptr2 |  | 
| 46     sub             r2, r6, #1              ;srcptr2 |  | 
| 47 |  | 
| 48     ;Do four rows at one time |  | 
| 49     mov             r12, r4, lsr #2 |  | 
| 50 | 40 | 
| 51 copy_left_right_y | 41 copy_left_right_y | 
| 52     vld1.8          {d0[], d1[]}, [r1], lr | 42     vld1.8          {d0[], d1[]}, [r1], lr | 
| 53     vld1.8          {d4[], d5[]}, [r2], lr | 43     vld1.8          {d4[], d5[]}, [r2], lr | 
| 54     vld1.8          {d8[], d9[]}, [r1], lr | 44     vld1.8          {d8[], d9[]}, [r1], lr | 
| 55     vld1.8          {d12[], d13[]}, [r2], lr | 45     vld1.8          {d12[], d13[]}, [r2], lr | 
| 56     vld1.8          {d16[], d17[]},  [r1], lr | 46     vld1.8          {d16[], d17[]}, [r1], lr | 
| 57     vld1.8          {d20[], d21[]}, [r2], lr | 47     vld1.8          {d20[], d21[]}, [r2], lr | 
| 58     vld1.8          {d24[], d25[]}, [r1], lr | 48     vld1.8          {d24[], d25[]}, [r1], lr | 
| 59     vld1.8          {d28[], d29[]}, [r2], lr | 49     vld1.8          {d28[], d29[]}, [r2], lr | 
| 60 | 50 | 
| 61     vmov            q1, q0 | 51     vmov            q1, q0 | 
| 62     vmov            q3, q2 | 52     vmov            q3, q2 | 
| 63     vmov            q5, q4 | 53     vmov            q5, q4 | 
| 64     vmov            q7, q6 | 54     vmov            q7, q6 | 
| 65     vmov            q9, q8 | 55     vmov            q9, q8 | 
| 66     vmov            q11, q10 | 56     vmov            q11, q10 | 
| 67     vmov            q13, q12 | 57     vmov            q13, q12 | 
| 68     vmov            q15, q14 | 58     vmov            q15, q14 | 
| 69 | 59 | 
| 70     subs            r12, r12, #1 | 60     subs            r12, r12, #1 | 
| 71 | 61 | 
| 72     vst1.8          {q0, q1}, [r5], lr | 62     vst1.8          {q0, q1}, [r5], lr | 
| 73     vst1.8          {q2, q3}, [r6], lr | 63     vst1.8          {q2, q3}, [r6], lr | 
| 74     vst1.8          {q4, q5}, [r5], lr | 64     vst1.8          {q4, q5}, [r5], lr | 
| 75     vst1.8          {q6, q7}, [r6], lr | 65     vst1.8          {q6, q7}, [r6], lr | 
| 76     vst1.8          {q8, q9}, [r5], lr | 66     vst1.8          {q8, q9}, [r5], lr | 
| 77     vst1.8          {q10, q11}, [r6], lr | 67     vst1.8          {q10, q11}, [r6], lr | 
| 78     vst1.8          {q12, q13}, [r5], lr | 68     vst1.8          {q12, q13}, [r5], lr | 
| 79     vst1.8          {q14, q15}, [r6], lr | 69     vst1.8          {q14, q15}, [r6], lr | 
| 80 | 70 | 
| 81     bne             copy_left_right_y | 71     bne             copy_left_right_y | 
| 82 | 72 | 
| 83 ;Now copy the top and bottom source lines into each line of the respective borde
     rs | 73 ;Now copy the top and bottom source lines into each line of the respective borde
     rs | 
| 84     ldr             r7, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1 | 74     ldr             r1, [r0, #yv12_buffer_config_y_buffer] ; y_buffer | 
| 85     mul             r8, r3, lr | 75     mul             r8, r4, lr              ; plane_height * plane_stride | 
| 86 | 76 | 
| 87     mov             r12, lr, lsr #7 | 77     ; copy width is plane_stride | 
|  | 78     mov             r12, lr, lsr #7         ; plane_stride / 128 | 
| 88 | 79 | 
| 89     sub             r6, r1, r3              ;destptr2 | 80     sub             r1, r1, #32             ; src_ptr1 = y_buffer - Border | 
| 90     sub             r2, r6, lr              ;srcptr2 | 81     add             r6, r1, r8              ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)) | 
| 91     sub             r1, r7, r3              ;srcptr1 | 82     sub             r2, r6, lr              ; src_ptr2 = src_ptr1 + (plane_heigh
     t * plane_stride) - plane_stride | 
| 92     sub             r5, r1, r8              ;destptr1 | 83     sub             r5, r1, lr, asl #5      ; dest_ptr1 = src_ptr1 - (Border * p
     lane_stride) | 
| 93 | 84 | 
| 94 copy_top_bottom_y | 85 copy_top_bottom_y | 
| 95     vld1.8          {q0, q1}, [r1]! | 86     vld1.8          {q0, q1}, [r1]! | 
| 96     vld1.8          {q8, q9}, [r2]! | 87     vld1.8          {q8, q9}, [r2]! | 
| 97     vld1.8          {q2, q3}, [r1]! | 88     vld1.8          {q2, q3}, [r1]! | 
| 98     vld1.8          {q10, q11}, [r2]! | 89     vld1.8          {q10, q11}, [r2]! | 
| 99     vld1.8          {q4, q5}, [r1]! | 90     vld1.8          {q4, q5}, [r1]! | 
| 100     vld1.8          {q12, q13}, [r2]! | 91     vld1.8          {q12, q13}, [r2]! | 
| 101     vld1.8          {q6, q7}, [r1]! | 92     vld1.8          {q6, q7}, [r1]! | 
| 102     vld1.8          {q14, q15}, [r2]! | 93     vld1.8          {q14, q15}, [r2]! | 
| 103 | 94 | 
| 104     mov             r7, r3 | 95     mov             r7, #32                 ; Border | 
| 105 | 96 | 
| 106 top_bottom_32 | 97 top_bottom_32 | 
| 107     subs            r7, r7, #1 | 98     subs            r7, r7, #1 | 
| 108 | 99 | 
| 109     vst1.8          {q0, q1}, [r5]! | 100     vst1.8          {q0, q1}, [r5]! | 
| 110     vst1.8          {q8, q9}, [r6]! | 101     vst1.8          {q8, q9}, [r6]! | 
| 111     vst1.8          {q2, q3}, [r5]! | 102     vst1.8          {q2, q3}, [r5]! | 
| 112     vst1.8          {q10, q11}, [r6]! | 103     vst1.8          {q10, q11}, [r6]! | 
| 113     vst1.8          {q4, q5}, [r5]! | 104     vst1.8          {q4, q5}, [r5]! | 
| 114     vst1.8          {q12, q13}, [r6]! | 105     vst1.8          {q12, q13}, [r6]! | 
| 115     vst1.8          {q6, q7}, [r5]! | 106     vst1.8          {q6, q7}, [r5]! | 
| 116     vst1.8          {q14, q15}, [r6]! | 107     vst1.8          {q14, q15}, [r6]! | 
| 117 | 108 | 
| 118     add             r5, r5, lr | 109     add             r5, r5, lr              ; dest_ptr1 += plane_stride | 
| 119     sub             r5, r5, #128 | 110     sub             r5, r5, #128            ; dest_ptr1 -= 128 | 
| 120     add             r6, r6, lr | 111     add             r6, r6, lr              ; dest_ptr2 += plane_stride | 
| 121     sub             r6, r6, #128 | 112     sub             r6, r6, #128            ; dest_ptr2 -= 128 | 
| 122 | 113 | 
| 123     bne             top_bottom_32 | 114     bne             top_bottom_32 | 
| 124 | 115 | 
| 125     sub             r5, r1, r8 | 116     sub             r5, r1, lr, asl #5      ; src_ptr1 - (Border* plane_stride) | 
| 126     add             r6, r2, lr | 117     add             r6, r2, lr              ; src_ptr2 + plane_stride | 
| 127 | 118 | 
| 128     subs            r12, r12, #1 | 119     subs            r12, r12, #1 | 
| 129     bne             copy_top_bottom_y | 120     bne             copy_top_bottom_y | 
| 130 | 121 | 
| 131     mov             r7, lr, lsr #4              ;check to see if extra copy is n
     eeded | 122     mov             r7, lr, lsr #4          ; check to see if extra copy is need
     ed | 
| 132     ands            r7, r7, #0x7 | 123     ands            r7, r7, #0x7 | 
| 133     bne             extra_top_bottom_y | 124     bne             extra_top_bottom_y | 
| 134 end_of_border_copy_y | 125 end_of_border_copy_y | 
| 135 | 126 | 
| 136 ;Border copy for U, V planes | 127 ;Border copy for U, V planes | 
| 137     ldr             r1, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1 | 128 ; Border = 16 | 
| 138     mov             lr, lr, lsr #1              ;uv_stride | 129     ldr             r7, [r0, #yv12_buffer_config_u_buffer]  ; src_ptr1 | 
| 139     mov             r3, r3, lsr #1              ;border | 130     ldr             lr, [r0, #yv12_buffer_config_uv_stride] ; plane_stride | 
| 140     mov             r4, r4, lsr #1              ;uv_height | 131     ldr             r3, [r0, #yv12_buffer_config_uv_width]  ; plane_width | 
| 141     mov             r8, r8, lsr #2 | 132     ldr             r4, [r0, #yv12_buffer_config_uv_height] ; plane_height | 
| 142 | 133 | 
| 143     mov             r10, #2 | 134     mov             r10, #2 | 
| 144 | 135 | 
| 145 ;copy the left and right most columns out | 136 ;copy the left and right most columns out | 
| 146 border_copy_uv | 137 border_copy_uv | 
| 147     sub             r5, r1, r3              ;destptr1 | 138     mov             r1, r7                  ; src_ptr1 needs to be saved for sec
     ond half of loop | 
| 148     add             r6, r1, lr | 139     sub             r5, r1, #16             ; dest_ptr1 = src_ptr1 - Border | 
| 149     sub             r6, r6, r3, lsl #1      ;destptr2 | 140     add             r6, r1, r3              ; dest_ptr2 = src_ptr2 + 1 (src_ptr1
      + plane_width) | 
| 150     sub             r2, r6, #1              ;srcptr2 | 141     sub             r2, r6, #1              ; src_ptr2 = src_ptr1 + plane_width 
     - 1 | 
| 151 | 142 | 
| 152     mov             r7, r1 | 143     mov             r12, r4, lsr #3         ; plane_height / 8 | 
| 153 |  | 
| 154     ;Do eight rows at one time |  | 
| 155     mov             r12, r4, lsr #3 |  | 
| 156 | 144 | 
| 157 copy_left_right_uv | 145 copy_left_right_uv | 
| 158     vld1.8          {d0[], d1[]}, [r1], lr | 146     vld1.8          {d0[], d1[]}, [r1], lr | 
| 159     vld1.8          {d2[], d3[]}, [r2], lr | 147     vld1.8          {d2[], d3[]}, [r2], lr | 
| 160     vld1.8          {d4[], d5[]}, [r1], lr | 148     vld1.8          {d4[], d5[]}, [r1], lr | 
| 161     vld1.8          {d6[], d7[]}, [r2], lr | 149     vld1.8          {d6[], d7[]}, [r2], lr | 
| 162     vld1.8          {d8[], d9[]},  [r1], lr | 150     vld1.8          {d8[], d9[]},  [r1], lr | 
| 163     vld1.8          {d10[], d11[]}, [r2], lr | 151     vld1.8          {d10[], d11[]}, [r2], lr | 
| 164     vld1.8          {d12[], d13[]}, [r1], lr | 152     vld1.8          {d12[], d13[]}, [r1], lr | 
| 165     vld1.8          {d14[], d15[]}, [r2], lr | 153     vld1.8          {d14[], d15[]}, [r2], lr | 
| 166     vld1.8          {d16[], d17[]}, [r1], lr | 154     vld1.8          {d16[], d17[]}, [r1], lr | 
| 167     vld1.8          {d18[], d19[]}, [r2], lr | 155     vld1.8          {d18[], d19[]}, [r2], lr | 
| 168     vld1.8          {d20[], d21[]}, [r1], lr | 156     vld1.8          {d20[], d21[]}, [r1], lr | 
| 169     vld1.8          {d22[], d23[]}, [r2], lr | 157     vld1.8          {d22[], d23[]}, [r2], lr | 
| 170     vld1.8          {d24[], d25[]},  [r1], lr | 158     vld1.8          {d24[], d25[]}, [r1], lr | 
| 171     vld1.8          {d26[], d27[]}, [r2], lr | 159     vld1.8          {d26[], d27[]}, [r2], lr | 
| 172     vld1.8          {d28[], d29[]}, [r1], lr | 160     vld1.8          {d28[], d29[]}, [r1], lr | 
| 173     vld1.8          {d30[], d31[]}, [r2], lr | 161     vld1.8          {d30[], d31[]}, [r2], lr | 
| 174 | 162 | 
| 175     subs            r12, r12, #1 | 163     subs            r12, r12, #1 | 
| 176 | 164 | 
| 177     vst1.8          {q0}, [r5], lr | 165     vst1.8          {q0}, [r5], lr | 
| 178     vst1.8          {q1}, [r6], lr | 166     vst1.8          {q1}, [r6], lr | 
| 179     vst1.8          {q2}, [r5], lr | 167     vst1.8          {q2}, [r5], lr | 
| 180     vst1.8          {q3}, [r6], lr | 168     vst1.8          {q3}, [r6], lr | 
| 181     vst1.8          {q4}, [r5], lr | 169     vst1.8          {q4}, [r5], lr | 
| 182     vst1.8          {q5}, [r6], lr | 170     vst1.8          {q5}, [r6], lr | 
| 183     vst1.8          {q6}, [r5], lr | 171     vst1.8          {q6}, [r5], lr | 
| 184     vst1.8          {q7}, [r6], lr | 172     vst1.8          {q7}, [r6], lr | 
| 185     vst1.8          {q8}, [r5], lr | 173     vst1.8          {q8}, [r5], lr | 
| 186     vst1.8          {q9}, [r6], lr | 174     vst1.8          {q9}, [r6], lr | 
| 187     vst1.8          {q10}, [r5], lr | 175     vst1.8          {q10}, [r5], lr | 
| 188     vst1.8          {q11}, [r6], lr | 176     vst1.8          {q11}, [r6], lr | 
| 189     vst1.8          {q12}, [r5], lr | 177     vst1.8          {q12}, [r5], lr | 
| 190     vst1.8          {q13}, [r6], lr | 178     vst1.8          {q13}, [r6], lr | 
| 191     vst1.8          {q14}, [r5], lr | 179     vst1.8          {q14}, [r5], lr | 
| 192     vst1.8          {q15}, [r6], lr | 180     vst1.8          {q15}, [r6], lr | 
| 193 | 181 | 
| 194     bne             copy_left_right_uv | 182     bne             copy_left_right_uv | 
| 195 | 183 | 
| 196 ;Now copy the top and bottom source lines into each line of the respective borde
     rs | 184 ;Now copy the top and bottom source lines into each line of the respective borde
     rs | 
| 197     mov             r12, lr, lsr #6 | 185     mov             r1, r7 | 
|  | 186     mul             r8, r4, lr              ; plane_height * plane_stride | 
|  | 187     mov             r12, lr, lsr #6         ; plane_stride / 64 | 
| 198 | 188 | 
| 199     sub             r6, r1, r3              ;destptr2 | 189     sub             r1, r1, #16             ; src_ptr1 = u_buffer - Border | 
| 200     sub             r2, r6, lr              ;srcptr2 | 190     add             r6, r1, r8              ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)) | 
| 201     sub             r1, r7, r3              ;srcptr1 | 191     sub             r2, r6, lr              ; src_ptr2 = src_ptr1 + (plane_heigh
     t * plane_stride) - plane_stride | 
| 202     sub             r5, r1, r8              ;destptr1 | 192     sub             r5, r1, lr, asl #4      ; dest_ptr1 = src_ptr1 - (Border * p
     lane_stride) | 
| 203 | 193 | 
| 204 copy_top_bottom_uv | 194 copy_top_bottom_uv | 
| 205     vld1.8          {q0, q1}, [r1]! | 195     vld1.8          {q0, q1}, [r1]! | 
| 206     vld1.8          {q8, q9}, [r2]! | 196     vld1.8          {q8, q9}, [r2]! | 
| 207     vld1.8          {q2, q3}, [r1]! | 197     vld1.8          {q2, q3}, [r1]! | 
| 208     vld1.8          {q10, q11}, [r2]! | 198     vld1.8          {q10, q11}, [r2]! | 
| 209 | 199 | 
| 210     mov             r7, r3 | 200     mov             r7, #16                 ; Border | 
| 211 | 201 | 
| 212 top_bottom_16 | 202 top_bottom_16 | 
| 213     subs            r7, r7, #1 | 203     subs            r7, r7, #1 | 
| 214 | 204 | 
| 215     vst1.8          {q0, q1}, [r5]! | 205     vst1.8          {q0, q1}, [r5]! | 
| 216     vst1.8          {q8, q9}, [r6]! | 206     vst1.8          {q8, q9}, [r6]! | 
| 217     vst1.8          {q2, q3}, [r5]! | 207     vst1.8          {q2, q3}, [r5]! | 
| 218     vst1.8          {q10, q11}, [r6]! | 208     vst1.8          {q10, q11}, [r6]! | 
| 219 | 209 | 
| 220     add             r5, r5, lr | 210     add             r5, r5, lr              ; dest_ptr1 += plane_stride | 
| 221     sub             r5, r5, #64 | 211     sub             r5, r5, #64 | 
| 222     add             r6, r6, lr | 212     add             r6, r6, lr              ; dest_ptr2 += plane_stride | 
| 223     sub             r6, r6, #64 | 213     sub             r6, r6, #64 | 
| 224 | 214 | 
| 225     bne             top_bottom_16 | 215     bne             top_bottom_16 | 
| 226 | 216 | 
| 227     sub             r5, r1, r8 | 217     sub             r5, r1, lr, asl #4      ; dest_ptr1 = src_ptr1 - (Border * p
     lane_stride) | 
| 228     add             r6, r2, lr | 218     add             r6, r2, lr              ; dest_ptr2 = src_ptr2 + plane_strid
     e | 
| 229 | 219 | 
| 230     subs            r12, r12, #1 | 220     subs            r12, r12, #1 | 
| 231     bne             copy_top_bottom_uv | 221     bne             copy_top_bottom_uv | 
| 232 | 222 | 
| 233     mov             r7, lr, lsr #3              ;check to see if extra copy is n
     eeded | 223     mov             r7, lr, lsr #3          ; check to see if extra copy is need
     ed | 
| 234     ands            r7, r7, #0x7 | 224     ands            r7, r7, #0x7 | 
| 235     bne             extra_top_bottom_uv | 225     bne             extra_top_bottom_uv | 
| 236 | 226 | 
| 237 end_of_border_copy_uv | 227 end_of_border_copy_uv | 
| 238     subs            r10, r10, #1 | 228     subs            r10, r10, #1 | 
| 239     ldrne           r1, [r0, #yv12_buffer_config_v_buffer]       ;srcptr1 | 229     ldrne           r7, [r0, #yv12_buffer_config_v_buffer] ; src_ptr1 | 
| 240     bne             border_copy_uv | 230     bne             border_copy_uv | 
| 241 | 231 | 
| 242     vpop            {d8 - d15} | 232     vpop            {d8 - d15} | 
| 243     pop             {r4 - r10, pc} | 233     pop             {r4 - r10, pc} | 
| 244 | 234 | 
| 245 ;;;;;;;;;;;;;;;;;;;;;; | 235 ;;;;;;;;;;;;;;;;;;;;;; | 
| 246 ;extra copy part for Y |  | 
| 247 extra_top_bottom_y | 236 extra_top_bottom_y | 
| 248     vld1.8          {q0}, [r1]! | 237     vld1.8          {q0}, [r1]! | 
| 249     vld1.8          {q2}, [r2]! | 238     vld1.8          {q2}, [r2]! | 
| 250 | 239 | 
| 251     mov             r9, r3, lsr #3 | 240     mov             r9, #4                  ; 32 >> 3 | 
| 252 | 241 | 
| 253 extra_top_bottom_32 | 242 extra_top_bottom_32 | 
| 254     subs            r9, r9, #1 | 243     subs            r9, r9, #1 | 
| 255 | 244 | 
| 256     vst1.8          {q0}, [r5], lr | 245     vst1.8          {q0}, [r5], lr | 
| 257     vst1.8          {q2}, [r6], lr | 246     vst1.8          {q2}, [r6], lr | 
| 258     vst1.8          {q0}, [r5], lr | 247     vst1.8          {q0}, [r5], lr | 
| 259     vst1.8          {q2}, [r6], lr | 248     vst1.8          {q2}, [r6], lr | 
| 260     vst1.8          {q0}, [r5], lr | 249     vst1.8          {q0}, [r5], lr | 
| 261     vst1.8          {q2}, [r6], lr | 250     vst1.8          {q2}, [r6], lr | 
| 262     vst1.8          {q0}, [r5], lr | 251     vst1.8          {q0}, [r5], lr | 
| 263     vst1.8          {q2}, [r6], lr | 252     vst1.8          {q2}, [r6], lr | 
| 264     vst1.8          {q0}, [r5], lr | 253     vst1.8          {q0}, [r5], lr | 
| 265     vst1.8          {q2}, [r6], lr | 254     vst1.8          {q2}, [r6], lr | 
| 266     vst1.8          {q0}, [r5], lr | 255     vst1.8          {q0}, [r5], lr | 
| 267     vst1.8          {q2}, [r6], lr | 256     vst1.8          {q2}, [r6], lr | 
| 268     vst1.8          {q0}, [r5], lr | 257     vst1.8          {q0}, [r5], lr | 
| 269     vst1.8          {q2}, [r6], lr | 258     vst1.8          {q2}, [r6], lr | 
| 270     vst1.8          {q0}, [r5], lr | 259     vst1.8          {q0}, [r5], lr | 
| 271     vst1.8          {q2}, [r6], lr | 260     vst1.8          {q2}, [r6], lr | 
| 272     bne             extra_top_bottom_32 | 261     bne             extra_top_bottom_32 | 
| 273 | 262 | 
| 274     sub             r5, r1, r8 | 263     sub             r5, r1, lr, asl #5      ; src_ptr1 - (Border * plane_stride) | 
| 275     add             r6, r2, lr | 264     add             r6, r2, lr              ; src_ptr2 + plane_stride | 
| 276     subs            r7, r7, #1 | 265     subs            r7, r7, #1 | 
| 277     bne             extra_top_bottom_y | 266     bne             extra_top_bottom_y | 
| 278 | 267 | 
| 279     b               end_of_border_copy_y | 268     b               end_of_border_copy_y | 
| 280 | 269 | 
| 281 ;extra copy part for UV |  | 
| 282 extra_top_bottom_uv | 270 extra_top_bottom_uv | 
| 283     vld1.8          {d0}, [r1]! | 271     vld1.8          {d0}, [r1]! | 
| 284     vld1.8          {d8}, [r2]! | 272     vld1.8          {d8}, [r2]! | 
| 285 | 273 | 
| 286     mov             r9, r3, lsr #3 | 274     mov             r9, #2                  ; 16 >> 3 | 
| 287 | 275 | 
| 288 extra_top_bottom_16 | 276 extra_top_bottom_16 | 
| 289     subs            r9, r9, #1 | 277     subs            r9, r9, #1 | 
| 290 | 278 | 
| 291     vst1.8          {d0}, [r5], lr | 279     vst1.8          {d0}, [r5], lr | 
| 292     vst1.8          {d8}, [r6], lr | 280     vst1.8          {d8}, [r6], lr | 
| 293     vst1.8          {d0}, [r5], lr | 281     vst1.8          {d0}, [r5], lr | 
| 294     vst1.8          {d8}, [r6], lr | 282     vst1.8          {d8}, [r6], lr | 
| 295     vst1.8          {d0}, [r5], lr | 283     vst1.8          {d0}, [r5], lr | 
| 296     vst1.8          {d8}, [r6], lr | 284     vst1.8          {d8}, [r6], lr | 
| 297     vst1.8          {d0}, [r5], lr | 285     vst1.8          {d0}, [r5], lr | 
| 298     vst1.8          {d8}, [r6], lr | 286     vst1.8          {d8}, [r6], lr | 
| 299     vst1.8          {d0}, [r5], lr | 287     vst1.8          {d0}, [r5], lr | 
| 300     vst1.8          {d8}, [r6], lr | 288     vst1.8          {d8}, [r6], lr | 
| 301     vst1.8          {d0}, [r5], lr | 289     vst1.8          {d0}, [r5], lr | 
| 302     vst1.8          {d8}, [r6], lr | 290     vst1.8          {d8}, [r6], lr | 
| 303     vst1.8          {d0}, [r5], lr | 291     vst1.8          {d0}, [r5], lr | 
| 304     vst1.8          {d8}, [r6], lr | 292     vst1.8          {d8}, [r6], lr | 
| 305     vst1.8          {d0}, [r5], lr | 293     vst1.8          {d0}, [r5], lr | 
| 306     vst1.8          {d8}, [r6], lr | 294     vst1.8          {d8}, [r6], lr | 
| 307     bne             extra_top_bottom_16 | 295     bne             extra_top_bottom_16 | 
| 308 | 296 | 
| 309     sub             r5, r1, r8 | 297     sub             r5, r1, lr, asl #4      ; src_ptr1 - (Border * plane_stride) | 
| 310     add             r6, r2, lr | 298     add             r6, r2, lr              ; src_ptr2 + plane_stride | 
| 311     subs            r7, r7, #1 | 299     subs            r7, r7, #1 | 
| 312     bne             extra_top_bottom_uv | 300     bne             extra_top_bottom_uv | 
| 313 | 301 | 
| 314     b               end_of_border_copy_uv | 302     b               end_of_border_copy_uv | 
| 315 | 303 | 
| 316 |  | 
| 317 ;======================= |  | 
| 318 b16_extend_frame_borders |  | 
| 319 ;border = 16 |  | 
| 320 ;======================= |  | 
| 321 ;Border copy for Y plane |  | 
| 322 ;copy the left and right most columns out |  | 
| 323     sub             r5, r1, r3              ;destptr1 |  | 
| 324     add             r6, r1, lr |  | 
| 325     sub             r6, r6, r3, lsl #1      ;destptr2 |  | 
| 326     sub             r2, r6, #1              ;srcptr2 |  | 
| 327 |  | 
| 328     ;Do four rows at one time |  | 
| 329     mov             r12, r4, lsr #2 |  | 
| 330 |  | 
| 331 copy_left_right_y_b16 |  | 
| 332     vld1.8          {d0[], d1[]}, [r1], lr |  | 
| 333     vld1.8          {d4[], d5[]}, [r2], lr |  | 
| 334     vld1.8          {d8[], d9[]}, [r1], lr |  | 
| 335     vld1.8          {d12[], d13[]}, [r2], lr |  | 
| 336     vld1.8          {d16[], d17[]},  [r1], lr |  | 
| 337     vld1.8          {d20[], d21[]}, [r2], lr |  | 
| 338     vld1.8          {d24[], d25[]}, [r1], lr |  | 
| 339     vld1.8          {d28[], d29[]}, [r2], lr |  | 
| 340 |  | 
| 341     subs            r12, r12, #1 |  | 
| 342 |  | 
| 343     vst1.8          {q0}, [r5], lr |  | 
| 344     vst1.8          {q2}, [r6], lr |  | 
| 345     vst1.8          {q4}, [r5], lr |  | 
| 346     vst1.8          {q6}, [r6], lr |  | 
| 347     vst1.8          {q8}, [r5], lr |  | 
| 348     vst1.8          {q10}, [r6], lr |  | 
| 349     vst1.8          {q12}, [r5], lr |  | 
| 350     vst1.8          {q14}, [r6], lr |  | 
| 351 |  | 
| 352     bne             copy_left_right_y_b16 |  | 
| 353 |  | 
| 354 ;Now copy the top and bottom source lines into each line of the respective borde
     rs |  | 
| 355     ldr             r7, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1 |  | 
| 356     mul             r8, r3, lr |  | 
| 357 |  | 
| 358     mov             r12, lr, lsr #7 |  | 
| 359 |  | 
| 360     sub             r6, r1, r3              ;destptr2 |  | 
| 361     sub             r2, r6, lr              ;srcptr2 |  | 
| 362     sub             r1, r7, r3              ;srcptr1 |  | 
| 363     sub             r5, r1, r8              ;destptr1 |  | 
| 364 |  | 
| 365 copy_top_bottom_y_b16 |  | 
| 366     vld1.8          {q0, q1}, [r1]! |  | 
| 367     vld1.8          {q8, q9}, [r2]! |  | 
| 368     vld1.8          {q2, q3}, [r1]! |  | 
| 369     vld1.8          {q10, q11}, [r2]! |  | 
| 370     vld1.8          {q4, q5}, [r1]! |  | 
| 371     vld1.8          {q12, q13}, [r2]! |  | 
| 372     vld1.8          {q6, q7}, [r1]! |  | 
| 373     vld1.8          {q14, q15}, [r2]! |  | 
| 374 |  | 
| 375     mov             r7, r3 |  | 
| 376 |  | 
| 377 top_bottom_16_b16 |  | 
| 378     subs            r7, r7, #1 |  | 
| 379 |  | 
| 380     vst1.8          {q0, q1}, [r5]! |  | 
| 381     vst1.8          {q8, q9}, [r6]! |  | 
| 382     vst1.8          {q2, q3}, [r5]! |  | 
| 383     vst1.8          {q10, q11}, [r6]! |  | 
| 384     vst1.8          {q4, q5}, [r5]! |  | 
| 385     vst1.8          {q12, q13}, [r6]! |  | 
| 386     vst1.8          {q6, q7}, [r5]! |  | 
| 387     vst1.8          {q14, q15}, [r6]! |  | 
| 388 |  | 
| 389     add             r5, r5, lr |  | 
| 390     sub             r5, r5, #128 |  | 
| 391     add             r6, r6, lr |  | 
| 392     sub             r6, r6, #128 |  | 
| 393 |  | 
| 394     bne             top_bottom_16_b16 |  | 
| 395 |  | 
| 396     sub             r5, r1, r8 |  | 
| 397     add             r6, r2, lr |  | 
| 398 |  | 
| 399     subs            r12, r12, #1 |  | 
| 400     bne             copy_top_bottom_y_b16 |  | 
| 401 |  | 
| 402     mov             r7, lr, lsr #4              ;check to see if extra copy is n
     eeded |  | 
| 403     ands            r7, r7, #0x7 |  | 
| 404     bne             extra_top_bottom_y_b16 |  | 
| 405 end_of_border_copy_y_b16 |  | 
| 406 |  | 
| 407 ;Border copy for U, V planes |  | 
| 408     ldr             r1, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1 |  | 
| 409     mov             lr, lr, lsr #1              ;uv_stride |  | 
| 410     mov             r3, r3, lsr #1              ;border |  | 
| 411     mov             r4, r4, lsr #1              ;uv_height |  | 
| 412     mov             r8, r8, lsr #2 |  | 
| 413 |  | 
| 414     mov             r10, #2 |  | 
| 415 |  | 
| 416 ;copy the left and right most columns out |  | 
| 417 border_copy_uv_b16 |  | 
| 418     sub             r5, r1, r3              ;destptr1 |  | 
| 419     add             r6, r1, lr |  | 
| 420     sub             r6, r6, r3, lsl #1      ;destptr2 |  | 
| 421     sub             r2, r6, #1              ;srcptr2 |  | 
| 422 |  | 
| 423     mov             r7, r1 |  | 
| 424 |  | 
| 425     ;Do eight rows at one time |  | 
| 426     mov             r12, r4, lsr #3 |  | 
| 427 |  | 
| 428 copy_left_right_uv_b16 |  | 
| 429     vld1.8          {d0[]}, [r1], lr |  | 
| 430     vld1.8          {d2[]}, [r2], lr |  | 
| 431     vld1.8          {d4[]}, [r1], lr |  | 
| 432     vld1.8          {d6[]}, [r2], lr |  | 
| 433     vld1.8          {d8[]},  [r1], lr |  | 
| 434     vld1.8          {d10[]}, [r2], lr |  | 
| 435     vld1.8          {d12[]}, [r1], lr |  | 
| 436     vld1.8          {d14[]}, [r2], lr |  | 
| 437     vld1.8          {d16[]}, [r1], lr |  | 
| 438     vld1.8          {d18[]}, [r2], lr |  | 
| 439     vld1.8          {d20[]}, [r1], lr |  | 
| 440     vld1.8          {d22[]}, [r2], lr |  | 
| 441     vld1.8          {d24[]},  [r1], lr |  | 
| 442     vld1.8          {d26[]}, [r2], lr |  | 
| 443     vld1.8          {d28[]}, [r1], lr |  | 
| 444     vld1.8          {d30[]}, [r2], lr |  | 
| 445 |  | 
| 446     subs            r12, r12, #1 |  | 
| 447 |  | 
| 448     vst1.8          {d0}, [r5], lr |  | 
| 449     vst1.8          {d2}, [r6], lr |  | 
| 450     vst1.8          {d4}, [r5], lr |  | 
| 451     vst1.8          {d6}, [r6], lr |  | 
| 452     vst1.8          {d8}, [r5], lr |  | 
| 453     vst1.8          {d10}, [r6], lr |  | 
| 454     vst1.8          {d12}, [r5], lr |  | 
| 455     vst1.8          {d14}, [r6], lr |  | 
| 456     vst1.8          {d16}, [r5], lr |  | 
| 457     vst1.8          {d18}, [r6], lr |  | 
| 458     vst1.8          {d20}, [r5], lr |  | 
| 459     vst1.8          {d22}, [r6], lr |  | 
| 460     vst1.8          {d24}, [r5], lr |  | 
| 461     vst1.8          {d26}, [r6], lr |  | 
| 462     vst1.8          {d28}, [r5], lr |  | 
| 463     vst1.8          {d30}, [r6], lr |  | 
| 464 |  | 
| 465     bne             copy_left_right_uv_b16 |  | 
| 466 |  | 
| 467 ;Now copy the top and bottom source lines into each line of the respective borde
     rs |  | 
| 468     mov             r12, lr, lsr #6 |  | 
| 469 |  | 
| 470     sub             r6, r1, r3              ;destptr2 = r1 - r3: first line filled with the data loaded from srcptr2; advances by lr per line written |  | 
| 471     sub             r2, r6, lr              ;srcptr2 = destptr2 - lr: the line one lr step before destptr2 |  | 
| 472     sub             r1, r7, r3              ;srcptr1 = r7 - r3 |  | 
| 473     sub             r5, r1, r8              ;destptr1 = srcptr1 - r8: first line filled with the data loaded from srcptr1; advances by lr per line written |  | 
| 474 |  | 
| 475 copy_top_bottom_uv_b16 |  | 
| 476     vld1.8          {q0, q1}, [r1]! |  | 
| 477     vld1.8          {q8, q9}, [r2]! |  | 
| 478     vld1.8          {q2, q3}, [r1]! |  | 
| 479     vld1.8          {q10, q11}, [r2]! |  | 
| 480 |  | 
| 481     mov             r7, r3 |  | 
| 482 |  | 
| 483 top_bottom_8_b16 |  | 
| 484     subs            r7, r7, #1 |  | 
| 485 |  | 
| 486     vst1.8          {q0, q1}, [r5]! |  | 
| 487     vst1.8          {q8, q9}, [r6]! |  | 
| 488     vst1.8          {q2, q3}, [r5]! |  | 
| 489     vst1.8          {q10, q11}, [r6]! |  | 
| 490 |  | 
| 491     add             r5, r5, lr |  | 
| 492     sub             r5, r5, #64 |  | 
| 493     add             r6, r6, lr |  | 
| 494     sub             r6, r6, #64 |  | 
| 495 |  | 
| 496     bne             top_bottom_8_b16 |  | 
| 497 |  | 
| 498     sub             r5, r1, r8 |  | 
| 499     add             r6, r2, lr |  | 
| 500 |  | 
| 501     subs            r12, r12, #1 |  | 
| 502     bne             copy_top_bottom_uv_b16 |  | 
| 503 |  | 
| 504     mov             r7, lr, lsr #3              ;check to see if extra copy is needed |  | 
| 505     ands            r7, r7, #0x7 |  | 
| 506     bne             extra_top_bottom_uv_b16 |  | 
| 507 |  | 
| 508 end_of_border_copy_uv_b16 |  | 
| 509     subs            r10, r10, #1 |  | 
| 510     ldrne           r1, [r0, #yv12_buffer_config_v_buffer]       ;srcptr1: while the r10 pass counter is still nonzero, reload r1 from the v_buffer field at [r0] and branch back to border_copy_uv_b16 for another pass |  | 
| 511     bne             border_copy_uv_b16 |  | 
| 512 |  | 
| 513     vpop            {d8-d15} |  | 
| 514     pop             {r4 - r10, pc} |  | 
| 515 |  | 
| 516 ;;;;;;;;;;;;;;;;;;;;;; |  | 
| 517 ;extra copy part for Y: each pass loads 16 bytes from [r1] into q0 and from [r2] into q2 (both pointers post-incremented), then stores q0 at r5 and q2 at r6 on 8 * (r3 >> 3) consecutive lines, stepping each pointer by lr after every store; r5 and r6 are then reset to r1 - r8 and r2 + lr for the next 16-byte column. Runs r7 passes (r7 is set before this label is entered, outside this excerpt), then branches to end_of_border_copy_y_b16. Requires r3 >= 8: with r3 < 8, r9 starts at 0 and the subs/bne loop wraps around. |  | 
| 518 extra_top_bottom_y_b16 |  | 
| 519     vld1.8          {q0}, [r1]! |  | 
| 520     vld1.8          {q2}, [r2]! |  | 
| 521 |  | 
| 522     mov             r9, r3, lsr #3 |  | 
| 523 |  | 
| 524 extra_top_bottom_16_b16 |  | 
| 525     subs            r9, r9, #1 |  | 
| 526 |  | 
| 527     vst1.8          {q0}, [r5], lr |  | 
| 528     vst1.8          {q2}, [r6], lr |  | 
| 529     vst1.8          {q0}, [r5], lr |  | 
| 530     vst1.8          {q2}, [r6], lr |  | 
| 531     vst1.8          {q0}, [r5], lr |  | 
| 532     vst1.8          {q2}, [r6], lr |  | 
| 533     vst1.8          {q0}, [r5], lr |  | 
| 534     vst1.8          {q2}, [r6], lr |  | 
| 535     vst1.8          {q0}, [r5], lr |  | 
| 536     vst1.8          {q2}, [r6], lr |  | 
| 537     vst1.8          {q0}, [r5], lr |  | 
| 538     vst1.8          {q2}, [r6], lr |  | 
| 539     vst1.8          {q0}, [r5], lr |  | 
| 540     vst1.8          {q2}, [r6], lr |  | 
| 541     vst1.8          {q0}, [r5], lr |  | 
| 542     vst1.8          {q2}, [r6], lr |  | 
| 543     bne             extra_top_bottom_16_b16 |  | 
| 544 |  | 
| 545     sub             r5, r1, r8 |  | 
| 546     add             r6, r2, lr |  | 
| 547     subs            r7, r7, #1 |  | 
| 548     bne             extra_top_bottom_y_b16 |  | 
| 549 |  | 
| 550     b               end_of_border_copy_y_b16 |  | 
| 551 |  | 
| 552 ;extra copy part for UV: entered with r7 = (lr >> 3) & 7, the number of 8-byte columns left after the 64-byte-wide loop. Each pass loads 8 bytes from [r1] into d0 and from [r2] into d8 (both pointers post-incremented), then stores d0 at r5 and d8 at r6 on 8 * (r3 >> 3) consecutive lines, stepping each pointer by lr after every store; r5 and r6 are then reset to r1 - r8 and r2 + lr for the next column. After r7 passes it branches to end_of_border_copy_uv_b16. Requires r3 >= 8: with r3 < 8, r9 starts at 0 and the subs/bne loop wraps around. |  | 
| 553 extra_top_bottom_uv_b16 |  | 
| 554     vld1.8          {d0}, [r1]! |  | 
| 555     vld1.8          {d8}, [r2]! |  | 
| 556 |  | 
| 557     mov             r9, r3, lsr #3 |  | 
| 558 |  | 
| 559 extra_top_bottom_8_b16 |  | 
| 560     subs            r9, r9, #1 |  | 
| 561 |  | 
| 562     vst1.8          {d0}, [r5], lr |  | 
| 563     vst1.8          {d8}, [r6], lr |  | 
| 564     vst1.8          {d0}, [r5], lr |  | 
| 565     vst1.8          {d8}, [r6], lr |  | 
| 566     vst1.8          {d0}, [r5], lr |  | 
| 567     vst1.8          {d8}, [r6], lr |  | 
| 568     vst1.8          {d0}, [r5], lr |  | 
| 569     vst1.8          {d8}, [r6], lr |  | 
| 570     vst1.8          {d0}, [r5], lr |  | 
| 571     vst1.8          {d8}, [r6], lr |  | 
| 572     vst1.8          {d0}, [r5], lr |  | 
| 573     vst1.8          {d8}, [r6], lr |  | 
| 574     vst1.8          {d0}, [r5], lr |  | 
| 575     vst1.8          {d8}, [r6], lr |  | 
| 576     vst1.8          {d0}, [r5], lr |  | 
| 577     vst1.8          {d8}, [r6], lr |  | 
| 578     bne             extra_top_bottom_8_b16 |  | 
| 579 |  | 
| 580     sub             r5, r1, r8 |  | 
| 581     add             r6, r2, lr |  | 
| 582     subs            r7, r7, #1 |  | 
| 583     bne             extra_top_bottom_uv_b16 |  | 
| 584 |  | 
| 585     b               end_of_border_copy_uv_b16 |  | 
| 586 |  | 
| 587     ENDP | 304     ENDP | 
| 588     END | 305     END | 
| OLD | NEW | 
|---|