| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 | 11 |
| 12 EXPORT |vp8_yv12_extend_frame_borders_neon| | 12 EXPORT |vp8_yv12_extend_frame_borders_neon| |
| 13 ARM | 13 ARM |
| 14 REQUIRE8 | 14 REQUIRE8 |
| 15 PRESERVE8 | 15 PRESERVE8 |
| 16 | 16 |
| 17 INCLUDE asm_com_offsets.asm | 17 INCLUDE asm_com_offsets.asm |
| 18 | 18 |
| 19 AREA ||.text||, CODE, READONLY, ALIGN=2 | 19 AREA ||.text||, CODE, READONLY, ALIGN=2 |
| 20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf); | 20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf); |
| 21 ;Note: this is VP8 function, which has border=32 and 16. Internal y_width and y_height | 21 ; we depend on VP8BORDERINPIXELS being 32 |
| 22 ; are always multiples of 16. | |
| 23 | 22 |
| 24 |vp8_yv12_extend_frame_borders_neon| PROC | 23 |vp8_yv12_extend_frame_borders_neon| PROC |
| 25 push {r4 - r10, lr} | 24 push {r4 - r10, lr} |
| 26 vpush {d8 - d15} | 25 vpush {d8 - d15} |
| 27 | 26 |
| 28 ;Not need to load y_width, since: y_width = y_stride - 2*border | 27 ; Border = 32 |
| 29 ldr r3, [r0, #yv12_buffer_config_border] | 28 ldr r3, [r0, #yv12_buffer_config_y_width] ; plane_width |
| 30 ldr r1, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 | 29 ldr r1, [r0, #yv12_buffer_config_y_buffer] ; src_ptr1 |
| 31 ldr r4, [r0, #yv12_buffer_config_y_height] | 30 ldr r4, [r0, #yv12_buffer_config_y_height] ; plane_height |
| 32 ldr lr, [r0, #yv12_buffer_config_y_stride] | 31 ldr lr, [r0, #yv12_buffer_config_y_stride] ; plane_stride |
| 33 | 32 |
| 34 cmp r3, #16 | 33 ; Border copy for Y plane |
| 35 beq b16_extend_frame_borders | 34 ; copy the left and right most columns out |
| 35 add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width) |
| 36 sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1 |
| 37 sub r5, r1, #32 ; dest_ptr1 = src_ptr1 - Border |
| 36 | 38 |
| 37 ;======================= | 39 mov r12, r4, lsr #2 ; plane_height / 4 |
| 38 b32_extend_frame_borders | |
| 39 ;border = 32 | |
| 40 ;======================= | |
| 41 ;Border copy for Y plane | |
| 42 ;copy the left and right most columns out | |
| 43 sub r5, r1, r3 ;destptr1 | |
| 44 add r6, r1, lr | |
| 45 sub r6, r6, r3, lsl #1 ;destptr2 | |
| 46 sub r2, r6, #1 ;srcptr2 | |
| 47 | |
| 48 ;Do four rows at one time | |
| 49 mov r12, r4, lsr #2 | |
| 50 | 40 |
| 51 copy_left_right_y | 41 copy_left_right_y |
| 52 vld1.8 {d0[], d1[]}, [r1], lr | 42 vld1.8 {d0[], d1[]}, [r1], lr |
| 53 vld1.8 {d4[], d5[]}, [r2], lr | 43 vld1.8 {d4[], d5[]}, [r2], lr |
| 54 vld1.8 {d8[], d9[]}, [r1], lr | 44 vld1.8 {d8[], d9[]}, [r1], lr |
| 55 vld1.8 {d12[], d13[]}, [r2], lr | 45 vld1.8 {d12[], d13[]}, [r2], lr |
| 56 vld1.8 {d16[], d17[]}, [r1], lr | 46 vld1.8 {d16[], d17[]}, [r1], lr |
| 57 vld1.8 {d20[], d21[]}, [r2], lr | 47 vld1.8 {d20[], d21[]}, [r2], lr |
| 58 vld1.8 {d24[], d25[]}, [r1], lr | 48 vld1.8 {d24[], d25[]}, [r1], lr |
| 59 vld1.8 {d28[], d29[]}, [r2], lr | 49 vld1.8 {d28[], d29[]}, [r2], lr |
| 60 | 50 |
| 61 vmov q1, q0 | 51 vmov q1, q0 |
| 62 vmov q3, q2 | 52 vmov q3, q2 |
| 63 vmov q5, q4 | 53 vmov q5, q4 |
| 64 vmov q7, q6 | 54 vmov q7, q6 |
| 65 vmov q9, q8 | 55 vmov q9, q8 |
| 66 vmov q11, q10 | 56 vmov q11, q10 |
| 67 vmov q13, q12 | 57 vmov q13, q12 |
| 68 vmov q15, q14 | 58 vmov q15, q14 |
| 69 | 59 |
| 70 subs r12, r12, #1 | 60 subs r12, r12, #1 |
| 71 | 61 |
| 72 vst1.8 {q0, q1}, [r5], lr | 62 vst1.8 {q0, q1}, [r5], lr |
| 73 vst1.8 {q2, q3}, [r6], lr | 63 vst1.8 {q2, q3}, [r6], lr |
| 74 vst1.8 {q4, q5}, [r5], lr | 64 vst1.8 {q4, q5}, [r5], lr |
| 75 vst1.8 {q6, q7}, [r6], lr | 65 vst1.8 {q6, q7}, [r6], lr |
| 76 vst1.8 {q8, q9}, [r5], lr | 66 vst1.8 {q8, q9}, [r5], lr |
| 77 vst1.8 {q10, q11}, [r6], lr | 67 vst1.8 {q10, q11}, [r6], lr |
| 78 vst1.8 {q12, q13}, [r5], lr | 68 vst1.8 {q12, q13}, [r5], lr |
| 79 vst1.8 {q14, q15}, [r6], lr | 69 vst1.8 {q14, q15}, [r6], lr |
| 80 | 70 |
| 81 bne copy_left_right_y | 71 bne copy_left_right_y |
| 82 | 72 |
| 83 ;Now copy the top and bottom source lines into each line of the respective borders | 73 ;Now copy the top and bottom source lines into each line of the respective borders |
| 84 ldr r7, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 | 74 ldr r1, [r0, #yv12_buffer_config_y_buffer] ; y_buffer |
| 85 mul r8, r3, lr | 75 mul r8, r4, lr ; plane_height * plane_stride |
| 86 | 76 |
| 87 mov r12, lr, lsr #7 | 77 ; copy width is plane_stride |
| 78 mov r12, lr, lsr #7 ; plane_stride / 128 |
| 88 | 79 |
| 89 sub r6, r1, r3 ;destptr2 | 80 sub r1, r1, #32 ; src_ptr1 = y_buffer - Border |
| 90 sub r2, r6, lr ;srcptr2 | 81 add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)) |
| 91 sub r1, r7, r3 ;srcptr1 | 82 sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride |
| 92 sub r5, r1, r8 ;destptr1 | 83 sub r5, r1, lr, asl #5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) |
| 93 | 84 |
| 94 copy_top_bottom_y | 85 copy_top_bottom_y |
| 95 vld1.8 {q0, q1}, [r1]! | 86 vld1.8 {q0, q1}, [r1]! |
| 96 vld1.8 {q8, q9}, [r2]! | 87 vld1.8 {q8, q9}, [r2]! |
| 97 vld1.8 {q2, q3}, [r1]! | 88 vld1.8 {q2, q3}, [r1]! |
| 98 vld1.8 {q10, q11}, [r2]! | 89 vld1.8 {q10, q11}, [r2]! |
| 99 vld1.8 {q4, q5}, [r1]! | 90 vld1.8 {q4, q5}, [r1]! |
| 100 vld1.8 {q12, q13}, [r2]! | 91 vld1.8 {q12, q13}, [r2]! |
| 101 vld1.8 {q6, q7}, [r1]! | 92 vld1.8 {q6, q7}, [r1]! |
| 102 vld1.8 {q14, q15}, [r2]! | 93 vld1.8 {q14, q15}, [r2]! |
| 103 | 94 |
| 104 mov r7, r3 | 95 mov r7, #32 ; Border |
| 105 | 96 |
| 106 top_bottom_32 | 97 top_bottom_32 |
| 107 subs r7, r7, #1 | 98 subs r7, r7, #1 |
| 108 | 99 |
| 109 vst1.8 {q0, q1}, [r5]! | 100 vst1.8 {q0, q1}, [r5]! |
| 110 vst1.8 {q8, q9}, [r6]! | 101 vst1.8 {q8, q9}, [r6]! |
| 111 vst1.8 {q2, q3}, [r5]! | 102 vst1.8 {q2, q3}, [r5]! |
| 112 vst1.8 {q10, q11}, [r6]! | 103 vst1.8 {q10, q11}, [r6]! |
| 113 vst1.8 {q4, q5}, [r5]! | 104 vst1.8 {q4, q5}, [r5]! |
| 114 vst1.8 {q12, q13}, [r6]! | 105 vst1.8 {q12, q13}, [r6]! |
| 115 vst1.8 {q6, q7}, [r5]! | 106 vst1.8 {q6, q7}, [r5]! |
| 116 vst1.8 {q14, q15}, [r6]! | 107 vst1.8 {q14, q15}, [r6]! |
| 117 | 108 |
| 118 add r5, r5, lr | 109 add r5, r5, lr ; dest_ptr1 += plane_stride |
| 119 sub r5, r5, #128 | 110 sub r5, r5, #128 ; dest_ptr1 -= 128 |
| 120 add r6, r6, lr | 111 add r6, r6, lr ; dest_ptr2 += plane_stride |
| 121 sub r6, r6, #128 | 112 sub r6, r6, #128 ; dest_ptr2 -= 128 |
| 122 | 113 |
| 123 bne top_bottom_32 | 114 bne top_bottom_32 |
| 124 | 115 |
| 125 sub r5, r1, r8 | 116 sub r5, r1, lr, asl #5 ; src_ptr1 - (Border* plane_stride) |
| 126 add r6, r2, lr | 117 add r6, r2, lr ; src_ptr2 + plane_stride |
| 127 | 118 |
| 128 subs r12, r12, #1 | 119 subs r12, r12, #1 |
| 129 bne copy_top_bottom_y | 120 bne copy_top_bottom_y |
| 130 | 121 |
| 131 mov r7, lr, lsr #4 ;check to see if extra copy is needed | 122 mov r7, lr, lsr #4 ; check to see if extra copy is needed |
| 132 ands r7, r7, #0x7 | 123 ands r7, r7, #0x7 |
| 133 bne extra_top_bottom_y | 124 bne extra_top_bottom_y |
| 134 end_of_border_copy_y | 125 end_of_border_copy_y |
| 135 | 126 |
| 136 ;Border copy for U, V planes | 127 ;Border copy for U, V planes |
| 137 ldr r1, [r0, #yv12_buffer_config_u_buffer] ;srcptr1 | 128 ; Border = 16 |
| 138 mov lr, lr, lsr #1 ;uv_stride | 129 ldr r7, [r0, #yv12_buffer_config_u_buffer] ; src_ptr1 |
| 139 mov r3, r3, lsr #1 ;border | 130 ldr lr, [r0, #yv12_buffer_config_uv_stride] ; plane_stride |
| 140 mov r4, r4, lsr #1 ;uv_height | 131 ldr r3, [r0, #yv12_buffer_config_uv_width] ; plane_width |
| 141 mov r8, r8, lsr #2 | 132 ldr r4, [r0, #yv12_buffer_config_uv_height] ; plane_height |
| 142 | 133 |
| 143 mov r10, #2 | 134 mov r10, #2 |
| 144 | 135 |
| 145 ;copy the left and right most columns out | 136 ;copy the left and right most columns out |
| 146 border_copy_uv | 137 border_copy_uv |
| 147 sub r5, r1, r3 ;destptr1 | 138 mov r1, r7 ; src_ptr1 needs to be saved for second half of loop |
| 148 add r6, r1, lr | 139 sub r5, r1, #16 ; dest_ptr1 = src_ptr1 - Border |
| 149 sub r6, r6, r3, lsl #1 ;destptr2 | 140 add r6, r1, r3 ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width) |
| 150 sub r2, r6, #1 ;srcptr2 | 141 sub r2, r6, #1 ; src_ptr2 = src_ptr1 + plane_width - 1 |
| 151 | 142 |
| 152 mov r7, r1 | 143 mov r12, r4, lsr #3 ; plane_height / 8 |
| 153 | |
| 154 ;Do eight rows at one time | |
| 155 mov r12, r4, lsr #3 | |
| 156 | 144 |
| 157 copy_left_right_uv | 145 copy_left_right_uv |
| 158 vld1.8 {d0[], d1[]}, [r1], lr | 146 vld1.8 {d0[], d1[]}, [r1], lr |
| 159 vld1.8 {d2[], d3[]}, [r2], lr | 147 vld1.8 {d2[], d3[]}, [r2], lr |
| 160 vld1.8 {d4[], d5[]}, [r1], lr | 148 vld1.8 {d4[], d5[]}, [r1], lr |
| 161 vld1.8 {d6[], d7[]}, [r2], lr | 149 vld1.8 {d6[], d7[]}, [r2], lr |
| 162 vld1.8 {d8[], d9[]}, [r1], lr | 150 vld1.8 {d8[], d9[]}, [r1], lr |
| 163 vld1.8 {d10[], d11[]}, [r2], lr | 151 vld1.8 {d10[], d11[]}, [r2], lr |
| 164 vld1.8 {d12[], d13[]}, [r1], lr | 152 vld1.8 {d12[], d13[]}, [r1], lr |
| 165 vld1.8 {d14[], d15[]}, [r2], lr | 153 vld1.8 {d14[], d15[]}, [r2], lr |
| 166 vld1.8 {d16[], d17[]}, [r1], lr | 154 vld1.8 {d16[], d17[]}, [r1], lr |
| 167 vld1.8 {d18[], d19[]}, [r2], lr | 155 vld1.8 {d18[], d19[]}, [r2], lr |
| 168 vld1.8 {d20[], d21[]}, [r1], lr | 156 vld1.8 {d20[], d21[]}, [r1], lr |
| 169 vld1.8 {d22[], d23[]}, [r2], lr | 157 vld1.8 {d22[], d23[]}, [r2], lr |
| 170 vld1.8 {d24[], d25[]}, [r1], lr | 158 vld1.8 {d24[], d25[]}, [r1], lr |
| 171 vld1.8 {d26[], d27[]}, [r2], lr | 159 vld1.8 {d26[], d27[]}, [r2], lr |
| 172 vld1.8 {d28[], d29[]}, [r1], lr | 160 vld1.8 {d28[], d29[]}, [r1], lr |
| 173 vld1.8 {d30[], d31[]}, [r2], lr | 161 vld1.8 {d30[], d31[]}, [r2], lr |
| 174 | 162 |
| 175 subs r12, r12, #1 | 163 subs r12, r12, #1 |
| 176 | 164 |
| 177 vst1.8 {q0}, [r5], lr | 165 vst1.8 {q0}, [r5], lr |
| 178 vst1.8 {q1}, [r6], lr | 166 vst1.8 {q1}, [r6], lr |
| 179 vst1.8 {q2}, [r5], lr | 167 vst1.8 {q2}, [r5], lr |
| 180 vst1.8 {q3}, [r6], lr | 168 vst1.8 {q3}, [r6], lr |
| 181 vst1.8 {q4}, [r5], lr | 169 vst1.8 {q4}, [r5], lr |
| 182 vst1.8 {q5}, [r6], lr | 170 vst1.8 {q5}, [r6], lr |
| 183 vst1.8 {q6}, [r5], lr | 171 vst1.8 {q6}, [r5], lr |
| 184 vst1.8 {q7}, [r6], lr | 172 vst1.8 {q7}, [r6], lr |
| 185 vst1.8 {q8}, [r5], lr | 173 vst1.8 {q8}, [r5], lr |
| 186 vst1.8 {q9}, [r6], lr | 174 vst1.8 {q9}, [r6], lr |
| 187 vst1.8 {q10}, [r5], lr | 175 vst1.8 {q10}, [r5], lr |
| 188 vst1.8 {q11}, [r6], lr | 176 vst1.8 {q11}, [r6], lr |
| 189 vst1.8 {q12}, [r5], lr | 177 vst1.8 {q12}, [r5], lr |
| 190 vst1.8 {q13}, [r6], lr | 178 vst1.8 {q13}, [r6], lr |
| 191 vst1.8 {q14}, [r5], lr | 179 vst1.8 {q14}, [r5], lr |
| 192 vst1.8 {q15}, [r6], lr | 180 vst1.8 {q15}, [r6], lr |
| 193 | 181 |
| 194 bne copy_left_right_uv | 182 bne copy_left_right_uv |
| 195 | 183 |
| 196 ;Now copy the top and bottom source lines into each line of the respective borders | 184 ;Now copy the top and bottom source lines into each line of the respective borders |
| 197 mov r12, lr, lsr #6 | 185 mov r1, r7 |
| 186 mul r8, r4, lr ; plane_height * plane_stride |
| 187 mov r12, lr, lsr #6 ; plane_stride / 64 |
| 198 | 188 |
| 199 sub r6, r1, r3 ;destptr2 | 189 sub r1, r1, #16 ; src_ptr1 = u_buffer - Border |
| 200 sub r2, r6, lr ;srcptr2 | 190 add r6, r1, r8 ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride)) |
| 201 sub r1, r7, r3 ;srcptr1 | 191 sub r2, r6, lr ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride |
| 202 sub r5, r1, r8 ;destptr1 | 192 sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) |
| 203 | 193 |
| 204 copy_top_bottom_uv | 194 copy_top_bottom_uv |
| 205 vld1.8 {q0, q1}, [r1]! | 195 vld1.8 {q0, q1}, [r1]! |
| 206 vld1.8 {q8, q9}, [r2]! | 196 vld1.8 {q8, q9}, [r2]! |
| 207 vld1.8 {q2, q3}, [r1]! | 197 vld1.8 {q2, q3}, [r1]! |
| 208 vld1.8 {q10, q11}, [r2]! | 198 vld1.8 {q10, q11}, [r2]! |
| 209 | 199 |
| 210 mov r7, r3 | 200 mov r7, #16 ; Border |
| 211 | 201 |
| 212 top_bottom_16 | 202 top_bottom_16 |
| 213 subs r7, r7, #1 | 203 subs r7, r7, #1 |
| 214 | 204 |
| 215 vst1.8 {q0, q1}, [r5]! | 205 vst1.8 {q0, q1}, [r5]! |
| 216 vst1.8 {q8, q9}, [r6]! | 206 vst1.8 {q8, q9}, [r6]! |
| 217 vst1.8 {q2, q3}, [r5]! | 207 vst1.8 {q2, q3}, [r5]! |
| 218 vst1.8 {q10, q11}, [r6]! | 208 vst1.8 {q10, q11}, [r6]! |
| 219 | 209 |
| 220 add r5, r5, lr | 210 add r5, r5, lr ; dest_ptr1 += plane_stride |
| 221 sub r5, r5, #64 | 211 sub r5, r5, #64 |
| 222 add r6, r6, lr | 212 add r6, r6, lr ; dest_ptr2 += plane_stride |
| 223 sub r6, r6, #64 | 213 sub r6, r6, #64 |
| 224 | 214 |
| 225 bne top_bottom_16 | 215 bne top_bottom_16 |
| 226 | 216 |
| 227 sub r5, r1, r8 | 217 sub r5, r1, lr, asl #4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride) |
| 228 add r6, r2, lr | 218 add r6, r2, lr ; dest_ptr2 = src_ptr2 + plane_stride |
| 229 | 219 |
| 230 subs r12, r12, #1 | 220 subs r12, r12, #1 |
| 231 bne copy_top_bottom_uv | 221 bne copy_top_bottom_uv |
| 232 | 222 |
| 233 mov r7, lr, lsr #3 ;check to see if extra copy is needed | 223 mov r7, lr, lsr #3 ; check to see if extra copy is needed |
| 234 ands r7, r7, #0x7 | 224 ands r7, r7, #0x7 |
| 235 bne extra_top_bottom_uv | 225 bne extra_top_bottom_uv |
| 236 | 226 |
| 237 end_of_border_copy_uv | 227 end_of_border_copy_uv |
| 238 subs r10, r10, #1 | 228 subs r10, r10, #1 |
| 239 ldrne r1, [r0, #yv12_buffer_config_v_buffer] ;srcptr1 | 229 ldrne r7, [r0, #yv12_buffer_config_v_buffer] ; src_ptr1 |
| 240 bne border_copy_uv | 230 bne border_copy_uv |
| 241 | 231 |
| 242 vpop {d8 - d15} | 232 vpop {d8 - d15} |
| 243 pop {r4 - r10, pc} | 233 pop {r4 - r10, pc} |
| 244 | 234 |
| 245 ;;;;;;;;;;;;;;;;;;;;;; | 235 ;;;;;;;;;;;;;;;;;;;;;; |
| 246 ;extra copy part for Y | |
| 247 extra_top_bottom_y | 236 extra_top_bottom_y |
| 248 vld1.8 {q0}, [r1]! | 237 vld1.8 {q0}, [r1]! |
| 249 vld1.8 {q2}, [r2]! | 238 vld1.8 {q2}, [r2]! |
| 250 | 239 |
| 251 mov r9, r3, lsr #3 | 240 mov r9, #4 ; 32 >> 3 |
| 252 | 241 |
| 253 extra_top_bottom_32 | 242 extra_top_bottom_32 |
| 254 subs r9, r9, #1 | 243 subs r9, r9, #1 |
| 255 | 244 |
| 256 vst1.8 {q0}, [r5], lr | 245 vst1.8 {q0}, [r5], lr |
| 257 vst1.8 {q2}, [r6], lr | 246 vst1.8 {q2}, [r6], lr |
| 258 vst1.8 {q0}, [r5], lr | 247 vst1.8 {q0}, [r5], lr |
| 259 vst1.8 {q2}, [r6], lr | 248 vst1.8 {q2}, [r6], lr |
| 260 vst1.8 {q0}, [r5], lr | 249 vst1.8 {q0}, [r5], lr |
| 261 vst1.8 {q2}, [r6], lr | 250 vst1.8 {q2}, [r6], lr |
| 262 vst1.8 {q0}, [r5], lr | 251 vst1.8 {q0}, [r5], lr |
| 263 vst1.8 {q2}, [r6], lr | 252 vst1.8 {q2}, [r6], lr |
| 264 vst1.8 {q0}, [r5], lr | 253 vst1.8 {q0}, [r5], lr |
| 265 vst1.8 {q2}, [r6], lr | 254 vst1.8 {q2}, [r6], lr |
| 266 vst1.8 {q0}, [r5], lr | 255 vst1.8 {q0}, [r5], lr |
| 267 vst1.8 {q2}, [r6], lr | 256 vst1.8 {q2}, [r6], lr |
| 268 vst1.8 {q0}, [r5], lr | 257 vst1.8 {q0}, [r5], lr |
| 269 vst1.8 {q2}, [r6], lr | 258 vst1.8 {q2}, [r6], lr |
| 270 vst1.8 {q0}, [r5], lr | 259 vst1.8 {q0}, [r5], lr |
| 271 vst1.8 {q2}, [r6], lr | 260 vst1.8 {q2}, [r6], lr |
| 272 bne extra_top_bottom_32 | 261 bne extra_top_bottom_32 |
| 273 | 262 |
| 274 sub r5, r1, r8 | 263 sub r5, r1, lr, asl #5 ; src_ptr1 - (Border * plane_stride) |
| 275 add r6, r2, lr | 264 add r6, r2, lr ; src_ptr2 + plane_stride |
| 276 subs r7, r7, #1 | 265 subs r7, r7, #1 |
| 277 bne extra_top_bottom_y | 266 bne extra_top_bottom_y |
| 278 | 267 |
| 279 b end_of_border_copy_y | 268 b end_of_border_copy_y |
| 280 | 269 |
| 281 ;extra copy part for UV | |
| 282 extra_top_bottom_uv | 270 extra_top_bottom_uv |
| 283 vld1.8 {d0}, [r1]! | 271 vld1.8 {d0}, [r1]! |
| 284 vld1.8 {d8}, [r2]! | 272 vld1.8 {d8}, [r2]! |
| 285 | 273 |
| 286 mov r9, r3, lsr #3 | 274 mov r9, #2 ; 16 >> 3 |
| 287 | 275 |
| 288 extra_top_bottom_16 | 276 extra_top_bottom_16 |
| 289 subs r9, r9, #1 | 277 subs r9, r9, #1 |
| 290 | 278 |
| 291 vst1.8 {d0}, [r5], lr | 279 vst1.8 {d0}, [r5], lr |
| 292 vst1.8 {d8}, [r6], lr | 280 vst1.8 {d8}, [r6], lr |
| 293 vst1.8 {d0}, [r5], lr | 281 vst1.8 {d0}, [r5], lr |
| 294 vst1.8 {d8}, [r6], lr | 282 vst1.8 {d8}, [r6], lr |
| 295 vst1.8 {d0}, [r5], lr | 283 vst1.8 {d0}, [r5], lr |
| 296 vst1.8 {d8}, [r6], lr | 284 vst1.8 {d8}, [r6], lr |
| 297 vst1.8 {d0}, [r5], lr | 285 vst1.8 {d0}, [r5], lr |
| 298 vst1.8 {d8}, [r6], lr | 286 vst1.8 {d8}, [r6], lr |
| 299 vst1.8 {d0}, [r5], lr | 287 vst1.8 {d0}, [r5], lr |
| 300 vst1.8 {d8}, [r6], lr | 288 vst1.8 {d8}, [r6], lr |
| 301 vst1.8 {d0}, [r5], lr | 289 vst1.8 {d0}, [r5], lr |
| 302 vst1.8 {d8}, [r6], lr | 290 vst1.8 {d8}, [r6], lr |
| 303 vst1.8 {d0}, [r5], lr | 291 vst1.8 {d0}, [r5], lr |
| 304 vst1.8 {d8}, [r6], lr | 292 vst1.8 {d8}, [r6], lr |
| 305 vst1.8 {d0}, [r5], lr | 293 vst1.8 {d0}, [r5], lr |
| 306 vst1.8 {d8}, [r6], lr | 294 vst1.8 {d8}, [r6], lr |
| 307 bne extra_top_bottom_16 | 295 bne extra_top_bottom_16 |
| 308 | 296 |
| 309 sub r5, r1, r8 | 297 sub r5, r1, lr, asl #4 ; src_ptr1 - (Border * plane_stride) |
| 310 add r6, r2, lr | 298 add r6, r2, lr ; src_ptr2 + plane_stride |
| 311 subs r7, r7, #1 | 299 subs r7, r7, #1 |
| 312 bne extra_top_bottom_uv | 300 bne extra_top_bottom_uv |
| 313 | 301 |
| 314 b end_of_border_copy_uv | 302 b end_of_border_copy_uv |
| 315 | 303 |
| 316 | |
| 317 ;======================= | |
| 318 b16_extend_frame_borders | |
| 319 ;border = 16 | |
| 320 ;======================= | |
| 321 ;Border copy for Y plane | |
| 322 ;copy the left and right most columns out | |
| 323 sub r5, r1, r3 ;destptr1 | |
| 324 add r6, r1, lr | |
| 325 sub r6, r6, r3, lsl #1 ;destptr2 | |
| 326 sub r2, r6, #1 ;srcptr2 | |
| 327 | |
| 328 ;Do four rows at one time | |
| 329 mov r12, r4, lsr #2 | |
| 330 | |
| 331 copy_left_right_y_b16 | |
| 332 vld1.8 {d0[], d1[]}, [r1], lr | |
| 333 vld1.8 {d4[], d5[]}, [r2], lr | |
| 334 vld1.8 {d8[], d9[]}, [r1], lr | |
| 335 vld1.8 {d12[], d13[]}, [r2], lr | |
| 336 vld1.8 {d16[], d17[]}, [r1], lr | |
| 337 vld1.8 {d20[], d21[]}, [r2], lr | |
| 338 vld1.8 {d24[], d25[]}, [r1], lr | |
| 339 vld1.8 {d28[], d29[]}, [r2], lr | |
| 340 | |
| 341 subs r12, r12, #1 | |
| 342 | |
| 343 vst1.8 {q0}, [r5], lr | |
| 344 vst1.8 {q2}, [r6], lr | |
| 345 vst1.8 {q4}, [r5], lr | |
| 346 vst1.8 {q6}, [r6], lr | |
| 347 vst1.8 {q8}, [r5], lr | |
| 348 vst1.8 {q10}, [r6], lr | |
| 349 vst1.8 {q12}, [r5], lr | |
| 350 vst1.8 {q14}, [r6], lr | |
| 351 | |
| 352 bne copy_left_right_y_b16 | |
| 353 | |
| 354 ;Now copy the top and bottom source lines into each line of the respective borders | |
| 355 ldr r7, [r0, #yv12_buffer_config_y_buffer] ;srcptr1 | |
| 356 mul r8, r3, lr | |
| 357 | |
| 358 mov r12, lr, lsr #7 | |
| 359 | |
| 360 sub r6, r1, r3 ;destptr2 | |
| 361 sub r2, r6, lr ;srcptr2 | |
| 362 sub r1, r7, r3 ;srcptr1 | |
| 363 sub r5, r1, r8 ;destptr1 | |
| 364 | |
| 365 copy_top_bottom_y_b16 | |
| 366 vld1.8 {q0, q1}, [r1]! | |
| 367 vld1.8 {q8, q9}, [r2]! | |
| 368 vld1.8 {q2, q3}, [r1]! | |
| 369 vld1.8 {q10, q11}, [r2]! | |
| 370 vld1.8 {q4, q5}, [r1]! | |
| 371 vld1.8 {q12, q13}, [r2]! | |
| 372 vld1.8 {q6, q7}, [r1]! | |
| 373 vld1.8 {q14, q15}, [r2]! | |
| 374 | |
| 375 mov r7, r3 | |
| 376 | |
| 377 top_bottom_16_b16 | |
| 378 subs r7, r7, #1 | |
| 379 | |
| 380 vst1.8 {q0, q1}, [r5]! | |
| 381 vst1.8 {q8, q9}, [r6]! | |
| 382 vst1.8 {q2, q3}, [r5]! | |
| 383 vst1.8 {q10, q11}, [r6]! | |
| 384 vst1.8 {q4, q5}, [r5]! | |
| 385 vst1.8 {q12, q13}, [r6]! | |
| 386 vst1.8 {q6, q7}, [r5]! | |
| 387 vst1.8 {q14, q15}, [r6]! | |
| 388 | |
| 389 add r5, r5, lr | |
| 390 sub r5, r5, #128 | |
| 391 add r6, r6, lr | |
| 392 sub r6, r6, #128 | |
| 393 | |
| 394 bne top_bottom_16_b16 | |
| 395 | |
| 396 sub r5, r1, r8 | |
| 397 add r6, r2, lr | |
| 398 | |
| 399 subs r12, r12, #1 | |
| 400 bne copy_top_bottom_y_b16 | |
| 401 | |
| 402 mov r7, lr, lsr #4 ;check to see if extra copy is needed | |
| 403 ands r7, r7, #0x7 | |
| 404 bne extra_top_bottom_y_b16 | |
| 405 end_of_border_copy_y_b16 | |
| 406 | |
| 407 ;Border copy for U, V planes | |
| 408 ldr r1, [r0, #yv12_buffer_config_u_buffer] ;srcptr1 | |
| 409 mov lr, lr, lsr #1 ;uv_stride | |
| 410 mov r3, r3, lsr #1 ;border | |
| 411 mov r4, r4, lsr #1 ;uv_height | |
| 412 mov r8, r8, lsr #2 | |
| 413 | |
| 414 mov r10, #2 | |
| 415 | |
| 416 ;copy the left and right most columns out | |
| 417 border_copy_uv_b16 | |
| 418 sub r5, r1, r3 ;destptr1 | |
| 419 add r6, r1, lr | |
| 420 sub r6, r6, r3, lsl #1 ;destptr2 | |
| 421 sub r2, r6, #1 ;srcptr2 | |
| 422 | |
| 423 mov r7, r1 | |
| 424 | |
| 425 ;Do eight rows at one time | |
| 426 mov r12, r4, lsr #3 | |
| 427 | |
| 428 copy_left_right_uv_b16 | |
| 429 vld1.8 {d0[]}, [r1], lr | |
| 430 vld1.8 {d2[]}, [r2], lr | |
| 431 vld1.8 {d4[]}, [r1], lr | |
| 432 vld1.8 {d6[]}, [r2], lr | |
| 433 vld1.8 {d8[]}, [r1], lr | |
| 434 vld1.8 {d10[]}, [r2], lr | |
| 435 vld1.8 {d12[]}, [r1], lr | |
| 436 vld1.8 {d14[]}, [r2], lr | |
| 437 vld1.8 {d16[]}, [r1], lr | |
| 438 vld1.8 {d18[]}, [r2], lr | |
| 439 vld1.8 {d20[]}, [r1], lr | |
| 440 vld1.8 {d22[]}, [r2], lr | |
| 441 vld1.8 {d24[]}, [r1], lr | |
| 442 vld1.8 {d26[]}, [r2], lr | |
| 443 vld1.8 {d28[]}, [r1], lr | |
| 444 vld1.8 {d30[]}, [r2], lr | |
| 445 | |
| 446 subs r12, r12, #1 | |
| 447 | |
| 448 vst1.8 {d0}, [r5], lr | |
| 449 vst1.8 {d2}, [r6], lr | |
| 450 vst1.8 {d4}, [r5], lr | |
| 451 vst1.8 {d6}, [r6], lr | |
| 452 vst1.8 {d8}, [r5], lr | |
| 453 vst1.8 {d10}, [r6], lr | |
| 454 vst1.8 {d12}, [r5], lr | |
| 455 vst1.8 {d14}, [r6], lr | |
| 456 vst1.8 {d16}, [r5], lr | |
| 457 vst1.8 {d18}, [r6], lr | |
| 458 vst1.8 {d20}, [r5], lr | |
| 459 vst1.8 {d22}, [r6], lr | |
| 460 vst1.8 {d24}, [r5], lr | |
| 461 vst1.8 {d26}, [r6], lr | |
| 462 vst1.8 {d28}, [r5], lr | |
| 463 vst1.8 {d30}, [r6], lr | |
| 464 | |
| 465 bne copy_left_right_uv_b16 | |
| 466 | |
| 467 ;Now copy the top and bottom source lines into each line of the respective borders | |
| 468 mov r12, lr, lsr #6 | |
| 469 | |
| 470 sub r6, r1, r3 ;destptr2 | |
| 471 sub r2, r6, lr ;srcptr2 | |
| 472 sub r1, r7, r3 ;srcptr1 | |
| 473 sub r5, r1, r8 ;destptr1 | |
| 474 | |
| 475 copy_top_bottom_uv_b16 | |
| 476 vld1.8 {q0, q1}, [r1]! | |
| 477 vld1.8 {q8, q9}, [r2]! | |
| 478 vld1.8 {q2, q3}, [r1]! | |
| 479 vld1.8 {q10, q11}, [r2]! | |
| 480 | |
| 481 mov r7, r3 | |
| 482 | |
| 483 top_bottom_8_b16 | |
| 484 subs r7, r7, #1 | |
| 485 | |
| 486 vst1.8 {q0, q1}, [r5]! | |
| 487 vst1.8 {q8, q9}, [r6]! | |
| 488 vst1.8 {q2, q3}, [r5]! | |
| 489 vst1.8 {q10, q11}, [r6]! | |
| 490 | |
| 491 add r5, r5, lr | |
| 492 sub r5, r5, #64 | |
| 493 add r6, r6, lr | |
| 494 sub r6, r6, #64 | |
| 495 | |
| 496 bne top_bottom_8_b16 | |
| 497 | |
| 498 sub r5, r1, r8 | |
| 499 add r6, r2, lr | |
| 500 | |
| 501 subs r12, r12, #1 | |
| 502 bne copy_top_bottom_uv_b16 | |
| 503 | |
| 504 mov r7, lr, lsr #3 ;check to see if extra copy is needed | |
| 505 ands r7, r7, #0x7 | |
| 506 bne extra_top_bottom_uv_b16 | |
| 507 | |
| 508 end_of_border_copy_uv_b16 | |
| 509 subs r10, r10, #1 | |
| 510 ldrne r1, [r0, #yv12_buffer_config_v_buffer] ;srcptr1 | |
| 511 bne border_copy_uv_b16 | |
| 512 | |
| 513 vpop {d8-d15} | |
| 514 pop {r4 - r10, pc} | |
| 515 | |
| 516 ;;;;;;;;;;;;;;;;;;;;;; | |
| 517 ;extra copy part for Y | |
| 518 extra_top_bottom_y_b16 | |
| 519 vld1.8 {q0}, [r1]! | |
| 520 vld1.8 {q2}, [r2]! | |
| 521 | |
| 522 mov r9, r3, lsr #3 | |
| 523 | |
| 524 extra_top_bottom_16_b16 | |
| 525 subs r9, r9, #1 | |
| 526 | |
| 527 vst1.8 {q0}, [r5], lr | |
| 528 vst1.8 {q2}, [r6], lr | |
| 529 vst1.8 {q0}, [r5], lr | |
| 530 vst1.8 {q2}, [r6], lr | |
| 531 vst1.8 {q0}, [r5], lr | |
| 532 vst1.8 {q2}, [r6], lr | |
| 533 vst1.8 {q0}, [r5], lr | |
| 534 vst1.8 {q2}, [r6], lr | |
| 535 vst1.8 {q0}, [r5], lr | |
| 536 vst1.8 {q2}, [r6], lr | |
| 537 vst1.8 {q0}, [r5], lr | |
| 538 vst1.8 {q2}, [r6], lr | |
| 539 vst1.8 {q0}, [r5], lr | |
| 540 vst1.8 {q2}, [r6], lr | |
| 541 vst1.8 {q0}, [r5], lr | |
| 542 vst1.8 {q2}, [r6], lr | |
| 543 bne extra_top_bottom_16_b16 | |
| 544 | |
| 545 sub r5, r1, r8 | |
| 546 add r6, r2, lr | |
| 547 subs r7, r7, #1 | |
| 548 bne extra_top_bottom_y_b16 | |
| 549 | |
| 550 b end_of_border_copy_y_b16 | |
| 551 | |
| 552 ;extra copy part for UV | |
| 553 extra_top_bottom_uv_b16 | |
| 554 vld1.8 {d0}, [r1]! | |
| 555 vld1.8 {d8}, [r2]! | |
| 556 | |
| 557 mov r9, r3, lsr #3 | |
| 558 | |
| 559 extra_top_bottom_8_b16 | |
| 560 subs r9, r9, #1 | |
| 561 | |
| 562 vst1.8 {d0}, [r5], lr | |
| 563 vst1.8 {d8}, [r6], lr | |
| 564 vst1.8 {d0}, [r5], lr | |
| 565 vst1.8 {d8}, [r6], lr | |
| 566 vst1.8 {d0}, [r5], lr | |
| 567 vst1.8 {d8}, [r6], lr | |
| 568 vst1.8 {d0}, [r5], lr | |
| 569 vst1.8 {d8}, [r6], lr | |
| 570 vst1.8 {d0}, [r5], lr | |
| 571 vst1.8 {d8}, [r6], lr | |
| 572 vst1.8 {d0}, [r5], lr | |
| 573 vst1.8 {d8}, [r6], lr | |
| 574 vst1.8 {d0}, [r5], lr | |
| 575 vst1.8 {d8}, [r6], lr | |
| 576 vst1.8 {d0}, [r5], lr | |
| 577 vst1.8 {d8}, [r6], lr | |
| 578 bne extra_top_bottom_8_b16 | |
| 579 | |
| 580 sub r5, r1, r8 | |
| 581 add r6, r2, lr | |
| 582 subs r7, r7, #1 | |
| 583 bne extra_top_bottom_uv_b16 | |
| 584 | |
| 585 b end_of_border_copy_uv_b16 | |
| 586 | |
| 587 ENDP | 304 ENDP |
| 588 END | 305 END |
| OLD | NEW |