OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> | 2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
3 * | 3 * |
4 * This file is part of FFmpeg. | 4 * This file is part of FFmpeg. |
5 * | 5 * |
6 * FFmpeg is free software; you can redistribute it and/or | 6 * FFmpeg is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Lesser General Public | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | 8 * License as published by the Free Software Foundation; either |
9 * version 2.1 of the License, or (at your option) any later version. | 9 * version 2.1 of the License, or (at your option) any later version. |
10 * | 10 * |
11 * FFmpeg is distributed in the hope that it will be useful, | 11 * FFmpeg is distributed in the hope that it will be useful, |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 * Lesser General Public License for more details. | 14 * Lesser General Public License for more details. |
15 * | 15 * |
16 * You should have received a copy of the GNU Lesser General Public | 16 * You should have received a copy of the GNU Lesser General Public |
17 * License along with FFmpeg; if not, write to the Free Software | 17 * License along with FFmpeg; if not, write to the Free Software |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 */ | 19 */ |
20 | 20 |
21 #include "asm.S" | 21 #include "asm.S" |
22 | 22 |
| 23 preserve8 |
| 24 |
23 .text | 25 .text |
24 | 26 |
/*
 * call_2x_pixels type, subp
 *
 * Emits ff_<type>_pixels16<subp>_armv6 as two invocations of the matching
 * ff_<type>_pixels8<subp>_armv6: first with the incoming r0-r3, then with
 * r0 and r1 each advanced by 8.  The second invocation is a plain branch,
 * so the 8-wide routine returns straight to the original caller.
 *
 * NOTE(review): five registers are pushed, so sp moves by 20 bytes across
 * the bl -- confirm this is acceptable next to the "preserve8" directive.
 */
| 27 .macro call_2x_pixels type, subp |
| 28 function ff_\type\()_pixels16\subp\()_armv6, export=1 |
/* r0-r3 are saved because the 8-wide routines in this file modify them */
| 29 push {r0-r3, lr} |
| 30 bl ff_\type\()_pixels8\subp\()_armv6 |
| 31 pop {r0-r3, lr} |
/* move both pointers 8 bytes to the right for the second half */
| 32 add r0, r0, #8 |
| 33 add r1, r1, #8 |
/* tail call: lr was restored above, so the callee returns to our caller */
| 34 b ff_\type\()_pixels8\subp\()_armv6 |
| 35 .endfunc |
| 36 .endm |
| 37 |
/*
 * Instantiate the 16-wide wrappers: avg (empty suffix), and put with the
 * _x2, _y2, _x2_no_rnd and _y2_no_rnd suffixes.
 */
| 38 call_2x_pixels avg |
| 39 call_2x_pixels put, _x2 |
| 40 call_2x_pixels put, _y2 |
| 41 call_2x_pixels put, _x2_no_rnd |
| 42 call_2x_pixels put, _y2_no_rnd |
| 43 |
/*
 * ff_put_pixels16_armv6
 *
 * Copies rows of 16 bytes from [r1] to [r0].
 *   r0  destination pointer, advanced by r2 after each row
 *   r1  source pointer, advanced by r2 after each row
 *   r2  byte offset between consecutive rows (used for both pointers)
 *   r3  row count; two rows are copied per iteration and the loop ends
 *       only when r3 reaches exactly zero (bne)
 * r4-r11 are saved on entry and restored before returning.
 */
| 44 function ff_put_pixels16_armv6, export=1 |
| 45 push {r4-r11} |
| 46 1: |
/* first row: bytes 4..15 are fetched before the post-indexed load moves r1 */
| 47 ldr r5, [r1, #4] |
| 48 ldr r6, [r1, #8] |
| 49 ldr r7, [r1, #12] |
| 50 ldr r4, [r1], r2 |
| 51 strd r6, r7, [r0, #8] |
/* second row is loaded into r8-r11 while the first row is still being stored */
| 52 ldr r9, [r1, #4] |
| 53 strd r4, r5, [r0], r2 |
| 54 ldr r10, [r1, #8] |
| 55 ldr r11, [r1, #12] |
| 56 ldr r8, [r1], r2 |
| 57 strd r10, r11, [r0, #8] |
/* the flags set here are still intact at the bne: strd does not change them */
| 58 subs r3, r3, #2 |
| 59 strd r8, r9, [r0], r2 |
| 60 bne 1b |
| 61 |
| 62 pop {r4-r11} |
| 63 bx lr |
| 64 .endfunc |
| 65 |
/*
 * ff_put_pixels8_armv6
 *
 * Copies rows of 8 bytes from [r1] to [r0].
 *   r0  destination pointer, advanced by r2 after each row
 *   r1  source pointer, advanced by r2 after each row
 *   r2  byte offset between consecutive rows
 *   r3  row count; two rows per iteration, loop ends when r3 hits zero
 * r4-r7 are saved on entry and restored before returning.
 */
| 66 function ff_put_pixels8_armv6, export=1 |
| 67 push {r4-r7} |
| 68 1: |
/* upper word first, because the post-indexed load of the lower word moves r1 */
| 69 ldr r5, [r1, #4] |
| 70 ldr r4, [r1], r2 |
| 71 ldr r7, [r1, #4] |
| 72 strd r4, r5, [r0], r2 |
| 73 ldr r6, [r1], r2 |
| 74 subs r3, r3, #2 |
| 75 strd r6, r7, [r0], r2 |
| 76 bne 1b |
| 77 |
| 78 pop {r4-r7} |
| 79 bx lr |
| 80 .endfunc |
| 81 |
/*
 * ff_put_pixels8_x2_armv6
 *
 * For each row, stores to [r0] the per-byte average of the 8 bytes at r1
 * and the 8 bytes at r1+1, rounded up.
 *   r0  destination pointer, advanced by r2 per row
 *   r1  source pointer, advanced by r2 per row (9 bytes are read per row)
 *   r2  byte offset between consecutive rows
 *   r3  row count; two rows per iteration, loop ends when r3 hits zero
 *
 * Rounding: uhadd8 yields (a+b)>>1 per byte; (a^b) & 0x01 is the bit that
 * the halving dropped, and uadd8 adds it back to give (a+b+1)>>1.
 * The lsr #8 / lsl #24 merge yields the bytes at r1+1..r1+4 when words are
 * loaded little-endian.
 */
| 82 function ff_put_pixels8_x2_armv6, export=1 |
| 83 push {r4-r11, lr} |
/* r12 = 0x01010101, the per-byte rounding mask */
| 84 mov r12, #1 |
| 85 orr r12, r12, r12, lsl #8 |
| 86 orr r12, r12, r12, lsl #16 |
| 87 1: |
| 88 ldr r4, [r1] |
/* none of the instructions up to the bne alter the flags it tests */
| 89 subs r3, r3, #2 |
| 90 ldr r5, [r1, #4] |
| 91 ldr r7, [r1, #5] |
/* r6 = word starting at r1+1, assembled from r4 and r5 */
| 92 lsr r6, r4, #8 |
/* step to the second row of this iteration */
| 93 ldr r8, [r1, r2]! |
| 94 orr r6, r6, r5, lsl #24 |
| 95 ldr r9, [r1, #4] |
| 96 ldr r11, [r1, #5] |
| 97 lsr r10, r8, #8 |
| 98 add r1, r1, r2 |
| 99 orr r10, r10, r9, lsl #24 |
/* first row: r4 = avg(r4, r6), r5 = avg(r5, r7); r14/r6 hold the round bits */
| 100 eor r14, r4, r6 |
| 101 uhadd8 r4, r4, r6 |
| 102 eor r6, r5, r7 |
| 103 uhadd8 r5, r5, r7 |
| 104 and r14, r14, r12 |
| 105 and r6, r6, r12 |
| 106 uadd8 r4, r4, r14 |
/* second row: r8 = avg(r8, r10), r9 = avg(r9, r11) */
| 107 eor r14, r8, r10 |
| 108 uadd8 r5, r5, r6 |
| 109 eor r6, r9, r11 |
| 110 uhadd8 r8, r8, r10 |
| 111 and r14, r14, r12 |
| 112 uhadd8 r9, r9, r11 |
| 113 and r6, r6, r12 |
| 114 uadd8 r8, r8, r14 |
| 115 strd r4, r5, [r0], r2 |
| 116 uadd8 r9, r9, r6 |
| 117 strd r8, r9, [r0], r2 |
| 118 bne 1b |
| 119 |
| 120 pop {r4-r11, pc} |
| 121 .endfunc |
| 122 |
/*
 * ff_put_pixels8_y2_armv6
 *
 * For each output row, stores to [r0] the per-byte average of an 8-byte
 * source row and the row r2 bytes below it, rounded up.
 *   r0  destination pointer, advanced by r2 per row
 *   r1  source pointer, stepped by r2 per row
 *   r2  byte offset between consecutive rows
 *   r3  row count; two rows per iteration, loop ends when r3 hits zero
 *
 * Rounding uses uhadd8 plus the masked low bit of (a^b), giving (a+b+1)>>1.
 * Two source rows are loaded before the loop and two more in every
 * iteration, so the last row loaded (into r6/r7) is never used.
 */
| 123 function ff_put_pixels8_y2_armv6, export=1 |
| 124 push {r4-r11} |
/* r12 = 0x01010101, the per-byte rounding mask */
| 125 mov r12, #1 |
| 126 orr r12, r12, r12, lsl #8 |
| 127 orr r12, r12, r12, lsl #16 |
/* r4:r5 = first source row, r6:r7 = second source row */
| 128 ldr r4, [r1] |
| 129 ldr r5, [r1, #4] |
| 130 ldr r6, [r1, r2]! |
| 131 ldr r7, [r1, #4] |
| 132 1: |
| 133 subs r3, r3, #2 |
/* r8:r9 = rounded average of rows r4:r5 and r6:r7 */
| 134 uhadd8 r8, r4, r6 |
| 135 eor r10, r4, r6 |
| 136 uhadd8 r9, r5, r7 |
| 137 eor r11, r5, r7 |
| 138 and r10, r10, r12 |
/* r4:r5 is reloaded with the next source row */
| 139 ldr r4, [r1, r2]! |
| 140 uadd8 r8, r8, r10 |
| 141 and r11, r11, r12 |
| 142 uadd8 r9, r9, r11 |
| 143 ldr r5, [r1, #4] |
/* r10:r11 = rounded average of r6:r7 and the row just loaded */
| 144 uhadd8 r10, r4, r6 |
| 145 eor r6, r4, r6 |
| 146 uhadd8 r11, r5, r7 |
| 147 and r6, r6, r12 |
| 148 eor r7, r5, r7 |
| 149 uadd8 r10, r10, r6 |
| 150 and r7, r7, r12 |
/* r6:r7 is reloaded with the following row, ready for the next iteration */
| 151 ldr r6, [r1, r2]! |
| 152 uadd8 r11, r11, r7 |
| 153 strd r8, r9, [r0], r2 |
| 154 ldr r7, [r1, #4] |
| 155 strd r10, r11, [r0], r2 |
| 156 bne 1b |
| 157 |
| 158 pop {r4-r11} |
| 159 bx lr |
| 160 .endfunc |
| 161 |
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * For each row, stores to [r0] the per-byte average of the 8 bytes at r1
 * and the 8 bytes at r1+1, using uhadd8 only, i.e. (a+b)>>1 with no
 * rounding correction.
 *   r0  destination pointer, advanced by r2 per row
 *   r1  source pointer, advanced by r2 per row (9 bytes are read per row)
 *   r2  byte offset between consecutive rows
 *   r3  row count; two rows per iteration, loop ends when r3 hits zero
 * r12 and r14 serve as extra temporaries; lr is saved and popped into pc.
 */
| 162 function ff_put_pixels8_x2_no_rnd_armv6, export=1 |
| 163 push {r4-r9, lr} |
| 164 1: |
/* no instruction between here and the bne alters the flags it tests */
| 165 subs r3, r3, #2 |
| 166 ldr r4, [r1] |
| 167 ldr r5, [r1, #4] |
| 168 ldr r7, [r1, #5] |
| 169 ldr r8, [r1, r2]! |
| 170 ldr r9, [r1, #4] |
| 171 ldr r14, [r1, #5] |
| 172 add r1, r1, r2 |
/* r6 and r12 = words starting one byte later (little-endian merge of two loads) */
| 173 lsr r6, r4, #8 |
| 174 orr r6, r6, r5, lsl #24 |
| 175 lsr r12, r8, #8 |
| 176 orr r12, r12, r9, lsl #24 |
| 177 uhadd8 r4, r4, r6 |
| 178 uhadd8 r5, r5, r7 |
| 179 uhadd8 r8, r8, r12 |
| 180 uhadd8 r9, r9, r14 |
| 181 stm r0, {r4,r5} |
| 182 add r0, r0, r2 |
| 183 stm r0, {r8,r9} |
| 184 add r0, r0, r2 |
| 185 bne 1b |
| 186 |
| 187 pop {r4-r9, pc} |
| 188 .endfunc |
| 189 |
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * For each output row, stores to [r0] the per-byte average of an 8-byte
 * source row and the row r2 bytes below it, using uhadd8 only, i.e.
 * (a+b)>>1 with no rounding correction.
 *   r0  destination pointer, advanced by r2 per row
 *   r1  source pointer, stepped by r2 per row
 *   r2  byte offset between consecutive rows
 *   r3  row count; two rows per iteration, loop ends when r3 hits zero
 * As two rows are preloaded and two more are loaded in every iteration,
 * the final row loaded into r6/r7 is never used.
 */
| 190 function ff_put_pixels8_y2_no_rnd_armv6, export=1 |
| 191 push {r4-r9, lr} |
/* r4:r5 = first source row, r6:r7 = second source row */
| 192 ldr r4, [r1] |
| 193 ldr r5, [r1, #4] |
| 194 ldr r6, [r1, r2]! |
| 195 ldr r7, [r1, #4] |
| 196 1: |
| 197 subs r3, r3, #2 |
/* each average is computed just before its older input row is overwritten */
| 198 uhadd8 r8, r4, r6 |
| 199 ldr r4, [r1, r2]! |
| 200 uhadd8 r9, r5, r7 |
| 201 ldr r5, [r1, #4] |
| 202 uhadd8 r12, r4, r6 |
| 203 ldr r6, [r1, r2]! |
| 204 uhadd8 r14, r5, r7 |
| 205 ldr r7, [r1, #4] |
| 206 stm r0, {r8,r9} |
| 207 add r0, r0, r2 |
| 208 stm r0, {r12,r14} |
| 209 add r0, r0, r2 |
| 210 bne 1b |
| 211 |
| 212 pop {r4-r9, pc} |
| 213 .endfunc |
| 214 |
/*
 * ff_avg_pixels8_armv6
 *
 * For each row, replaces the 8 bytes at [r0] with the per-byte average of
 * those bytes and the 8 bytes at [r1], rounded up.
 *   r0  destination pointer (read and written), advanced by r2 per row
 *   r1  source pointer, advanced by r2 per row
 *   r2  byte offset between consecutive rows
 *   r3  row count, consumed two rows at a time; the loop exits through
 *       label 2 when the count reaches exactly zero
 *
 * Rounding uses uhadd8 plus the masked low bit of (a^b), giving (a+b+1)>>1.
 * The loop is software pipelined: the loads for the next row are issued
 * while the current row is still being finished.
 */
| 215 function ff_avg_pixels8_armv6, export=1 |
| 216 pld [r1, r2] |
| 217 push {r4-r10, lr} |
/* lr = 0x01010101, the per-byte rounding mask */
| 218 mov lr, #1 |
| 219 orr lr, lr, lr, lsl #8 |
| 220 orr lr, lr, lr, lsl #16 |
/* prime the pipeline: r4:r5 = first dst row, r9:r10 = first src row */
| 221 ldrd r4, r5, [r0] |
| 222 ldr r10, [r1, #4] |
| 223 ldr r9, [r1], r2 |
/* these flags are consumed by the beq inside the loop */
| 224 subs r3, r3, #2 |
| 225 1: |
| 226 pld [r1, r2] |
| 227 eor r8, r4, r9 |
| 228 uhadd8 r4, r4, r9 |
| 229 eor r12, r5, r10 |
/* r6:r7 = dst row below the one being computed */
| 230 ldrd r6, r7, [r0, r2] |
| 231 uhadd8 r5, r5, r10 |
| 232 and r8, r8, lr |
| 233 ldr r10, [r1, #4] |
| 234 and r12, r12, lr |
| 235 uadd8 r4, r4, r8 |
| 236 ldr r9, [r1], r2 |
| 237 eor r8, r6, r9 |
| 238 uadd8 r5, r5, r12 |
| 239 pld [r1, r2, lsl #1] |
| 240 eor r12, r7, r10 |
| 241 uhadd8 r6, r6, r9 |
| 242 strd r4, r5, [r0], r2 |
| 243 uhadd8 r7, r7, r10 |
/* last pair of rows: finish the second row at label 2 without loading more */
| 244 beq 2f |
| 245 and r8, r8, lr |
/* r0 already points at the second row, so [r0, r2] is the next pair's first row */
| 246 ldrd r4, r5, [r0, r2] |
| 247 uadd8 r6, r6, r8 |
| 248 ldr r10, [r1, #4] |
| 249 and r12, r12, lr |
| 250 subs r3, r3, #2 |
| 251 uadd8 r7, r7, r12 |
| 252 ldr r9, [r1], r2 |
| 253 strd r6, r7, [r0], r2 |
| 254 b 1b |
| 255 2: |
| 256 and r8, r8, lr |
| 257 and r12, r12, lr |
| 258 uadd8 r6, r6, r8 |
| 259 uadd8 r7, r7, r12 |
| 260 strd r6, r7, [r0], r2 |
| 261 |
| 262 pop {r4-r10, pc} |
| 263 .endfunc |
| 264 |
/*
 * ff_add_pixels_clamped_armv6
 *
 * For each of 8 rows, adds eight 16-bit values read from [r0] to the eight
 * bytes at [r1], saturates every sum to the range 0..255 (usat16 #8) and
 * writes the eight result bytes back to [r1].
 *   r0  pointer to the 16-bit values; 16 bytes are consumed per row (ldm
 *       with write-back)
 *   r1  pointer to the bytes, advanced by r2 per row
 *   r2  byte offset between consecutive rows of bytes
 * r3 is overwritten with the row counter (8); its incoming value is unused.
 */
25 function ff_add_pixels_clamped_armv6, export=1 | 265 function ff_add_pixels_clamped_armv6, export=1 |
26 push {r4-r8,lr} | 266 push {r4-r8,lr} |
27 mov r3, #8 | 267 mov r3, #8 |
28 1: | 268 1: |
29 ldm r0!, {r4,r5,r12,lr} | 269 ldm r0!, {r4,r5,r12,lr} |
30 ldrd r6, r7, [r1] | 270 ldrd r6, r7, [r1] |
/* regroup halfwords: r8 = {h0,h2}, r5 = {h1,h3}, r4 = {h4,h6}, lr = {h5,h7} */
31 pkhbt r8, r4, r5, lsl #16 | 271 pkhbt r8, r4, r5, lsl #16 |
32 pkhtb r5, r5, r4, asr #16 | 272 pkhtb r5, r5, r4, asr #16 |
33 pkhbt r4, r12, lr, lsl #16 | 273 pkhbt r4, r12, lr, lsl #16 |
34 pkhtb lr, lr, r12, asr #16 | 274 pkhtb lr, lr, r12, asr #16 |
35 pld [r1, r2] | 275 pld [r1, r2] |
/* add zero-extended even bytes (no rotate) and odd bytes (ror #8) */
36 uxtab16 r8, r8, r6 | 276 uxtab16 r8, r8, r6 |
37 uxtab16 r5, r5, r6, ror #8 | 277 uxtab16 r5, r5, r6, ror #8 |
38 uxtab16 r4, r4, r7 | 278 uxtab16 r4, r4, r7 |
39 uxtab16 lr, lr, r7, ror #8 | 279 uxtab16 lr, lr, r7, ror #8 |
/* clamp every halfword to 8 unsigned bits */
40 usat16 r8, #8, r8 | 280 usat16 r8, #8, r8 |
41 usat16 r5, #8, r5 | 281 usat16 r5, #8, r5 |
42 usat16 r4, #8, r4 | 282 usat16 r4, #8, r4 |
43 usat16 lr, #8, lr | 283 usat16 lr, #8, lr |
/* re-interleave even and odd results into byte order */
44 orr r6, r8, r5, lsl #8 | 284 orr r6, r8, r5, lsl #8 |
45 orr r7, r4, lr, lsl #8 | 285 orr r7, r4, lr, lsl #8 |
46 subs r3, r3, #1 | 286 subs r3, r3, #1 |
47 strd r6, r7, [r1], r2 | 287 strd r6, r7, [r1], r2 |
48 bgt 1b | 288 bgt 1b |
49 pop {r4-r8,pc} | 289 pop {r4-r8,pc} |
50 .endfunc | 290 .endfunc |
| 291 |
/*
 * ff_get_pixels_armv6
 *
 * For each of 8 rows, reads 8 bytes from [r1], zero-extends every byte to
 * 16 bits and stores the 8 halfwords (16 bytes) to [r0] in the same order.
 *   r0  output pointer, advanced by 16 bytes per row (stm with write-back)
 *   r1  input pointer, advanced by r2 per row
 *   r2  byte offset between consecutive input rows
 * lr is used as the row counter (8) and is restored via the pop into pc.
 */
| 292 function ff_get_pixels_armv6, export=1 |
| 293 pld [r1, r2] |
| 294 push {r4-r8, lr} |
| 295 mov lr, #8 |
| 296 1: |
| 297 ldrd r4, r5, [r1], r2 |
/* flags survive until the bgt: nothing in between changes them */
| 298 subs lr, lr, #1 |
/* split each word into even bytes (no rotate) and odd bytes (ror #8) */
| 299 uxtb16 r6, r4 |
| 300 uxtb16 r4, r4, ror #8 |
| 301 uxtb16 r12, r5 |
| 302 uxtb16 r8, r5, ror #8 |
| 303 pld [r1, r2] |
/* re-pair halfwords so that they come out in original byte order */
| 304 pkhbt r5, r6, r4, lsl #16 |
| 305 pkhtb r6, r4, r6, asr #16 |
| 306 pkhbt r7, r12, r8, lsl #16 |
| 307 pkhtb r12, r8, r12, asr #16 |
| 308 stm r0!, {r5,r6,r7,r12} |
| 309 bgt 1b |
| 310 |
| 311 pop {r4-r8, pc} |
| 312 .endfunc |
| 313 |
/*
 * ff_diff_pixels_armv6
 *
 * For each of 8 rows, reads 8 bytes from [r1] and 8 bytes from [r2],
 * zero-extends them to 16 bits, subtracts the r2 value from the r1 value
 * per element (ssub16) and stores the 8 halfword differences to [r0].
 *   r0  output pointer, advanced by 16 bytes per row (stm with write-back)
 *   r1  first input pointer (minuend), advanced by r3 per row
 *   r2  second input pointer (subtrahend), advanced by r3 per row
 *   r3  byte offset between consecutive input rows
 * lr is used as the row counter (8) and is restored via the pop into pc.
 */
| 314 function ff_diff_pixels_armv6, export=1 |
| 315 pld [r1, r3] |
| 316 pld [r2, r3] |
| 317 push {r4-r9, lr} |
| 318 mov lr, #8 |
| 319 1: |
| 320 ldrd r4, r5, [r1], r3 |
| 321 ldrd r6, r7, [r2], r3 |
/* bytes 0..3: even and odd bytes are widened and subtracted separately */
| 322 uxtb16 r8, r4 |
| 323 uxtb16 r4, r4, ror #8 |
| 324 uxtb16 r9, r6 |
| 325 uxtb16 r6, r6, ror #8 |
| 326 pld [r1, r3] |
| 327 ssub16 r9, r8, r9 |
| 328 ssub16 r6, r4, r6 |
| 329 uxtb16 r8, r5 |
| 330 uxtb16 r5, r5, ror #8 |
| 331 pld [r2, r3] |
/* re-pair the differences so that they come out in original byte order */
| 332 pkhbt r4, r9, r6, lsl #16 |
| 333 pkhtb r6, r6, r9, asr #16 |
/* bytes 4..7, same scheme */
| 334 uxtb16 r9, r7 |
| 335 uxtb16 r7, r7, ror #8 |
| 336 ssub16 r9, r8, r9 |
| 337 ssub16 r5, r5, r7 |
| 338 subs lr, lr, #1 |
| 339 pkhbt r8, r9, r5, lsl #16 |
| 340 pkhtb r9, r5, r9, asr #16 |
| 341 stm r0!, {r4,r6,r8,r9} |
| 342 bgt 1b |
| 343 |
| 344 pop {r4-r9, pc} |
| 345 .endfunc |
| 346 |
/*
 * ff_pix_abs16_armv6
 *
 * Returns in r0 the sum of absolute byte differences between rows of 16
 * bytes at [r1] and [r2].
 *   r0    incoming value is overwritten without being read
 *   r1    first block pointer, advanced by r3 per row
 *   r2    second block pointer, advanced by r3 per row
 *   r3    byte offset between consecutive rows
 *   [sp]  row count, read from the first stack word on entry; the loop
 *         ends when it reaches exactly zero (beq)
 * Two partial sums are kept in r12 and lr and added at the end.
 */
| 347 function ff_pix_abs16_armv6, export=1 |
/* fetch the stack-passed row count before the push moves sp */
| 348 ldr r0, [sp] |
| 349 push {r4-r9, lr} |
| 350 mov r12, #0 |
| 351 mov lr, #0 |
/* preload the first row of r1 and the first word of r2 */
| 352 ldm r1, {r4-r7} |
| 353 ldr r8, [r2] |
| 354 1: |
| 355 ldr r9, [r2, #4] |
| 356 pld [r1, r3] |
| 357 usada8 r12, r4, r8, r12 |
| 358 ldr r8, [r2, #8] |
| 359 pld [r2, r3] |
| 360 usada8 lr, r5, r9, lr |
| 361 ldr r9, [r2, #12] |
| 362 usada8 r12, r6, r8, r12 |
| 363 subs r0, r0, #1 |
| 364 usada8 lr, r7, r9, lr |
/* after the last row, skip the loads for a further row */
| 365 beq 2f |
| 366 add r1, r1, r3 |
| 367 ldm r1, {r4-r7} |
| 368 add r2, r2, r3 |
| 369 ldr r8, [r2] |
| 370 b 1b |
| 371 2: |
| 372 add r0, r12, lr |
| 373 pop {r4-r9, pc} |
| 374 .endfunc |
| 375 |
/*
 * ff_pix_abs16_x2_armv6
 *
 * Returns in r0 the sum of absolute byte differences between rows of 16
 * bytes at [r1] and the rounded-up average of each byte of the [r2] row
 * with its right-hand neighbour (17 bytes of [r2] are read per row).
 *   r0    incoming value is overwritten without being read
 *   r1    first block pointer, advanced by r3 per row
 *   r2    second block pointer (horizontally averaged), advanced by r3
 *   r3    byte offset between consecutive rows
 *   [sp]  row count, read from the first stack word on entry
 *
 * Rounding uses uhadd8 plus the masked low bit of (a^b), giving (a+b+1)>>1.
 * The lsr #8 / lsl #24 merges produce the "one byte to the right" words
 * when words are loaded little-endian.
 */
| 376 function ff_pix_abs16_x2_armv6, export=1 |
/* fetch the stack-passed row count before the push moves sp */
| 377 ldr r12, [sp] |
| 378 push {r4-r11, lr} |
| 379 mov r0, #0 |
/* lr = 0x01010101, the per-byte rounding mask */
| 380 mov lr, #1 |
| 381 orr lr, lr, lr, lsl #8 |
| 382 orr lr, lr, lr, lsl #16 |
| 383 1: |
/* bytes 0..3: r7 = avg(r2[0..3], r2[1..4]), compared with r1[0..3] */
| 384 ldr r8, [r2] |
| 385 ldr r9, [r2, #4] |
| 386 lsr r10, r8, #8 |
| 387 ldr r4, [r1] |
| 388 lsr r6, r9, #8 |
| 389 orr r10, r10, r9, lsl #24 |
| 390 ldr r5, [r2, #8] |
| 391 eor r11, r8, r10 |
| 392 uhadd8 r7, r8, r10 |
| 393 orr r6, r6, r5, lsl #24 |
| 394 and r11, r11, lr |
| 395 uadd8 r7, r7, r11 |
| 396 ldr r8, [r1, #4] |
| 397 usada8 r0, r4, r7, r0 |
/* bytes 4..7: r4 = avg(r2[4..7], r2[5..8]), compared with r1[4..7] */
| 398 eor r7, r9, r6 |
| 399 lsr r10, r5, #8 |
| 400 and r7, r7, lr |
| 401 uhadd8 r4, r9, r6 |
| 402 ldr r6, [r2, #12] |
| 403 uadd8 r4, r4, r7 |
| 404 pld [r1, r3] |
| 405 orr r10, r10, r6, lsl #24 |
| 406 usada8 r0, r8, r4, r0 |
/* bytes 8..11: r8 = avg(r2[8..11], r2[9..12]), compared with r1[8..11] */
| 407 ldr r4, [r1, #8] |
| 408 eor r11, r5, r10 |
/* r7 = byte 16 of the r2 row, needed as right neighbour of byte 15 */
| 409 ldrb r7, [r2, #16] |
| 410 and r11, r11, lr |
| 411 uhadd8 r8, r5, r10 |
| 412 ldr r5, [r1, #12] |
| 413 uadd8 r8, r8, r11 |
| 414 pld [r2, r3] |
| 415 lsr r10, r6, #8 |
| 416 usada8 r0, r4, r8, r0 |
/* bytes 12..15: r9 = avg(r2[12..15], r2[13..16]), compared with r1[12..15] */
| 417 orr r10, r10, r7, lsl #24 |
/* flags survive until the bgt: nothing in between changes them */
| 418 subs r12, r12, #1 |
| 419 eor r11, r6, r10 |
| 420 add r1, r1, r3 |
| 421 uhadd8 r9, r6, r10 |
| 422 and r11, r11, lr |
| 423 uadd8 r9, r9, r11 |
| 424 add r2, r2, r3 |
| 425 usada8 r0, r5, r9, r0 |
| 426 bgt 1b |
| 427 |
| 428 pop {r4-r11, pc} |
| 429 .endfunc |
| 430 |
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 *
 * Processes one row for the vertically averaged SAD.
 *   On entry: p0-p3 hold the previous 16-byte row of the r2 block,
 *             lr holds the per-byte rounding mask, r0 the running sum.
 *   Effect:   loads the current r2 row into n0-n3, forms the rounded-up
 *             per-byte average of previous and current row in p0-p3,
 *             accumulates its absolute difference against the 16 bytes at
 *             [r1] into r0, then advances r1 and r2 by r3.
 *   On exit:  n0-n3 hold the current r2 row; p0-p3 are clobbered.
 * Registers are recycled as temporaries as soon as their value is dead, so
 * the instruction order matters.
 */
| 431 .macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3 |
/* word 0: n1 and n2 are still free and serve as temporaries */
| 432 ldr \n0, [r2] |
| 433 eor \n1, \p0, \n0 |
| 434 uhadd8 \p0, \p0, \n0 |
| 435 and \n1, \n1, lr |
| 436 ldr \n2, [r1] |
| 437 uadd8 \p0, \p0, \n1 |
| 438 ldr \n1, [r2, #4] |
| 439 usada8 r0, \p0, \n2, r0 |
| 440 pld [r1, r3] |
/* word 1: n3 holds the round bits, the now dead p0 receives the r1 word */
| 441 eor \n3, \p1, \n1 |
| 442 uhadd8 \p1, \p1, \n1 |
| 443 and \n3, \n3, lr |
| 444 ldr \p0, [r1, #4] |
| 445 uadd8 \p1, \p1, \n3 |
| 446 ldr \n2, [r2, #8] |
| 447 usada8 r0, \p1, \p0, r0 |
| 448 pld [r2, r3] |
/* word 2: p0 and p1 are dead and serve as temporaries */
| 449 eor \p0, \p2, \n2 |
| 450 uhadd8 \p2, \p2, \n2 |
| 451 and \p0, \p0, lr |
| 452 ldr \p1, [r1, #8] |
| 453 uadd8 \p2, \p2, \p0 |
| 454 ldr \n3, [r2, #12] |
| 455 usada8 r0, \p2, \p1, r0 |
/* word 3 */
| 456 eor \p1, \p3, \n3 |
| 457 uhadd8 \p3, \p3, \n3 |
| 458 and \p1, \p1, lr |
| 459 ldr \p0, [r1, #12] |
| 460 uadd8 \p3, \p3, \p1 |
| 461 add r1, r1, r3 |
| 462 usada8 r0, \p3, \p0, r0 |
| 463 add r2, r2, r3 |
| 464 .endm |
| 465 |
/*
 * ff_pix_abs16_y2_armv6
 *
 * Returns in r0 the sum of absolute byte differences between rows of 16
 * bytes at [r1] and the rounded-up average of each [r2] row with the row
 * r3 bytes below it.
 *   r0    incoming value is overwritten without being read
 *   r1    first block pointer, advanced by r3 per row
 *   r2    second block pointer (vertically averaged), advanced by r3
 *   r3    byte offset between consecutive rows
 *   [sp]  row count, read from the first stack word on entry; consumed
 *         two rows per iteration
 */
| 466 function ff_pix_abs16_y2_armv6, export=1 |
| 467 pld [r1] |
| 468 pld [r2] |
/* fetch the stack-passed row count before the push moves sp */
| 469 ldr r12, [sp] |
| 470 push {r4-r11, lr} |
| 471 mov r0, #0 |
/* lr = 0x01010101, the rounding mask expected by usad_y2 */
| 472 mov lr, #1 |
| 473 orr lr, lr, lr, lsl #8 |
| 474 orr lr, lr, lr, lsl #16 |
/* r4-r7 = first row of the r2 block; r2 then points at its second row */
| 475 ldr r4, [r2] |
| 476 ldr r5, [r2, #4] |
| 477 ldr r6, [r2, #8] |
| 478 ldr r7, [r2, #12] |
| 479 add r2, r2, r3 |
| 480 1: |
/* the two expansions swap register sets, so each leaves the row the next needs */
| 481 usad_y2 r4, r5, r6, r7, r8, r9, r10, r11 |
/* the second expansion contains no flag-setting instruction, so bgt sees these flags */
| 482 subs r12, r12, #2 |
| 483 usad_y2 r8, r9, r10, r11, r4, r5, r6, r7 |
| 484 bgt 1b |
| 485 |
| 486 pop {r4-r11, pc} |
| 487 .endfunc |
| 488 |
/*
 * ff_pix_abs8_armv6
 *
 * Returns in r0 the sum of absolute byte differences between rows of 8
 * bytes at [r1] and [r2].
 *   r0    incoming value is overwritten without being read
 *   r1    first block pointer, advanced by r3 per row
 *   r2    second block pointer, advanced by r3 per row
 *   r3    byte offset between consecutive rows
 *   [sp]  row count, read from the first stack word on entry; consumed
 *         two rows per iteration, the loop exits when it reaches exactly
 *         zero (beq)
 * Two partial sums are kept in r0 and lr and added at the end.
 */
| 489 function ff_pix_abs8_armv6, export=1 |
| 490 pld [r2, r3] |
/* fetch the stack-passed row count before the push moves sp */
| 491 ldr r12, [sp] |
| 492 push {r4-r9, lr} |
| 493 mov r0, #0 |
| 494 mov lr, #0 |
/* r4:r5 = first row of the r1 block */
| 495 ldrd r4, r5, [r1], r3 |
| 496 1: |
| 497 subs r12, r12, #2 |
| 498 ldr r7, [r2, #4] |
| 499 ldr r6, [r2], r3 |
/* r8:r9 = second row of this pair from r1 */
| 500 ldrd r8, r9, [r1], r3 |
| 501 usada8 r0, r4, r6, r0 |
| 502 pld [r2, r3] |
| 503 usada8 lr, r5, r7, lr |
| 504 ldr r7, [r2, #4] |
| 505 ldr r6, [r2], r3 |
/* on the last pair, skip loading a further r1 row */
| 506 beq 2f |
| 507 ldrd r4, r5, [r1], r3 |
| 508 usada8 r0, r8, r6, r0 |
| 509 pld [r2, r3] |
| 510 usada8 lr, r9, r7, lr |
| 511 b 1b |
| 512 2: |
| 513 usada8 r0, r8, r6, r0 |
| 514 usada8 lr, r9, r7, lr |
| 515 add r0, r0, lr |
| 516 pop {r4-r9, pc} |
| 517 .endfunc |
| 518 |
/*
 * ff_sse16_armv6
 *
 * Returns in r0 the sum of squared byte differences between rows of 16
 * bytes at [r1] and [r2].
 *   r0    incoming value is overwritten without being read
 *   r1    first block pointer, advanced by r3 per row
 *   r2    second block pointer, advanced by r3 per row
 *   r3    byte offset between consecutive rows
 *   [sp]  row count, read from the first stack word on entry
 *
 * Each word is split into even and odd bytes widened to 16 bits (uxtb16
 * without and with ror #8), the halfword pairs are subtracted (usub16) and
 * smlad x, x adds the squares of both halfwords to the accumulator.
 */
| 519 function ff_sse16_armv6, export=1 |
/* fetch the stack-passed row count before the push moves sp */
| 520 ldr r12, [sp] |
| 521 push {r4-r9, lr} |
| 522 mov r0, #0 |
| 523 1: |
/* bytes 0..3 */
| 524 ldrd r4, r5, [r1] |
| 525 ldr r8, [r2] |
| 526 uxtb16 lr, r4 |
| 527 uxtb16 r4, r4, ror #8 |
| 528 uxtb16 r9, r8 |
| 529 uxtb16 r8, r8, ror #8 |
| 530 ldr r7, [r2, #4] |
| 531 usub16 lr, lr, r9 |
| 532 usub16 r4, r4, r8 |
| 533 smlad r0, lr, lr, r0 |
/* bytes 4..7 */
| 534 uxtb16 r6, r5 |
| 535 uxtb16 lr, r5, ror #8 |
| 536 uxtb16 r8, r7 |
| 537 uxtb16 r9, r7, ror #8 |
| 538 smlad r0, r4, r4, r0 |
| 539 ldrd r4, r5, [r1, #8] |
| 540 usub16 r6, r6, r8 |
| 541 usub16 r8, lr, r9 |
| 542 ldr r7, [r2, #8] |
| 543 smlad r0, r6, r6, r0 |
/* bytes 8..11 */
| 544 uxtb16 lr, r4 |
| 545 uxtb16 r4, r4, ror #8 |
| 546 uxtb16 r9, r7 |
| 547 uxtb16 r7, r7, ror #8 |
| 548 smlad r0, r8, r8, r0 |
| 549 ldr r8, [r2, #12] |
| 550 usub16 lr, lr, r9 |
| 551 usub16 r4, r4, r7 |
| 552 smlad r0, lr, lr, r0 |
/* bytes 12..15 */
| 553 uxtb16 r6, r5 |
| 554 uxtb16 r5, r5, ror #8 |
| 555 uxtb16 r9, r8 |
| 556 uxtb16 r8, r8, ror #8 |
| 557 smlad r0, r4, r4, r0 |
| 558 usub16 r6, r6, r9 |
| 559 usub16 r5, r5, r8 |
| 560 smlad r0, r6, r6, r0 |
| 561 add r1, r1, r3 |
| 562 add r2, r2, r3 |
| 563 subs r12, r12, #1 |
| 564 smlad r0, r5, r5, r0 |
| 565 bgt 1b |
| 566 |
| 567 pop {r4-r9, pc} |
| 568 .endfunc |
| 569 |
/*
 * ff_pix_norm1_armv6
 *
 * Returns in r0 the sum of the squares of all bytes in 16 rows of 16 bytes.
 *   r0  block pointer, advanced by r1 per row
 *   r1  byte offset between consecutive rows
 * r12 counts the 16 rows and lr accumulates the sum.  Each word is split
 * into even and odd bytes widened to 16 bits (uxtb16 without and with
 * ror #8) and smlad x, x adds the squares of both halfwords.
 */
| 570 function ff_pix_norm1_armv6, export=1 |
| 571 push {r4-r6, lr} |
| 572 mov r12, #16 |
| 573 mov lr, #0 |
| 574 1: |
/* r2-r5 = the 16 bytes of the current row */
| 575 ldm r0, {r2-r5} |
| 576 uxtb16 r6, r2 |
| 577 uxtb16 r2, r2, ror #8 |
| 578 smlad lr, r6, r6, lr |
| 579 uxtb16 r6, r3 |
| 580 smlad lr, r2, r2, lr |
| 581 uxtb16 r3, r3, ror #8 |
| 582 smlad lr, r6, r6, lr |
| 583 uxtb16 r6, r4 |
| 584 smlad lr, r3, r3, lr |
| 585 uxtb16 r4, r4, ror #8 |
| 586 smlad lr, r6, r6, lr |
| 587 uxtb16 r6, r5 |
| 588 smlad lr, r4, r4, lr |
| 589 uxtb16 r5, r5, ror #8 |
| 590 smlad lr, r6, r6, lr |
| 591 subs r12, r12, #1 |
| 592 add r0, r0, r1 |
| 593 smlad lr, r5, r5, lr |
| 594 bgt 1b |
| 595 |
| 596 mov r0, lr |
| 597 pop {r4-r6, pc} |
| 598 .endfunc |
| 599 |
/*
 * ff_pix_sum_armv6
 *
 * Returns in r0 the sum of all bytes in 16 rows of 16 bytes.
 *   r0  block pointer, advanced by r1 per row
 *   r1  byte offset between consecutive rows
 * usada8 against lr = 0 adds the four bytes of a word to an accumulator;
 * two accumulators (r2, r3) are used and added at the end.  r12 counts
 * the 16 rows.
 */
| 600 function ff_pix_sum_armv6, export=1 |
| 601 push {r4-r7, lr} |
| 602 mov r12, #16 |
| 603 mov r2, #0 |
| 604 mov r3, #0 |
/* lr = 0: the second usada8 operand, so |x - 0| = x for every byte */
| 605 mov lr, #0 |
| 606 ldr r4, [r0] |
| 607 1: |
| 608 subs r12, r12, #1 |
| 609 ldr r5, [r0, #4] |
| 610 usada8 r2, r4, lr, r2 |
| 611 ldr r6, [r0, #8] |
| 612 usada8 r3, r5, lr, r3 |
| 613 ldr r7, [r0, #12] |
| 614 usada8 r2, r6, lr, r2 |
/* after the last row, skip loading a further row; label 2 adds the final word */
| 615 beq 2f |
/* first word of the next row, with r0 written back */
| 616 ldr r4, [r0, r1]! |
| 617 usada8 r3, r7, lr, r3 |
| 618 bgt 1b |
| 619 2: |
| 620 usada8 r3, r7, lr, r3 |
| 621 add r0, r2, r3 |
| 622 pop {r4-r7, pc} |
| 623 .endfunc |
OLD | NEW |