| OLD | NEW |
| (Empty) |
| 1 ; | |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
| 3 ; | |
| 4 ; Use of this source code is governed by a BSD-style license | |
| 5 ; that can be found in the LICENSE file in the root of the source | |
| 6 ; tree. An additional intellectual property rights grant can be found | |
| 7 ; in the file PATENTS. All contributing project authors may | |
| 8 ; be found in the AUTHORS file in the root of the source tree. | |
| 9 ; | |
| 10 | |
| 11 | |
| 12 EXPORT |vp8_build_intra_predictors_mby_neon_func| | |
| 13 EXPORT |vp8_build_intra_predictors_mby_s_neon_func| | |
| 14 | |
| 15 ARM | |
| 16 REQUIRE8 | |
| 17 PRESERVE8 | |
| 18 | |
| 19 AREA ||.text||, CODE, READONLY, ALIGN=2 | |
| 20 ; r0 unsigned char *y_buffer | |
| 21 ; r1 unsigned char *ypred_ptr | |
| 22 ; r2 int y_stride | |
| 23 ; r3 int mode | |
| 24 ; stack int Up | |
| 25 ; stack int Left | |
| 26 | |
| 27 |vp8_build_intra_predictors_mby_neon_func| PROC | |
|     ; Builds the 16x16 Y-plane intra prediction into ypred_ptr (r1), | |
|     ; writing 16 contiguous bytes per row (post-increment stores), | |
|     ; reading neighbours from y_buffer (r0) with stride y_stride (r2). | |
|     ; Dispatches on mode (r3): 0=DC, 1=V, 2=H, 3=TM. | |
| 28     push {r4-r8, lr} | |
| 29     vpush {d8-d15} | |
| 30 | |
| 31     cmp r3, #0 | |
| 32     beq case_dc_pred | |
| 33     cmp r3, #1 | |
| 34     beq case_v_pred | |
| 35     cmp r3, #2 | |
| 36     beq case_h_pred | |
| 37     cmp r3, #3 | |
| 38     beq case_tm_pred | |
|     ; NOTE(review): any mode outside 0..3 falls through into case_dc_pred. | |
| 39 | |
| 40 case_dc_pred | |
|     ; Stack args sit above the saved state: 6 GPRs (24 B) + d8-d15 (64 B) = 88. | |
| 41     ldr r4, [sp, #88] ; Up | |
| 42     ldr r5, [sp, #92] ; Left | |
| 43 | |
| 44     ; Default the DC average to 128 | |
| 45     mov r12, #128 | |
| 46     vdup.u8 q0, r12 | |
| 47 | |
| 48     ; Zero out running sum | |
| 49     mov r12, #0 | |
| 50 | |
| 51     ; compute shift and jump | |
| 52     adds r7, r4, r5 | |
| 53     beq skip_dc_pred_up_left | |
| 54 | |
| 55     ; Load above row, if it exists | |
| 56     cmp r4, #0 | |
| 57     beq skip_dc_pred_up | |
| 58 | |
|     ; Horizontal reduction of the 16 above-row bytes: pairwise widening | |
|     ; adds u8->u16->u32->u64, leaving one partial sum in each of d8/d9. | |
| 59     sub r6, r0, r2 | |
| 60     vld1.8 {q1}, [r6] | |
| 61     vpaddl.u8 q2, q1 | |
| 62     vpaddl.u16 q3, q2 | |
| 63     vpaddl.u32 q4, q3 | |
| 64 | |
| 65     vmov.32 r4, d8[0] | |
| 66     vmov.32 r6, d9[0] | |
| 67 | |
| 68     add r12, r4, r6 | |
| 69 | |
| 70     ; Move back to integer registers | |
| 71 | |
| 72 skip_dc_pred_up | |
| 73 | |
| 74     cmp r5, #0 | |
| 75     beq skip_dc_pred_left | |
| 76 | |
| 77     sub r0, r0, #1 | |
| 78 | |
| 79     ; Load left row, if it exists | |
| 80     ldrb r3, [r0], r2 | |
| 81     ldrb r4, [r0], r2 | |
| 82     ldrb r5, [r0], r2 | |
| 83     ldrb r6, [r0], r2 | |
| 84 | |
| 85     add r12, r12, r3 | |
| 86     add r12, r12, r4 | |
| 87     add r12, r12, r5 | |
| 88     add r12, r12, r6 | |
| 89 | |
| 90     ldrb r3, [r0], r2 | |
| 91     ldrb r4, [r0], r2 | |
| 92     ldrb r5, [r0], r2 | |
| 93     ldrb r6, [r0], r2 | |
| 94 | |
| 95     add r12, r12, r3 | |
| 96     add r12, r12, r4 | |
| 97     add r12, r12, r5 | |
| 98     add r12, r12, r6 | |
| 99 | |
| 100     ldrb r3, [r0], r2 | |
| 101     ldrb r4, [r0], r2 | |
| 102     ldrb r5, [r0], r2 | |
| 103     ldrb r6, [r0], r2 | |
| 104 | |
| 105     add r12, r12, r3 | |
| 106     add r12, r12, r4 | |
| 107     add r12, r12, r5 | |
| 108     add r12, r12, r6 | |
| 109 | |
| 110     ldrb r3, [r0], r2 | |
| 111     ldrb r4, [r0], r2 | |
| 112     ldrb r5, [r0], r2 | |
| 113     ldrb r6, [r0] | |
| 114 | |
| 115     add r12, r12, r3 | |
| 116     add r12, r12, r4 | |
| 117     add r12, r12, r5 | |
| 118     add r12, r12, r6 | |
| 119 | |
| 120 skip_dc_pred_left | |
|     ; Rounded average: dc = (sum + 2^(shift-1)) >> shift, where | |
|     ; shift = 3 + Up + Left (4 for 16 samples, 5 for 32). | |
|     ; Assumes Up and Left are 0/1 flags — TODO confirm against callers. | |
| 121     add r7, r7, #3 ; Shift | |
| 122     sub r4, r7, #1 | |
| 123     mov r5, #1 | |
| 124     add r12, r12, r5, lsl r4 | |
| 125     mov r5, r12, lsr r7 ; expected_dc | |
| 126 | |
| 127     vdup.u8 q0, r5 | |
| 128 | |
| 129 skip_dc_pred_up_left | |
|     ; Fill all 16 rows with the DC value (or 128 if no neighbours). | |
| 130     vst1.u8 {q0}, [r1]! | |
| 131     vst1.u8 {q0}, [r1]! | |
| 132     vst1.u8 {q0}, [r1]! | |
| 133     vst1.u8 {q0}, [r1]! | |
| 134     vst1.u8 {q0}, [r1]! | |
| 135     vst1.u8 {q0}, [r1]! | |
| 136     vst1.u8 {q0}, [r1]! | |
| 137     vst1.u8 {q0}, [r1]! | |
| 138     vst1.u8 {q0}, [r1]! | |
| 139     vst1.u8 {q0}, [r1]! | |
| 140     vst1.u8 {q0}, [r1]! | |
| 141     vst1.u8 {q0}, [r1]! | |
| 142     vst1.u8 {q0}, [r1]! | |
| 143     vst1.u8 {q0}, [r1]! | |
| 144     vst1.u8 {q0}, [r1]! | |
| 145     vst1.u8 {q0}, [r1]! | |
| 146 | |
| 147     vpop {d8-d15} | |
| 148     pop {r4-r8,pc} | |
| 149 case_v_pred | |
| 150     ; Copy down above row | |
| 151     sub r6, r0, r2 | |
| 152     vld1.8 {q0}, [r6] | |
| 153 | |
| 154     vst1.u8 {q0}, [r1]! | |
| 155     vst1.u8 {q0}, [r1]! | |
| 156     vst1.u8 {q0}, [r1]! | |
| 157     vst1.u8 {q0}, [r1]! | |
| 158     vst1.u8 {q0}, [r1]! | |
| 159     vst1.u8 {q0}, [r1]! | |
| 160     vst1.u8 {q0}, [r1]! | |
| 161     vst1.u8 {q0}, [r1]! | |
| 162     vst1.u8 {q0}, [r1]! | |
| 163     vst1.u8 {q0}, [r1]! | |
| 164     vst1.u8 {q0}, [r1]! | |
| 165     vst1.u8 {q0}, [r1]! | |
| 166     vst1.u8 {q0}, [r1]! | |
| 167     vst1.u8 {q0}, [r1]! | |
| 168     vst1.u8 {q0}, [r1]! | |
| 169     vst1.u8 {q0}, [r1]! | |
| 170     vpop {d8-d15} | |
| 171     pop {r4-r8,pc} | |
| 172 | |
| 173 case_h_pred | |
|     ; Each output row is the left-column pixel of that row replicated 16x. | |
| 174     ; Load 4x yleft_col | |
| 175     sub r0, r0, #1 | |
| 176 | |
| 177     ldrb r3, [r0], r2 | |
| 178     ldrb r4, [r0], r2 | |
| 179     ldrb r5, [r0], r2 | |
| 180     ldrb r6, [r0], r2 | |
| 181     vdup.u8 q0, r3 | |
| 182     vdup.u8 q1, r4 | |
| 183     vdup.u8 q2, r5 | |
| 184     vdup.u8 q3, r6 | |
| 185     vst1.u8 {q0}, [r1]! | |
| 186     vst1.u8 {q1}, [r1]! | |
| 187     vst1.u8 {q2}, [r1]! | |
| 188     vst1.u8 {q3}, [r1]! | |
| 189 | |
| 190     ldrb r3, [r0], r2 | |
| 191     ldrb r4, [r0], r2 | |
| 192     ldrb r5, [r0], r2 | |
| 193     ldrb r6, [r0], r2 | |
| 194     vdup.u8 q0, r3 | |
| 195     vdup.u8 q1, r4 | |
| 196     vdup.u8 q2, r5 | |
| 197     vdup.u8 q3, r6 | |
| 198     vst1.u8 {q0}, [r1]! | |
| 199     vst1.u8 {q1}, [r1]! | |
| 200     vst1.u8 {q2}, [r1]! | |
| 201     vst1.u8 {q3}, [r1]! | |
| 202 | |
| 203 | |
| 204     ldrb r3, [r0], r2 | |
| 205     ldrb r4, [r0], r2 | |
| 206     ldrb r5, [r0], r2 | |
| 207     ldrb r6, [r0], r2 | |
| 208     vdup.u8 q0, r3 | |
| 209     vdup.u8 q1, r4 | |
| 210     vdup.u8 q2, r5 | |
| 211     vdup.u8 q3, r6 | |
| 212     vst1.u8 {q0}, [r1]! | |
| 213     vst1.u8 {q1}, [r1]! | |
| 214     vst1.u8 {q2}, [r1]! | |
| 215     vst1.u8 {q3}, [r1]! | |
| 216 | |
| 217     ldrb r3, [r0], r2 | |
| 218     ldrb r4, [r0], r2 | |
| 219     ldrb r5, [r0], r2 | |
| 220     ldrb r6, [r0], r2 | |
| 221     vdup.u8 q0, r3 | |
| 222     vdup.u8 q1, r4 | |
| 223     vdup.u8 q2, r5 | |
| 224     vdup.u8 q3, r6 | |
| 225     vst1.u8 {q0}, [r1]! | |
| 226     vst1.u8 {q1}, [r1]! | |
| 227     vst1.u8 {q2}, [r1]! | |
| 228     vst1.u8 {q3}, [r1]! | |
| 229 | |
| 230     vpop {d8-d15} | |
| 231     pop {r4-r8,pc} | |
| 232 | |
| 233 case_tm_pred | |
|     ; TrueMotion: pred[r][c] = clamp(left[r] + above[c] - top_left). | |
| 234     ; Load yabove_row | |
| 235     sub r3, r0, r2 | |
| 236     vld1.8 {q8}, [r3] | |
| 237 | |
| 238     ; Load ytop_left | |
| 239     sub r3, r3, #1 | |
| 240     ldrb r7, [r3] | |
| 241 | |
| 242     vdup.u16 q7, r7 | |
| 243 | |
| 244     ; Compute yabove_row - ytop_left | |
| 245     mov r3, #1 | |
| 246     vdup.u8 q0, r3 | |
| 247 | |
|     ; Multiply by 1 only to widen the above row u8 -> u16 across q4/q5. | |
| 248     vmull.u8 q4, d16, d0 | |
| 249     vmull.u8 q5, d17, d0 | |
| 250 | |
| 251     vsub.s16 q4, q4, q7 | |
| 252     vsub.s16 q5, q5, q7 | |
| 253 | |
| 254     ; Load 4x yleft_col | |
| 255     sub r0, r0, #1 | |
| 256     mov r12, #4 | |
| 257 | |
|     ; 4 iterations x 4 rows per iteration = 16 output rows. | |
| 258 case_tm_pred_loop | |
| 259     ldrb r3, [r0], r2 | |
| 260     ldrb r4, [r0], r2 | |
| 261     ldrb r5, [r0], r2 | |
| 262     ldrb r6, [r0], r2 | |
| 263     vdup.u16 q0, r3 | |
| 264     vdup.u16 q1, r4 | |
| 265     vdup.u16 q2, r5 | |
| 266     vdup.u16 q3, r6 | |
| 267 | |
| 268     vqadd.s16 q8, q0, q4 | |
| 269     vqadd.s16 q9, q0, q5 | |
| 270 | |
| 271     vqadd.s16 q10, q1, q4 | |
| 272     vqadd.s16 q11, q1, q5 | |
| 273 | |
| 274     vqadd.s16 q12, q2, q4 | |
| 275     vqadd.s16 q13, q2, q5 | |
| 276 | |
| 277     vqadd.s16 q14, q3, q4 | |
| 278     vqadd.s16 q15, q3, q5 | |
| 279 | |
|     ; Saturating narrow s16 -> u8 clamps results to 0..255. | |
|     ; NOTE(review): shift #0 is armasm-tolerated; some assemblers require | |
|     ; 1..8 for vqshrun — confirm if ever ported to GNU as. | |
| 280     vqshrun.s16 d0, q8, #0 | |
| 281     vqshrun.s16 d1, q9, #0 | |
| 282 | |
| 283     vqshrun.s16 d2, q10, #0 | |
| 284     vqshrun.s16 d3, q11, #0 | |
| 285 | |
| 286     vqshrun.s16 d4, q12, #0 | |
| 287     vqshrun.s16 d5, q13, #0 | |
| 288 | |
| 289     vqshrun.s16 d6, q14, #0 | |
| 290     vqshrun.s16 d7, q15, #0 | |
| 291 | |
| 292     vst1.u8 {q0}, [r1]! | |
| 293     vst1.u8 {q1}, [r1]! | |
| 294     vst1.u8 {q2}, [r1]! | |
| 295     vst1.u8 {q3}, [r1]! | |
| 296 | |
| 297     subs r12, r12, #1 | |
| 298     bne case_tm_pred_loop | |
| 299 | |
| 300     vpop {d8-d15} | |
| 301     pop {r4-r8,pc} | |
| 302 | |
| 303 ENDP | |
| 304 | |
| 305 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| 306 ; r0 unsigned char *y_buffer | |
| 307 ; r1 unsigned char *ypred_ptr | |
| 308 ; r2 int y_stride | |
| 309 ; r3 int mode | |
| 310 ; stack int Up | |
| 311 ; stack int Left | |
| 312 | |
| 313 |vp8_build_intra_predictors_mby_s_neon_func| PROC | |
|     ; Same prediction modes as vp8_build_intra_predictors_mby_neon_func, | |
|     ; but writes the prediction in place: r1 is aliased to y_buffer (r0) | |
|     ; and every row store advances by y_stride (r2) instead of 16. | |
| 314     push {r4-r8, lr} | |
| 315     vpush {d8-d15} | |
| 316 | |
| 317     mov r1, r0 ; unsigned char *ypred_ptr = x->dst.y_buffer;
//x->Predictor; | |
| 318 | |
| 319     cmp r3, #0 | |
| 320     beq case_dc_pred_s | |
| 321     cmp r3, #1 | |
| 322     beq case_v_pred_s | |
| 323     cmp r3, #2 | |
| 324     beq case_h_pred_s | |
| 325     cmp r3, #3 | |
| 326     beq case_tm_pred_s | |
|     ; NOTE(review): any mode outside 0..3 falls through into case_dc_pred_s. | |
| 327 | |
| 328 case_dc_pred_s | |
|     ; Stack args sit above the saved state: 6 GPRs (24 B) + d8-d15 (64 B) = 88. | |
| 329     ldr r4, [sp, #88] ; Up | |
| 330     ldr r5, [sp, #92] ; Left | |
| 331 | |
| 332     ; Default the DC average to 128 | |
| 333     mov r12, #128 | |
| 334     vdup.u8 q0, r12 | |
| 335 | |
| 336     ; Zero out running sum | |
| 337     mov r12, #0 | |
| 338 | |
| 339     ; compute shift and jump | |
| 340     adds r7, r4, r5 | |
| 341     beq skip_dc_pred_up_left_s | |
| 342 | |
| 343     ; Load above row, if it exists | |
| 344     cmp r4, #0 | |
| 345     beq skip_dc_pred_up_s | |
| 346 | |
|     ; Horizontal reduction of the 16 above-row bytes via pairwise | |
|     ; widening adds; partial sums land in d8[0] and d9[0]. | |
| 347     sub r6, r0, r2 | |
| 348     vld1.8 {q1}, [r6] | |
| 349     vpaddl.u8 q2, q1 | |
| 350     vpaddl.u16 q3, q2 | |
| 351     vpaddl.u32 q4, q3 | |
| 352 | |
| 353     vmov.32 r4, d8[0] | |
| 354     vmov.32 r6, d9[0] | |
| 355 | |
| 356     add r12, r4, r6 | |
| 357 | |
| 358     ; Move back to integer registers | |
| 359 | |
| 360 skip_dc_pred_up_s | |
| 361 | |
| 362     cmp r5, #0 | |
| 363     beq skip_dc_pred_left_s | |
| 364 | |
| 365     sub r0, r0, #1 | |
| 366 | |
| 367     ; Load left row, if it exists | |
| 368     ldrb r3, [r0], r2 | |
| 369     ldrb r4, [r0], r2 | |
| 370     ldrb r5, [r0], r2 | |
| 371     ldrb r6, [r0], r2 | |
| 372 | |
| 373     add r12, r12, r3 | |
| 374     add r12, r12, r4 | |
| 375     add r12, r12, r5 | |
| 376     add r12, r12, r6 | |
| 377 | |
| 378     ldrb r3, [r0], r2 | |
| 379     ldrb r4, [r0], r2 | |
| 380     ldrb r5, [r0], r2 | |
| 381     ldrb r6, [r0], r2 | |
| 382 | |
| 383     add r12, r12, r3 | |
| 384     add r12, r12, r4 | |
| 385     add r12, r12, r5 | |
| 386     add r12, r12, r6 | |
| 387 | |
| 388     ldrb r3, [r0], r2 | |
| 389     ldrb r4, [r0], r2 | |
| 390     ldrb r5, [r0], r2 | |
| 391     ldrb r6, [r0], r2 | |
| 392 | |
| 393     add r12, r12, r3 | |
| 394     add r12, r12, r4 | |
| 395     add r12, r12, r5 | |
| 396     add r12, r12, r6 | |
| 397 | |
| 398     ldrb r3, [r0], r2 | |
| 399     ldrb r4, [r0], r2 | |
| 400     ldrb r5, [r0], r2 | |
| 401     ldrb r6, [r0] | |
| 402 | |
| 403     add r12, r12, r3 | |
| 404     add r12, r12, r4 | |
| 405     add r12, r12, r5 | |
| 406     add r12, r12, r6 | |
| 407 | |
| 408 skip_dc_pred_left_s | |
|     ; Rounded average: dc = (sum + 2^(shift-1)) >> shift, where | |
|     ; shift = 3 + Up + Left (4 for 16 samples, 5 for 32). | |
|     ; Assumes Up and Left are 0/1 flags — TODO confirm against callers. | |
| 409     add r7, r7, #3 ; Shift | |
| 410     sub r4, r7, #1 | |
| 411     mov r5, #1 | |
| 412     add r12, r12, r5, lsl r4 | |
| 413     mov r5, r12, lsr r7 ; expected_dc | |
| 414 | |
| 415     vdup.u8 q0, r5 | |
| 416 | |
| 417 skip_dc_pred_up_left_s | |
|     ; Fill all 16 rows, stepping by the frame stride each store. | |
| 418     vst1.u8 {q0}, [r1], r2 | |
| 419     vst1.u8 {q0}, [r1], r2 | |
| 420     vst1.u8 {q0}, [r1], r2 | |
| 421     vst1.u8 {q0}, [r1], r2 | |
| 422     vst1.u8 {q0}, [r1], r2 | |
| 423     vst1.u8 {q0}, [r1], r2 | |
| 424     vst1.u8 {q0}, [r1], r2 | |
| 425     vst1.u8 {q0}, [r1], r2 | |
| 426     vst1.u8 {q0}, [r1], r2 | |
| 427     vst1.u8 {q0}, [r1], r2 | |
| 428     vst1.u8 {q0}, [r1], r2 | |
| 429     vst1.u8 {q0}, [r1], r2 | |
| 430     vst1.u8 {q0}, [r1], r2 | |
| 431     vst1.u8 {q0}, [r1], r2 | |
| 432     vst1.u8 {q0}, [r1], r2 | |
| 433     vst1.u8 {q0}, [r1], r2 | |
| 434 | |
| 435     vpop {d8-d15} | |
| 436     pop {r4-r8,pc} | |
| 437 case_v_pred_s | |
| 438     ; Copy down above row | |
| 439     sub r6, r0, r2 | |
| 440     vld1.8 {q0}, [r6] | |
| 441 | |
| 442     vst1.u8 {q0}, [r1], r2 | |
| 443     vst1.u8 {q0}, [r1], r2 | |
| 444     vst1.u8 {q0}, [r1], r2 | |
| 445     vst1.u8 {q0}, [r1], r2 | |
| 446     vst1.u8 {q0}, [r1], r2 | |
| 447     vst1.u8 {q0}, [r1], r2 | |
| 448     vst1.u8 {q0}, [r1], r2 | |
| 449     vst1.u8 {q0}, [r1], r2 | |
| 450     vst1.u8 {q0}, [r1], r2 | |
| 451     vst1.u8 {q0}, [r1], r2 | |
| 452     vst1.u8 {q0}, [r1], r2 | |
| 453     vst1.u8 {q0}, [r1], r2 | |
| 454     vst1.u8 {q0}, [r1], r2 | |
| 455     vst1.u8 {q0}, [r1], r2 | |
| 456     vst1.u8 {q0}, [r1], r2 | |
| 457     vst1.u8 {q0}, [r1], r2 | |
| 458 | |
| 459     vpop {d8-d15} | |
| 460     pop {r4-r8,pc} | |
| 461 | |
| 462 case_h_pred_s | |
|     ; Each output row is the left-column pixel of that row replicated 16x. | |
| 463     ; Load 4x yleft_col | |
| 464     sub r0, r0, #1 | |
| 465 | |
| 466     ldrb r3, [r0], r2 | |
| 467     ldrb r4, [r0], r2 | |
| 468     ldrb r5, [r0], r2 | |
| 469     ldrb r6, [r0], r2 | |
| 470     vdup.u8 q0, r3 | |
| 471     vdup.u8 q1, r4 | |
| 472     vdup.u8 q2, r5 | |
| 473     vdup.u8 q3, r6 | |
| 474     vst1.u8 {q0}, [r1], r2 | |
| 475     vst1.u8 {q1}, [r1], r2 | |
| 476     vst1.u8 {q2}, [r1], r2 | |
| 477     vst1.u8 {q3}, [r1], r2 | |
| 478 | |
| 479     ldrb r3, [r0], r2 | |
| 480     ldrb r4, [r0], r2 | |
| 481     ldrb r5, [r0], r2 | |
| 482     ldrb r6, [r0], r2 | |
| 483     vdup.u8 q0, r3 | |
| 484     vdup.u8 q1, r4 | |
| 485     vdup.u8 q2, r5 | |
| 486     vdup.u8 q3, r6 | |
| 487     vst1.u8 {q0}, [r1], r2 | |
| 488     vst1.u8 {q1}, [r1], r2 | |
| 489     vst1.u8 {q2}, [r1], r2 | |
| 490     vst1.u8 {q3}, [r1], r2 | |
| 491 | |
| 492 | |
| 493     ldrb r3, [r0], r2 | |
| 494     ldrb r4, [r0], r2 | |
| 495     ldrb r5, [r0], r2 | |
| 496     ldrb r6, [r0], r2 | |
| 497     vdup.u8 q0, r3 | |
| 498     vdup.u8 q1, r4 | |
| 499     vdup.u8 q2, r5 | |
| 500     vdup.u8 q3, r6 | |
| 501     vst1.u8 {q0}, [r1], r2 | |
| 502     vst1.u8 {q1}, [r1], r2 | |
| 503     vst1.u8 {q2}, [r1], r2 | |
| 504     vst1.u8 {q3}, [r1], r2 | |
| 505 | |
| 506     ldrb r3, [r0], r2 | |
| 507     ldrb r4, [r0], r2 | |
| 508     ldrb r5, [r0], r2 | |
| 509     ldrb r6, [r0], r2 | |
| 510     vdup.u8 q0, r3 | |
| 511     vdup.u8 q1, r4 | |
| 512     vdup.u8 q2, r5 | |
| 513     vdup.u8 q3, r6 | |
| 514     vst1.u8 {q0}, [r1], r2 | |
| 515     vst1.u8 {q1}, [r1], r2 | |
| 516     vst1.u8 {q2}, [r1], r2 | |
| 517     vst1.u8 {q3}, [r1], r2 | |
| 518 | |
| 519     vpop {d8-d15} | |
| 520     pop {r4-r8,pc} | |
| 521 | |
| 522 case_tm_pred_s | |
|     ; TrueMotion: pred[r][c] = clamp(left[r] + above[c] - top_left). | |
| 523     ; Load yabove_row | |
| 524     sub r3, r0, r2 | |
| 525     vld1.8 {q8}, [r3] | |
| 526 | |
| 527     ; Load ytop_left | |
| 528     sub r3, r3, #1 | |
| 529     ldrb r7, [r3] | |
| 530 | |
| 531     vdup.u16 q7, r7 | |
| 532 | |
| 533     ; Compute yabove_row - ytop_left | |
| 534     mov r3, #1 | |
| 535     vdup.u8 q0, r3 | |
| 536 | |
|     ; Multiply by 1 only to widen the above row u8 -> u16 across q4/q5. | |
| 537     vmull.u8 q4, d16, d0 | |
| 538     vmull.u8 q5, d17, d0 | |
| 539 | |
| 540     vsub.s16 q4, q4, q7 | |
| 541     vsub.s16 q5, q5, q7 | |
| 542 | |
| 543     ; Load 4x yleft_col | |
| 544     sub r0, r0, #1 | |
| 545     mov r12, #4 | |
| 546 | |
|     ; 4 iterations x 4 rows per iteration = 16 output rows. | |
| 547 case_tm_pred_loop_s | |
| 548     ldrb r3, [r0], r2 | |
| 549     ldrb r4, [r0], r2 | |
| 550     ldrb r5, [r0], r2 | |
| 551     ldrb r6, [r0], r2 | |
| 552     vdup.u16 q0, r3 | |
| 553     vdup.u16 q1, r4 | |
| 554     vdup.u16 q2, r5 | |
| 555     vdup.u16 q3, r6 | |
| 556 | |
| 557     vqadd.s16 q8, q0, q4 | |
| 558     vqadd.s16 q9, q0, q5 | |
| 559 | |
| 560     vqadd.s16 q10, q1, q4 | |
| 561     vqadd.s16 q11, q1, q5 | |
| 562 | |
| 563     vqadd.s16 q12, q2, q4 | |
| 564     vqadd.s16 q13, q2, q5 | |
| 565 | |
| 566     vqadd.s16 q14, q3, q4 | |
| 567     vqadd.s16 q15, q3, q5 | |
| 568 | |
|     ; Saturating narrow s16 -> u8 clamps results to 0..255. | |
|     ; NOTE(review): shift #0 is armasm-tolerated; some assemblers require | |
|     ; 1..8 for vqshrun — confirm if ever ported to GNU as. | |
| 569     vqshrun.s16 d0, q8, #0 | |
| 570     vqshrun.s16 d1, q9, #0 | |
| 571 | |
| 572     vqshrun.s16 d2, q10, #0 | |
| 573     vqshrun.s16 d3, q11, #0 | |
| 574 | |
| 575     vqshrun.s16 d4, q12, #0 | |
| 576     vqshrun.s16 d5, q13, #0 | |
| 577 | |
| 578     vqshrun.s16 d6, q14, #0 | |
| 579     vqshrun.s16 d7, q15, #0 | |
| 580 | |
| 581     vst1.u8 {q0}, [r1], r2 | |
| 582     vst1.u8 {q1}, [r1], r2 | |
| 583     vst1.u8 {q2}, [r1], r2 | |
| 584     vst1.u8 {q3}, [r1], r2 | |
| 585 | |
| 586     subs r12, r12, #1 | |
| 587     bne case_tm_pred_loop_s | |
| 588 | |
| 589     vpop {d8-d15} | |
| 590     pop {r4-r8,pc} | |
| 591 | |
| 592 ENDP | |
| 593 | |
| 594 | |
| 595 END | |
| OLD | NEW |