| OLD | NEW | 
|---|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| (...skipping 13 matching lines...) Expand all  Loading... | 
| 24 ;************************************************************* | 24 ;************************************************************* | 
| 25 ;static const int cospi8sqrt2minus1=20091; | 25 ;static const int cospi8sqrt2minus1=20091; | 
| 26 ;static const int sinpi8sqrt2      =35468; | 26 ;static const int sinpi8sqrt2      =35468; | 
| 27 ;static const int rounding = 0; | 27 ;static const int rounding = 0; | 
| 28 ;Optimization note: The resulted data from dequantization are signed 13-bit data that is | 28 ;Optimization note: The resulted data from dequantization are signed 13-bit data that is | 
| 29 ;in the range of [-4096, 4095]. This allows to use "vqdmulh"(neon) instruction since | 29 ;in the range of [-4096, 4095]. This allows to use "vqdmulh"(neon) instruction since | 
| 30 ;it won't go out of range (13+16+1=30bits<32bits). This instruction gives the high half | 30 ;it won't go out of range (13+16+1=30bits<32bits). This instruction gives the high half | 
| 31 ;result of the multiplication that is needed in IDCT. | 31 ;result of the multiplication that is needed in IDCT. | 
| 32 | 32 | 
| 33 |vp8_short_idct4x4llm_neon| PROC | 33 |vp8_short_idct4x4llm_neon| PROC | 
| 34     ldr             r12, _idct_coeff_ | 34     adr             r12, idct_coeff | 
| 35     vld1.16         {q1, q2}, [r0] | 35     vld1.16         {q1, q2}, [r0] | 
| 36     vld1.16         {d0}, [r12] | 36     vld1.16         {d0}, [r12] | 
| 37 | 37 | 
| 38     vswp            d3, d4                  ;q2(vp[4] vp[12]) | 38     vswp            d3, d4                  ;q2(vp[4] vp[12]) | 
| 39 | 39 | 
| 40     vqdmulh.s16     q3, q2, d0[2] | 40     vqdmulh.s16     q3, q2, d0[2] | 
| 41     vqdmulh.s16     q4, q2, d0[0] | 41     vqdmulh.s16     q4, q2, d0[0] | 
| 42 | 42 | 
| 43     vqadd.s16       d12, d2, d3             ;a1 | 43     vqadd.s16       d12, d2, d3             ;a1 | 
| 44     vqsub.s16       d13, d2, d3             ;b1 | 44     vqsub.s16       d13, d2, d3             ;b1 | 
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 107     vst1.16         {d3}, [r3] | 107     vst1.16         {d3}, [r3] | 
| 108     vst1.16         {d4}, [r12] | 108     vst1.16         {d4}, [r12] | 
| 109     vst1.16         {d5}, [r0] | 109     vst1.16         {d5}, [r0] | 
| 110 | 110 | 
| 111     bx             lr | 111     bx             lr | 
| 112 | 112 | 
| 113     ENDP | 113     ENDP | 
| 114 | 114 | 
| 115 ;----------------- | 115 ;----------------- | 
| 116 | 116 | 
| 117 _idct_coeff_ |  | 
| 118     DCD     idct_coeff |  | 
| 119 idct_coeff | 117 idct_coeff | 
| 120     DCD     0x4e7b4e7b, 0x8a8c8a8c | 118     DCD     0x4e7b4e7b, 0x8a8c8a8c | 
| 121 | 119 | 
| 122 ;20091, 20091, 35468, 35468 | 120 ;20091, 20091, 35468, 35468 | 
| 123 | 121 | 
| 124     END | 122     END | 
| OLD | NEW | 
|---|