| OLD | NEW | 
|---|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| 11 | 11 | 
| 12 %include "vpx_ports/x86_abi_support.asm" | 12 %include "vpx_ports/x86_abi_support.asm" | 
| 13 | 13 | 
| 14 ; /**************************************************************************** | 14 ; /**************************************************************************** | 
| 15 ; * Notes: | 15 ; * Notes: | 
| 16 ; * | 16 ; * | 
| 17 ; * This implementation makes use of 16 bit fixed point verio of two multiply | 17 ; * This implementation makes use of 16 bit fixed point version of two multiply | 
| 18 ; * constants: | 18 ; * constants: | 
| 19 ; *        1.   sqrt(2) * cos (pi/8) | 19 ; *        1.   sqrt(2) * cos (pi/8) | 
| 20 ; *         2.   sqrt(2) * sin (pi/8) | 20 ; *        2.   sqrt(2) * sin (pi/8) | 
| 21 ; * Becuase the first constant is bigger than 1, to maintain the same 16 bit | 21 ; * Because the first constant is bigger than 1, to maintain the same 16 bit | 
| 22 ; * fixed point prrcision as the second one, we use a trick of | 22 ; * fixed point precision as the second one, we use a trick of | 
| 23 ; *        x * a = x + x*(a-1) | 23 ; *        x * a = x + x*(a-1) | 
| 24 ; * so | 24 ; * so | 
| 25 ; *        x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). | 25 ; *        x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). | 
| 26 ; * | 26 ; * | 
| 27 ; * For     the second constant, becuase of the 16bit version is 35468, which | 27 ; * For the second constant, because of the 16bit version is 35468, which | 
| 28 ; * is bigger than 32768, in signed 16 bit multiply, it become a negative | 28 ; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative | 
| 29 ; * number. | 29 ; * number. | 
| 30 ; *        (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x | 30 ; *        (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x | 
| 31 ; * | 31 ; * | 
| 32 ; **************************************************************************/ | 32 ; **************************************************************************/ | 
| 33 | 33 | 
| 34 | 34 | 
| 35 ;void short_idct4x4llm_mmx(short *input, short *output, int pitch) | 35 ;void short_idct4x4llm_mmx(short *input, short *output, int pitch) | 
| 36 global sym(vp8_short_idct4x4llm_mmx) | 36 global sym(vp8_short_idct4x4llm_mmx) | 
| 37 sym(vp8_short_idct4x4llm_mmx): | 37 sym(vp8_short_idct4x4llm_mmx): | 
| 38     push        rbp | 38     push        rbp | 
| (...skipping 244 matching lines...) | (...skipping 244 matching lines...) | 
| 283 SECTION_RODATA | 283 SECTION_RODATA | 
| 284 align 16 | 284 align 16 | 
| 285 x_s1sqr2: | 285 x_s1sqr2: | 
| 286     times 4 dw 0x8A8C | 286     times 4 dw 0x8A8C | 
| 287 align 16 | 287 align 16 | 
| 288 x_c1sqr2less1: | 288 x_c1sqr2less1: | 
| 289     times 4 dw 0x4E7B | 289     times 4 dw 0x4E7B | 
| 290 align 16 | 290 align 16 | 
| 291 fours: | 291 fours: | 
| 292     times 4 dw 0x0004 | 292     times 4 dw 0x0004 | 
| OLD | NEW | 
|---|---|