| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 | 11 |
| 12 %include "vpx_ports/x86_abi_support.asm" | 12 %include "vpx_ports/x86_abi_support.asm" |
| 13 | 13 |
| 14 ; /**************************************************************************** | 14 ; /**************************************************************************** |
| 15 ; * Notes: | 15 ; * Notes: |
| 16 ; * | 16 ; * |
| 17 ; * This implementation makes use of 16 bit fixed point verio of two multiply | 17 ; * This implementation makes use of 16 bit fixed point version of two multiply |
| 18 ; * constants: | 18 ; * constants: |
| 19 ; * 1. sqrt(2) * cos (pi/8) | 19 ; * 1. sqrt(2) * cos (pi/8) |
| 20 ; * 2. sqrt(2) * sin (pi/8) | 20 ; * 2. sqrt(2) * sin (pi/8) |
| 21 ; * Becuase the first constant is bigger than 1, to maintain the same 16 bit | 21 ; * Because the first constant is bigger than 1, to maintain the same 16 bit |
| 22 ; * fixed point prrcision as the second one, we use a trick of | 22 ; * fixed point precision as the second one, we use a trick of |
| 23 ; * x * a = x + x*(a-1) | 23 ; * x * a = x + x*(a-1) |
| 24 ; * so | 24 ; * so |
| 25 ; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). | 25 ; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). |
| 26 ; * | 26 ; * |
| 27 ; * For the second constant, becuase of the 16bit version is 35468, which | 27 ; * For the second constant, because the 16bit version is 35468, which |
| 28 ; * is bigger than 32768, in signed 16 bit multiply, it become a negative | 28 ; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative |
| 29 ; * number. | 29 ; * number. |
| 30 ; * (x * (unsigned)35468 >> 16) = (x * (signed)35468 >> 16) + x | 30 ; * (x * (unsigned)35468 >> 16) = (x * (signed)35468 >> 16) + x |
| 31 ; * | 31 ; * |
| 32 ; **************************************************************************/ | 32 ; **************************************************************************/ |
| 33 | 33 |
| 34 | 34 |
| 35 ;void vp8_short_idct4x4llm_mmx(short *input, short *output, int pitch) | 35 ;void vp8_short_idct4x4llm_mmx(short *input, short *output, int pitch) |
| 36 global sym(vp8_short_idct4x4llm_mmx) | 36 global sym(vp8_short_idct4x4llm_mmx) |
| 37 sym(vp8_short_idct4x4llm_mmx): | 37 sym(vp8_short_idct4x4llm_mmx): |
| 38 push rbp | 38 push rbp |
| (...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 283 SECTION_RODATA | 283 SECTION_RODATA |
| 284 align 16 | 284 align 16 |
| 285 x_s1sqr2: | 285 x_s1sqr2: |
| 286 times 4 dw 0x8A8C | 286 times 4 dw 0x8A8C |
| 287 align 16 | 287 align 16 |
| 288 x_c1sqr2less1: | 288 x_c1sqr2less1: |
| 289 times 4 dw 0x4E7B | 289 times 4 dw 0x4E7B |
| 290 align 16 | 290 align 16 |
| 291 fours: | 291 fours: |
| 292 times 4 dw 0x0004 | 292 times 4 dw 0x0004 |
| OLD | NEW |