OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> // for alignment checks | 11 #include <assert.h> // for alignment checks |
12 #include <emmintrin.h> // SSE2 | 12 #include <emmintrin.h> // SSE2 |
13 #include "vp9/common/vp9_filter.h" | 13 #include "vp9/common/vp9_filter.h" |
| 14 #include "vpx_ports/emmintrin_compat.h" |
14 #include "vpx_ports/mem.h" // for DECLARE_ALIGNED | 15 #include "vpx_ports/mem.h" // for DECLARE_ALIGNED |
15 #include "vp9_rtcd.h" | 16 #include "vp9_rtcd.h" |
16 | 17 |
17 // TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is | 18 // TODO(cd): After cleanup, commit faster versions for non 4x4 size. This is |
18 // just a quick partial snapshot so that others can already use some | 19 // just a quick partial snapshot so that others can already use some |
19 // speedup. | 20 // speedup. |
20 // TODO(cd): Use vectorized 8 tap filtering code as speedup to pure C 6 tap | 21 // TODO(cd): Use vectorized 8 tap filtering code as speedup to pure C 6 tap |
21 // filtering. | 22 // filtering. |
22 // TODO(cd): Add some comments, better variable naming. | 23 // TODO(cd): Add some comments, better variable naming. |
23 // TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coefficients (no sum | 24 // TODO(cd): Maybe use _mm_maddubs_epi16 if smaller filter coefficients (no sum |
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
280 ) { | 281 ) { |
281 int i, j; | 282 int i, j; |
282 for (i=0; i<16; i+=4) { | 283 for (i=0; i<16; i+=4) { |
283 for (j=0; j<16; j+=4) { | 284 for (j=0; j<16; j+=4) { |
284 vp9_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride, | 285 vp9_filter_block2d_4x4_8_sse2(src_ptr + j + i*src_stride, src_stride, |
285 HFilter_aligned16, VFilter_aligned16, | 286 HFilter_aligned16, VFilter_aligned16, |
286 dst_ptr + j + i*dst_stride, dst_stride); | 287 dst_ptr + j + i*dst_stride, dst_stride); |
287 } | 288 } |
288 } | 289 } |
289 } | 290 } |
OLD | NEW |