OLD | NEW |
1 // Copyright 2012 Google Inc. All Rights Reserved. | 1 // Copyright 2012 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // ARM NEON version of dsp functions and loop filtering. | 10 // ARM NEON version of dsp functions and loop filtering. |
11 // | 11 // |
12 // Authors: Somnath Banerjee (somnath@google.com) | 12 // Authors: Somnath Banerjee (somnath@google.com) |
13 // Johann Koenig (johannkoenig@google.com) | 13 // Johann Koenig (johannkoenig@google.com) |
14 | 14 |
15 #include "./dsp.h" | 15 #include "./dsp.h" |
16 | 16 |
17 #if defined(WEBP_USE_NEON) | 17 #if defined(WEBP_USE_NEON) |
18 | 18 |
19 #include "./neon.h" | 19 #include "./neon.h" |
20 #include "../dec/vp8i.h" | 20 #include "../dec/vp8i.h" |
21 | 21 |
22 //------------------------------------------------------------------------------ | 22 //------------------------------------------------------------------------------ |
23 // NxM Loading functions | 23 // NxM Loading functions |
24 | 24 |
25 // Load/Store vertical edge | 25 // Load/Store vertical edge |
// LOAD8x4(c1, c2, c3, c4, b1, b2, stride): expands to the text of eight
// single-lane "vld4.8" instructions, one per lane index 0..7, meant to be
// pasted into an inline-assembly string. Every argument is stringified (#arg)
// into the instruction text, so callers pass operand spellings, not C values.
//   c1..c4 : the four vector registers named in each register list;
//            instruction i addresses lane [i] of each of them.
//   b1, b2 : the two address operands. Lanes 0, 2, 4, 6 use b1 and lanes
//            1, 3, 5, 7 use b2.
//   stride : emitted as the trailing operand of every instruction.
// NOTE(review): b1/b2 are presumably bracketed base registers, so that
// 'stride' acts as a post-index increment -- confirm against the call sites.
26 #define LOAD8x4(c1, c2, c3, c4, b1, b2, stride) \ | 26 #define LOAD8x4(c1, c2, c3, c4, b1, b2, stride) \ |
27 "vld4.8 {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \ | 27 "vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \ |
28 "vld4.8 {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \ | 28 "vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \ |
29 "vld4.8 {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \ | 29 "vld4.8 {" #c1 "[2]," #c2 "[2]," #c3 "[2]," #c4 "[2]}," #b1 "," #stride "\n" \ |
30 "vld4.8 {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \ | 30 "vld4.8 {" #c1 "[3]," #c2 "[3]," #c3 "[3]," #c4 "[3]}," #b2 "," #stride "\n" \ |
31 "vld4.8 {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \ | 31 "vld4.8 {" #c1 "[4]," #c2 "[4]," #c3 "[4]," #c4 "[4]}," #b1 "," #stride "\n" \ |
32 "vld4.8 {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \ | 32 "vld4.8 {" #c1 "[5]," #c2 "[5]," #c3 "[5]," #c4 "[5]}," #b2 "," #stride "\n" \ |
33 "vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \ | 33 "vld4.8 {" #c1 "[6]," #c2 "[6]," #c3 "[6]," #c4 "[6]}," #b1 "," #stride "\n" \ |
34 "vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n" | 34 "vld4.8 {" #c1 "[7]," #c2 "[7]," #c3 "[7]," #c4 "[7]}," #b2 "," #stride "\n" |
35 | 35 |
// STORE8x2(c1, c2, p, stride): expands to the text of eight single-lane
// "vst2.8" instructions, one per lane index 0..7, meant to be pasted into an
// inline-assembly string. Arguments are stringified (#arg) into the
// instruction text rather than evaluated as C expressions.
//   c1, c2 : the two vector registers named in each register list;
//            instruction i addresses lane [i] of both.
//   p      : the address operand, shared by all eight instructions.
//   stride : emitted as the trailing operand of every instruction.
// NOTE(review): p is presumably a bracketed base register that 'stride'
// post-increments after each store -- confirm against the call sites.
36 #define STORE8x2(c1, c2, p, stride) \ | 36 #define STORE8x2(c1, c2, p, stride) \ |
37 "vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \ | 37 "vst2.8 {" #c1 "[0], " #c2 "[0]}," #p "," #stride " \n" \ |
38 "vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \ | 38 "vst2.8 {" #c1 "[1], " #c2 "[1]}," #p "," #stride " \n" \ |
39 "vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \ | 39 "vst2.8 {" #c1 "[2], " #c2 "[2]}," #p "," #stride " \n" \ |
40 "vst2.8 {" #c1"[3], " #c2"[3]}," #p "," #stride " \n" \ | 40 "vst2.8 {" #c1 "[3], " #c2 "[3]}," #p "," #stride " \n" \ |
41 "vst2.8 {" #c1"[4], " #c2"[4]}," #p "," #stride " \n" \ | 41 "vst2.8 {" #c1 "[4], " #c2 "[4]}," #p "," #stride " \n" \ |
42 "vst2.8 {" #c1"[5], " #c2"[5]}," #p "," #stride " \n" \ | 42 "vst2.8 {" #c1 "[5], " #c2 "[5]}," #p "," #stride " \n" \ |
43 "vst2.8 {" #c1"[6], " #c2"[6]}," #p "," #stride " \n" \ | 43 "vst2.8 {" #c1 "[6], " #c2 "[6]}," #p "," #stride " \n" \ |
44 "vst2.8 {" #c1"[7], " #c2"[7]}," #p "," #stride " \n" | 44 "vst2.8 {" #c1 "[7], " #c2 "[7]}," #p "," #stride " \n" |
45 | 45 |
46 #if !defined(WORK_AROUND_GCC) | 46 #if !defined(WORK_AROUND_GCC) |
47 | 47 |
48 // This intrinsics version makes gcc-4.6.3 crash during Load4x??() compilation | 48 // This intrinsics version makes gcc-4.6.3 crash during Load4x??() compilation |
49 // (register alloc, probably). The variants somewhat mitigate the problem, but | 49 // (register alloc, probably). The variants somewhat mitigate the problem, but |
50 // not quite. HFilter16i() remains problematic. | 50 // not quite. HFilter16i() remains problematic. |
51 static WEBP_INLINE uint8x8x4_t Load4x8(const uint8_t* const src, int stride) { | 51 static WEBP_INLINE uint8x8x4_t Load4x8(const uint8_t* const src, int stride) { |
52 const uint8x8_t zero = vdup_n_u8(0); | 52 const uint8x8_t zero = vdup_n_u8(0); |
53 uint8x8x4_t out; | 53 uint8x8x4_t out; |
54 INIT_VECTOR4(out, zero, zero, zero, zero); | 54 INIT_VECTOR4(out, zero, zero, zero, zero); |
(...skipping 1228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1283 #if !defined(WORK_AROUND_GCC) | 1283 #if !defined(WORK_AROUND_GCC) |
1284 VP8HFilter8 = HFilter8; | 1284 VP8HFilter8 = HFilter8; |
1285 VP8HFilter8i = HFilter8i; | 1285 VP8HFilter8i = HFilter8i; |
1286 #endif | 1286 #endif |
1287 VP8SimpleVFilter16 = SimpleVFilter16; | 1287 VP8SimpleVFilter16 = SimpleVFilter16; |
1288 VP8SimpleHFilter16 = SimpleHFilter16; | 1288 VP8SimpleHFilter16 = SimpleHFilter16; |
1289 VP8SimpleVFilter16i = SimpleVFilter16i; | 1289 VP8SimpleVFilter16i = SimpleVFilter16i; |
1290 VP8SimpleHFilter16i = SimpleHFilter16i; | 1290 VP8SimpleHFilter16i = SimpleHFilter16i; |
1291 #endif // WEBP_USE_NEON | 1291 #endif // WEBP_USE_NEON |
1292 } | 1292 } |
OLD | NEW |