OLD | NEW |
1 /* | 1 /* |
2 * SPARC VIS optimized inverse DCT | 2 * SPARC VIS optimized inverse DCT |
3 * Copyright (c) 2007 Denes Balatoni < dbalatoni XatX interware XdotX hu > | 3 * Copyright (c) 2007 Denes Balatoni < dbalatoni XatX interware XdotX hu > |
4 * | 4 * |
5 * I did consult the following fine web page about dct | 5 * I did consult the following fine web page about dct |
6 * http://www.geocities.com/ssavekar/dct.htm | 6 * http://www.geocities.com/ssavekar/dct.htm |
7 * | 7 * |
8 * This file is part of FFmpeg. | 8 * This file is part of FFmpeg. |
9 * | 9 * |
10 * FFmpeg is free software; you can redistribute it and/or | 10 * FFmpeg is free software; you can redistribute it and/or |
11 * modify it under the terms of the GNU Lesser General Public | 11 * modify it under the terms of the GNU Lesser General Public |
12 * License as published by the Free Software Foundation; either | 12 * License as published by the Free Software Foundation; either |
13 * version 2.1 of the License, or (at your option) any later version. | 13 * version 2.1 of the License, or (at your option) any later version. |
14 * | 14 * |
15 * FFmpeg is distributed in the hope that it will be useful, | 15 * FFmpeg is distributed in the hope that it will be useful, |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 * Lesser General Public License for more details. | 18 * Lesser General Public License for more details. |
19 * | 19 * |
20 * You should have received a copy of the GNU Lesser General Public | 20 * You should have received a copy of the GNU Lesser General Public |
21 * License along with FFmpeg; if not, write to the Free Software | 21 * License along with FFmpeg; if not, write to the Free Software |
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
23 */ | 23 */ |
24 | 24 |
25 #include "libavcodec/dsputil.h" | 25 #include "libavcodec/dsputil.h" |
26 | 26 |
27 static const DECLARE_ALIGNED_8(int16_t, coeffs)[28] = { | 27 static const DECLARE_ALIGNED(8, int16_t, coeffs)[28] = { |
28 - 1259,- 1259,- 1259,- 1259, | 28 - 1259,- 1259,- 1259,- 1259, |
29 - 4989,- 4989,- 4989,- 4989, | 29 - 4989,- 4989,- 4989,- 4989, |
30 -11045,-11045,-11045,-11045, | 30 -11045,-11045,-11045,-11045, |
31 -19195,-19195,-19195,-19195, | 31 -19195,-19195,-19195,-19195, |
32 -29126,-29126,-29126,-29126, | 32 -29126,-29126,-29126,-29126, |
33 25080, 25080, 25080, 25080, | 33 25080, 25080, 25080, 25080, |
34 12785, 12785, 12785, 12785 | 34 12785, 12785, 12785, 12785 |
35 }; | 35 }; |
36 static const DECLARE_ALIGNED_8(uint16_t, scale)[4] = { | 36 static const DECLARE_ALIGNED(8, uint16_t, scale)[4] = { |
37 65536>>6, 65536>>6, 65536>>6, 65536>>6 | 37 65536>>6, 65536>>6, 65536>>6, 65536>>6 |
38 }; | 38 }; |
39 static const DECLARE_ALIGNED_8(uint16_t, rounder)[4] = { | 39 static const DECLARE_ALIGNED(8, uint16_t, rounder)[4] = { |
40 1<<5, 1<<5, 1<<5, 1<<5 | 40 1<<5, 1<<5, 1<<5, 1<<5 |
41 }; | 41 }; |
42 static const DECLARE_ALIGNED_8(uint16_t, expand)[4] = { | 42 static const DECLARE_ALIGNED(8, uint16_t, expand)[4] = { |
43 1<<14, 1<<14, 1<<14, 1<<14 | 43 1<<14, 1<<14, 1<<14, 1<<14 |
44 }; | 44 }; |
45 | 45 |
46 #define INIT_IDCT \ | 46 #define INIT_IDCT \ |
47 "ldd [%1], %%f32 \n\t"\ | 47 "ldd [%1], %%f32 \n\t"\ |
48 "ldd [%1+8], %%f34 \n\t"\ | 48 "ldd [%1+8], %%f34 \n\t"\ |
49 "ldd [%1+16], %%f36 \n\t"\ | 49 "ldd [%1+16], %%f36 \n\t"\ |
50 "ldd [%1+24], %%f38 \n\t"\ | 50 "ldd [%1+24], %%f38 \n\t"\ |
51 "ldd [%1+32], %%f40 \n\t"\ | 51 "ldd [%1+32], %%f40 \n\t"\ |
52 "ldd [%1+40], %%f42 \n\t"\ | 52 "ldd [%1+40], %%f42 \n\t"\ |
(...skipping 326 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
379 "st %%f4, [%7+" dest "] \n\t"\ | 379 "st %%f4, [%7+" dest "] \n\t"\ |
380 "st %%f6, [%8+" dest "] \n\t"\ | 380 "st %%f6, [%8+" dest "] \n\t"\ |
381 "st %%f8, [%9+" dest "] \n\t"\ | 381 "st %%f8, [%9+" dest "] \n\t"\ |
382 "st %%f10, [%10+" dest "] \n\t"\ | 382 "st %%f10, [%10+" dest "] \n\t"\ |
383 "st %%f12, [%11+" dest "] \n\t"\ | 383 "st %%f12, [%11+" dest "] \n\t"\ |
384 "st %%f14, [%12+" dest "] \n\t"\ | 384 "st %%f14, [%12+" dest "] \n\t"\ |
385 | 385 |
386 | 386 |
387 void ff_simple_idct_vis(DCTELEM *data) { | 387 void ff_simple_idct_vis(DCTELEM *data) { |
388 int out1, out2, out3, out4; | 388 int out1, out2, out3, out4; |
389 DECLARE_ALIGNED_8(int16_t, temp)[8*8]; | 389 DECLARE_ALIGNED(8, int16_t, temp)[8*8]; |
390 | 390 |
391 __asm__ volatile( | 391 __asm__ volatile( |
392 INIT_IDCT | 392 INIT_IDCT |
393 | 393 |
394 #define ADDROUNDER | 394 #define ADDROUNDER |
395 | 395 |
396 // shift right 16-4=12 | 396 // shift right 16-4=12 |
397 LOADSCALE("%2+8") | 397 LOADSCALE("%2+8") |
398 IDCT4ROWS | 398 IDCT4ROWS |
399 STOREROWS("%3+8") | 399 STOREROWS("%3+8") |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
519 LOAD("%2+64") | 519 LOAD("%2+64") |
520 TRANSPOSE | 520 TRANSPOSE |
521 IDCT4ROWS | 521 IDCT4ROWS |
522 ADDPIXELSCLAMPED("4") | 522 ADDPIXELSCLAMPED("4") |
523 | 523 |
524 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6), | 524 : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6), |
525 "=r" (r1), "=r" (r2), "=r" (r3), "=r" (r4), "=r" (r5), "=r" (r6), "=r" (r7) | 525 "=r" (r1), "=r" (r2), "=r" (r3), "=r" (r4), "=r" (r5), "=r" (r6), "=r" (r7) |
526 : "0" (rounder), "1" (coeffs), "2" (data), "3" (dest), "4" (line_size), "5" (expand) | 526 : "0" (rounder), "1" (coeffs), "2" (data), "3" (dest), "4" (line_size), "5" (expand) |
527 ); | 527 ); |
528 } | 528 } |
OLD | NEW |