| OLD | NEW | 
|    1 /* |    1 /* | 
|    2  * jidctfst.c |    2  * jidctfst.c | 
|    3  * |    3  * | 
 |    4  * This file was part of the Independent JPEG Group's software: | 
|    4  * Copyright (C) 1994-1998, Thomas G. Lane. |    5  * Copyright (C) 1994-1998, Thomas G. Lane. | 
|    5  * This file is part of the Independent JPEG Group's software. |    6  * libjpeg-turbo Modifications: | 
|    6  * For conditions of distribution and use, see the accompanying README file. |    7  * Copyright (C) 2015, D. R. Commander. | 
 |    8  * For conditions of distribution and use, see the accompanying README.ijg | 
 |    9  * file. | 
|    7  * |   10  * | 
|    8  * This file contains a fast, not so accurate integer implementation of the |   11  * This file contains a fast, not so accurate integer implementation of the | 
|    9  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine |   12  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine | 
|   10  * must also perform dequantization of the input coefficients. |   13  * must also perform dequantization of the input coefficients. | 
|   11  * |   14  * | 
|   12  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT |   15  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT | 
|   13  * on each row (or vice versa, but it's more convenient to emit a row at |   16  * on each row (or vice versa, but it's more convenient to emit a row at | 
|   14  * a time).  Direct algorithms are also available, but they are much more |   17  * a time).  Direct algorithms are also available, but they are much more | 
|   15  * complex and seem not to be any faster when reduced to code. |   18  * complex and seem not to be any faster when reduced to code. | 
|   16  * |   19  * | 
|   17  * This implementation is based on Arai, Agui, and Nakajima's algorithm for |   20  * This implementation is based on Arai, Agui, and Nakajima's algorithm for | 
|   18  * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in |   21  * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in | 
|   19  * Japanese, but the algorithm is described in the Pennebaker & Mitchell |   22  * Japanese, but the algorithm is described in the Pennebaker & Mitchell | 
|   20  * JPEG textbook (see REFERENCES section in file README).  The following code |   23  * JPEG textbook (see REFERENCES section in file README.ijg).  The following | 
|   21  * is based directly on figure 4-8 in P&M. |   24  * code is based directly on figure 4-8 in P&M. | 
|   22  * While an 8-point DCT cannot be done in less than 11 multiplies, it is |   25  * While an 8-point DCT cannot be done in less than 11 multiplies, it is | 
|   23  * possible to arrange the computation so that many of the multiplies are |   26  * possible to arrange the computation so that many of the multiplies are | 
|   24  * simple scalings of the final outputs.  These multiplies can then be |   27  * simple scalings of the final outputs.  These multiplies can then be | 
|   25  * folded into the multiplications or divisions by the JPEG quantization |   28  * folded into the multiplications or divisions by the JPEG quantization | 
|   26  * table entries.  The AA&N method leaves only 5 multiplies and 29 adds |   29  * table entries.  The AA&N method leaves only 5 multiplies and 29 adds | 
|   27  * to be done in the DCT itself. |   30  * to be done in the DCT itself. | 
|   28  * The primary disadvantage of this method is that with fixed-point math, |   31  * The primary disadvantage of this method is that with fixed-point math, | 
|   29  * accuracy is lost due to imprecise representation of the scaled |   32  * accuracy is lost due to imprecise representation of the scaled | 
|   30  * quantization values.  The smaller the quantization table entry, the less |   33  * quantization values.  The smaller the quantization table entry, the less | 
|   31  * precise the scaled value, so this implementation does worse with high- |   34  * precise the scaled value, so this implementation does worse with high- | 
|   32  * quality-setting files than with low-quality ones. |   35  * quality-setting files than with low-quality ones. | 
|   33  */ |   36  */ | 
|   34  |   37  | 
|   35 #define JPEG_INTERNALS |   38 #define JPEG_INTERNALS | 
|   36 #include "jinclude.h" |   39 #include "jinclude.h" | 
|   37 #include "jpeglib.h" |   40 #include "jpeglib.h" | 
|   38 #include "jdct.h"»      »       /* Private declarations for DCT subsystem */ |   41 #include "jdct.h"               /* Private declarations for DCT subsystem */ | 
|   39  |   42  | 
|   40 #ifdef DCT_IFAST_SUPPORTED |   43 #ifdef DCT_IFAST_SUPPORTED | 
|   41  |   44  | 
|   42  |   45  | 
|   43 /* |   46 /* | 
|   44  * This module is specialized to the case DCTSIZE = 8. |   47  * This module is specialized to the case DCTSIZE = 8. | 
|   45  */ |   48  */ | 
|   46  |   49  | 
|   47 #if DCTSIZE != 8 |   50 #if DCTSIZE != 8 | 
|   48   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ |   51   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ | 
| (...skipping 22 matching lines...) | 
|   71  * 8 fractional bits, rather than 13.  This saves some shifting work on some |   74  * 8 fractional bits, rather than 13.  This saves some shifting work on some | 
|   72  * machines, and may also reduce the cost of multiplication (since there |   75  * machines, and may also reduce the cost of multiplication (since there | 
|   73  * are fewer one-bits in the constants). |   76  * are fewer one-bits in the constants). | 
|   74  */ |   77  */ | 
|   75  |   78  | 
|   76 #if BITS_IN_JSAMPLE == 8 |   79 #if BITS_IN_JSAMPLE == 8 | 
|   77 #define CONST_BITS  8 |   80 #define CONST_BITS  8 | 
|   78 #define PASS1_BITS  2 |   81 #define PASS1_BITS  2 | 
|   79 #else |   82 #else | 
|   80 #define CONST_BITS  8 |   83 #define CONST_BITS  8 | 
|   81 #define PASS1_BITS  1»  »       /* lose a little precision to avoid overflow */ |   84 #define PASS1_BITS  1           /* lose a little precision to avoid overflow */ | 
|   82 #endif |   85 #endif | 
|   83  |   86  | 
|   84 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus |   87 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus | 
|   85  * causing a lot of useless floating-point operations at run time. |   88  * causing a lot of useless floating-point operations at run time. | 
|   86  * To get around this we use the following pre-calculated constants. |   89  * To get around this we use the following pre-calculated constants. | 
|   87  * If you change CONST_BITS you may want to add appropriate values. |   90  * If you change CONST_BITS you may want to add appropriate values. | 
|   88  * (With a reasonable C compiler, you can just rely on the FIX() macro...) |   91  * (With a reasonable C compiler, you can just rely on the FIX() macro...) | 
|   89  */ |   92  */ | 
|   90  |   93  | 
|   91 #if CONST_BITS == 8 |   94 #if CONST_BITS == 8 | 
|   92 #define FIX_1_082392200  ((INT32)  277)»»       /* FIX(1.082392200) */ |   95 #define FIX_1_082392200  ((JLONG)  277)         /* FIX(1.082392200) */ | 
|   93 #define FIX_1_414213562  ((INT32)  362)»»       /* FIX(1.414213562) */ |   96 #define FIX_1_414213562  ((JLONG)  362)         /* FIX(1.414213562) */ | 
|   94 #define FIX_1_847759065  ((INT32)  473)»»       /* FIX(1.847759065) */ |   97 #define FIX_1_847759065  ((JLONG)  473)         /* FIX(1.847759065) */ | 
|   95 #define FIX_2_613125930  ((INT32)  669)»»       /* FIX(2.613125930) */ |   98 #define FIX_2_613125930  ((JLONG)  669)         /* FIX(2.613125930) */ | 
|   96 #else |   99 #else | 
|   97 #define FIX_1_082392200  FIX(1.082392200) |  100 #define FIX_1_082392200  FIX(1.082392200) | 
|   98 #define FIX_1_414213562  FIX(1.414213562) |  101 #define FIX_1_414213562  FIX(1.414213562) | 
|   99 #define FIX_1_847759065  FIX(1.847759065) |  102 #define FIX_1_847759065  FIX(1.847759065) | 
|  100 #define FIX_2_613125930  FIX(2.613125930) |  103 #define FIX_2_613125930  FIX(2.613125930) | 
|  101 #endif |  104 #endif | 
|  102  |  105  | 
|  103  |  106  | 
|  104 /* We can gain a little more speed, with a further compromise in accuracy, |  107 /* We can gain a little more speed, with a further compromise in accuracy, | 
|  105  * by omitting the addition in a descaling shift.  This yields an incorrectly |  108  * by omitting the addition in a descaling shift.  This yields an incorrectly | 
|  106  * rounded result half the time... |  109  * rounded result half the time... | 
|  107  */ |  110  */ | 
|  108  |  111  | 
|  109 #ifndef USE_ACCURATE_ROUNDING |  112 #ifndef USE_ACCURATE_ROUNDING | 
|  110 #undef DESCALE |  113 #undef DESCALE | 
|  111 #define DESCALE(x,n)  RIGHT_SHIFT(x, n) |  114 #define DESCALE(x,n)  RIGHT_SHIFT(x, n) | 
|  112 #endif |  115 #endif | 
|  113  |  116  | 
|  114  |  117  | 
|  115 /* Multiply a DCTELEM variable by an INT32 constant, and immediately |  118 /* Multiply a DCTELEM variable by a JLONG constant, and immediately | 
|  116  * descale to yield a DCTELEM result. |  119  * descale to yield a DCTELEM result. | 
|  117  */ |  120  */ | 
|  118  |  121  | 
|  119 #define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) |  122 #define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) | 
|  120  |  123  | 
|  121  |  124  | 
|  122 /* Dequantize a coefficient by multiplying it by the multiplier-table |  125 /* Dequantize a coefficient by multiplying it by the multiplier-table | 
|  123  * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16 |  126  * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16 | 
|  124  * multiplication will do.  For 12-bit data, the multiplier table is |  127  * multiplication will do.  For 12-bit data, the multiplier table is | 
|  125  * declared INT32, so a 32-bit multiply will be used. |  128  * declared JLONG, so a 32-bit multiply will be used. | 
|  126  */ |  129  */ | 
|  127  |  130  | 
|  128 #if BITS_IN_JSAMPLE == 8 |  131 #if BITS_IN_JSAMPLE == 8 | 
|  129 #define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval)) |  132 #define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval)) | 
|  130 #else |  133 #else | 
|  131 #define DEQUANTIZE(coef,quantval)  \ |  134 #define DEQUANTIZE(coef,quantval)  \ | 
|  132 »       DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS) |  135         DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS) | 
|  133 #endif |  136 #endif | 
|  134  |  137  | 
|  135  |  138  | 
|  136 /* Like DESCALE, but applies to a DCTELEM and produces an int. |  139 /* Like DESCALE, but applies to a DCTELEM and produces an int. | 
|  137  * We assume that int right shift is unsigned if INT32 right shift is. |  140  * We assume that int right shift is unsigned if JLONG right shift is. | 
|  138  */ |  141  */ | 
|  139  |  142  | 
|  140 #ifdef RIGHT_SHIFT_IS_UNSIGNED |  143 #ifdef RIGHT_SHIFT_IS_UNSIGNED | 
|  141 #define ISHIFT_TEMPS»   DCTELEM ishift_temp; |  144 #define ISHIFT_TEMPS    DCTELEM ishift_temp; | 
|  142 #if BITS_IN_JSAMPLE == 8 |  145 #if BITS_IN_JSAMPLE == 8 | 
|  143 #define DCTELEMBITS  16»»       /* DCTELEM may be 16 or 32 bits */ |  146 #define DCTELEMBITS  16         /* DCTELEM may be 16 or 32 bits */ | 
|  144 #else |  147 #else | 
|  145 #define DCTELEMBITS  32»»       /* DCTELEM must be 32 bits */ |  148 #define DCTELEMBITS  32         /* DCTELEM must be 32 bits */ | 
|  146 #endif |  149 #endif | 
|  147 #define IRIGHT_SHIFT(x,shft)  \ |  150 #define IRIGHT_SHIFT(x,shft)  \ | 
|  148     ((ishift_temp = (x)) < 0 ? \ |  151     ((ishift_temp = (x)) < 0 ? \ | 
|  149      (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \ |  152      (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \ | 
|  150      (ishift_temp >> (shft))) |  153      (ishift_temp >> (shft))) | 
|  151 #else |  154 #else | 
|  152 #define ISHIFT_TEMPS |  155 #define ISHIFT_TEMPS | 
|  153 #define IRIGHT_SHIFT(x,shft)»   ((x) >> (shft)) |  156 #define IRIGHT_SHIFT(x,shft)    ((x) >> (shft)) | 
|  154 #endif |  157 #endif | 
|  155  |  158  | 
|  156 #ifdef USE_ACCURATE_ROUNDING |  159 #ifdef USE_ACCURATE_ROUNDING | 
|  157 #define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n)) |  160 #define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n)) | 
|  158 #else |  161 #else | 
|  159 #define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n)) |  162 #define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n)) | 
|  160 #endif |  163 #endif | 
|  161  |  164  | 
|  162  |  165  | 
|  163 /* |  166 /* | 
|  164  * Perform dequantization and inverse DCT on one block of coefficients. |  167  * Perform dequantization and inverse DCT on one block of coefficients. | 
|  165  */ |  168  */ | 
|  166  |  169  | 
|  167 GLOBAL(void) |  170 GLOBAL(void) | 
|  168 jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, |  171 jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, | 
|  169 »       »        JCOEFPTR coef_block, |  172                  JCOEFPTR coef_block, | 
|  170 »       »        JSAMPARRAY output_buf, JDIMENSION output_col) |  173                  JSAMPARRAY output_buf, JDIMENSION output_col) | 
|  171 { |  174 { | 
|  172   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |  175   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | 
|  173   DCTELEM tmp10, tmp11, tmp12, tmp13; |  176   DCTELEM tmp10, tmp11, tmp12, tmp13; | 
|  174   DCTELEM z5, z10, z11, z12, z13; |  177   DCTELEM z5, z10, z11, z12, z13; | 
|  175   JCOEFPTR inptr; |  178   JCOEFPTR inptr; | 
|  176   IFAST_MULT_TYPE * quantptr; |  179   IFAST_MULT_TYPE *quantptr; | 
|  177   int * wsptr; |  180   int *wsptr; | 
|  178   JSAMPROW outptr; |  181   JSAMPROW outptr; | 
|  179   JSAMPLE *range_limit = IDCT_range_limit(cinfo); |  182   JSAMPLE *range_limit = IDCT_range_limit(cinfo); | 
|  180   int ctr; |  183   int ctr; | 
|  181   int workspace[DCTSIZE2];»     /* buffers data between passes */ |  184   int workspace[DCTSIZE2];      /* buffers data between passes */ | 
|  182   SHIFT_TEMPS»  »       »       /* for DESCALE */ |  185   SHIFT_TEMPS                   /* for DESCALE */ | 
|  183   ISHIFT_TEMPS» »       »       /* for IDESCALE */ |  186   ISHIFT_TEMPS                  /* for IDESCALE */ | 
|  184  |  187  | 
|  185   /* Pass 1: process columns from input, store into work array. */ |  188   /* Pass 1: process columns from input, store into work array. */ | 
|  186  |  189  | 
|  187   inptr = coef_block; |  190   inptr = coef_block; | 
|  188   quantptr = (IFAST_MULT_TYPE *) compptr->dct_table; |  191   quantptr = (IFAST_MULT_TYPE *) compptr->dct_table; | 
|  189   wsptr = workspace; |  192   wsptr = workspace; | 
|  190   for (ctr = DCTSIZE; ctr > 0; ctr--) { |  193   for (ctr = DCTSIZE; ctr > 0; ctr--) { | 
|  191     /* Due to quantization, we will usually find that many of the input |  194     /* Due to quantization, we will usually find that many of the input | 
|  192      * coefficients are zero, especially the AC terms.  We can exploit this |  195      * coefficients are zero, especially the AC terms.  We can exploit this | 
|  193      * by short-circuiting the IDCT calculation for any column in which all |  196      * by short-circuiting the IDCT calculation for any column in which all | 
|  194      * the AC terms are zero.  In that case each output is equal to the |  197      * the AC terms are zero.  In that case each output is equal to the | 
|  195      * DC coefficient (with scale factor as needed). |  198      * DC coefficient (with scale factor as needed). | 
|  196      * With typical images and quantization tables, half or more of the |  199      * With typical images and quantization tables, half or more of the | 
|  197      * column DCT calculations can be simplified this way. |  200      * column DCT calculations can be simplified this way. | 
|  198      */ |  201      */ | 
|  199      |  202  | 
|  200     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && |  203     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && | 
|  201 »       inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && |  204         inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && | 
|  202 »       inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && |  205         inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && | 
|  203 »       inptr[DCTSIZE*7] == 0) { |  206         inptr[DCTSIZE*7] == 0) { | 
|  204       /* AC terms all zero */ |  207       /* AC terms all zero */ | 
|  205       int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |  208       int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | 
|  206  |  209  | 
|  207       wsptr[DCTSIZE*0] = dcval; |  210       wsptr[DCTSIZE*0] = dcval; | 
|  208       wsptr[DCTSIZE*1] = dcval; |  211       wsptr[DCTSIZE*1] = dcval; | 
|  209       wsptr[DCTSIZE*2] = dcval; |  212       wsptr[DCTSIZE*2] = dcval; | 
|  210       wsptr[DCTSIZE*3] = dcval; |  213       wsptr[DCTSIZE*3] = dcval; | 
|  211       wsptr[DCTSIZE*4] = dcval; |  214       wsptr[DCTSIZE*4] = dcval; | 
|  212       wsptr[DCTSIZE*5] = dcval; |  215       wsptr[DCTSIZE*5] = dcval; | 
|  213       wsptr[DCTSIZE*6] = dcval; |  216       wsptr[DCTSIZE*6] = dcval; | 
|  214       wsptr[DCTSIZE*7] = dcval; |  217       wsptr[DCTSIZE*7] = dcval; | 
|  215        |  218  | 
|  216       inptr++;» »       »       /* advance pointers to next column */ |  219       inptr++;                  /* advance pointers to next column */ | 
|  217       quantptr++; |  220       quantptr++; | 
|  218       wsptr++; |  221       wsptr++; | 
|  219       continue; |  222       continue; | 
|  220     } |  223     } | 
|  221      |  224  | 
|  222     /* Even part */ |  225     /* Even part */ | 
|  223  |  226  | 
|  224     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |  227     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); | 
|  225     tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |  228     tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); | 
|  226     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |  229     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); | 
|  227     tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |  230     tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); | 
|  228  |  231  | 
|  229     tmp10 = tmp0 + tmp2;»       /* phase 3 */ |  232     tmp10 = tmp0 + tmp2;        /* phase 3 */ | 
|  230     tmp11 = tmp0 - tmp2; |  233     tmp11 = tmp0 - tmp2; | 
|  231  |  234  | 
|  232     tmp13 = tmp1 + tmp3;»       /* phases 5-3 */ |  235     tmp13 = tmp1 + tmp3;        /* phases 5-3 */ | 
|  233     tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ |  236     tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ | 
|  234  |  237  | 
|  235     tmp0 = tmp10 + tmp13;»      /* phase 2 */ |  238     tmp0 = tmp10 + tmp13;       /* phase 2 */ | 
|  236     tmp3 = tmp10 - tmp13; |  239     tmp3 = tmp10 - tmp13; | 
|  237     tmp1 = tmp11 + tmp12; |  240     tmp1 = tmp11 + tmp12; | 
|  238     tmp2 = tmp11 - tmp12; |  241     tmp2 = tmp11 - tmp12; | 
|  239      |  242  | 
|  240     /* Odd part */ |  243     /* Odd part */ | 
|  241  |  244  | 
|  242     tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |  245     tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); | 
|  243     tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |  246     tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); | 
|  244     tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |  247     tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); | 
|  245     tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |  248     tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); | 
|  246  |  249  | 
|  247     z13 = tmp6 + tmp5;» »       /* phase 6 */ |  250     z13 = tmp6 + tmp5;          /* phase 6 */ | 
|  248     z10 = tmp6 - tmp5; |  251     z10 = tmp6 - tmp5; | 
|  249     z11 = tmp4 + tmp7; |  252     z11 = tmp4 + tmp7; | 
|  250     z12 = tmp4 - tmp7; |  253     z12 = tmp4 - tmp7; | 
|  251  |  254  | 
|  252     tmp7 = z11 + z13;»  »       /* phase 5 */ |  255     tmp7 = z11 + z13;           /* phase 5 */ | 
|  253     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ |  256     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ | 
|  254  |  257  | 
|  255     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ |  258     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ | 
|  256     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ |  259     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ | 
|  257     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ |  260     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ | 
|  258  |  261  | 
|  259     tmp6 = tmp12 - tmp7;»       /* phase 2 */ |  262     tmp6 = tmp12 - tmp7;        /* phase 2 */ | 
|  260     tmp5 = tmp11 - tmp6; |  263     tmp5 = tmp11 - tmp6; | 
|  261     tmp4 = tmp10 + tmp5; |  264     tmp4 = tmp10 + tmp5; | 
|  262  |  265  | 
|  263     wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); |  266     wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); | 
|  264     wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); |  267     wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); | 
|  265     wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); |  268     wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); | 
|  266     wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); |  269     wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); | 
|  267     wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5); |  270     wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5); | 
|  268     wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); |  271     wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); | 
|  269     wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); |  272     wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); | 
|  270     wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); |  273     wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); | 
|  271  |  274  | 
|  272     inptr++;»   »       »       /* advance pointers to next column */ |  275     inptr++;                    /* advance pointers to next column */ | 
|  273     quantptr++; |  276     quantptr++; | 
|  274     wsptr++; |  277     wsptr++; | 
|  275   } |  278   } | 
|  276    |  279  | 
|  277   /* Pass 2: process rows from work array, store into output array. */ |  280   /* Pass 2: process rows from work array, store into output array. */ | 
|  278   /* Note that we must descale the results by a factor of 8 == 2**3, */ |  281   /* Note that we must descale the results by a factor of 8 == 2**3, */ | 
|  279   /* and also undo the PASS1_BITS scaling. */ |  282   /* and also undo the PASS1_BITS scaling. */ | 
|  280  |  283  | 
|  281   wsptr = workspace; |  284   wsptr = workspace; | 
|  282   for (ctr = 0; ctr < DCTSIZE; ctr++) { |  285   for (ctr = 0; ctr < DCTSIZE; ctr++) { | 
|  283     outptr = output_buf[ctr] + output_col; |  286     outptr = output_buf[ctr] + output_col; | 
|  284     /* Rows of zeroes can be exploited in the same way as we did with columns. |  287     /* Rows of zeroes can be exploited in the same way as we did with columns. | 
|  285      * However, the column calculation has created many nonzero AC terms, so |  288      * However, the column calculation has created many nonzero AC terms, so | 
|  286      * the simplification applies less often (typically 5% to 10% of the time). |  289      * the simplification applies less often (typically 5% to 10% of the time). | 
|  287      * On machines with very fast multiplication, it's possible that the |  290      * On machines with very fast multiplication, it's possible that the | 
|  288      * test takes more time than it's worth.  In that case this section |  291      * test takes more time than it's worth.  In that case this section | 
|  289      * may be commented out. |  292      * may be commented out. | 
|  290      */ |  293      */ | 
|  291      |  294  | 
|  292 #ifndef NO_ZERO_ROW_TEST |  295 #ifndef NO_ZERO_ROW_TEST | 
|  293     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && |  296     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && | 
|  294 »       wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { |  297         wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { | 
|  295       /* AC terms all zero */ |  298       /* AC terms all zero */ | 
|  296       JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3) |  299       JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3) | 
|  297 »       »       »       »         & RANGE_MASK]; |  300                                   & RANGE_MASK]; | 
|  298        |  301  | 
|  299       outptr[0] = dcval; |  302       outptr[0] = dcval; | 
|  300       outptr[1] = dcval; |  303       outptr[1] = dcval; | 
|  301       outptr[2] = dcval; |  304       outptr[2] = dcval; | 
|  302       outptr[3] = dcval; |  305       outptr[3] = dcval; | 
|  303       outptr[4] = dcval; |  306       outptr[4] = dcval; | 
|  304       outptr[5] = dcval; |  307       outptr[5] = dcval; | 
|  305       outptr[6] = dcval; |  308       outptr[6] = dcval; | 
|  306       outptr[7] = dcval; |  309       outptr[7] = dcval; | 
|  307  |  310  | 
|  308       wsptr += DCTSIZE;»»       /* advance pointer to next row */ |  311       wsptr += DCTSIZE;         /* advance pointer to next row */ | 
|  309       continue; |  312       continue; | 
|  310     } |  313     } | 
|  311 #endif |  314 #endif | 
|  312      |  315  | 
|  313     /* Even part */ |  316     /* Even part */ | 
|  314  |  317  | 
|  315     tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); |  318     tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); | 
|  316     tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); |  319     tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); | 
|  317  |  320  | 
|  318     tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); |  321     tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); | 
|  319     tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) |  322     tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) | 
|  320 »           - tmp13; |  323             - tmp13; | 
|  321  |  324  | 
|  322     tmp0 = tmp10 + tmp13; |  325     tmp0 = tmp10 + tmp13; | 
|  323     tmp3 = tmp10 - tmp13; |  326     tmp3 = tmp10 - tmp13; | 
|  324     tmp1 = tmp11 + tmp12; |  327     tmp1 = tmp11 + tmp12; | 
|  325     tmp2 = tmp11 - tmp12; |  328     tmp2 = tmp11 - tmp12; | 
|  326  |  329  | 
|  327     /* Odd part */ |  330     /* Odd part */ | 
|  328  |  331  | 
|  329     z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; |  332     z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; | 
|  330     z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; |  333     z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; | 
|  331     z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; |  334     z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; | 
|  332     z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; |  335     z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; | 
|  333  |  336  | 
|  334     tmp7 = z11 + z13;»  »       /* phase 5 */ |  337     tmp7 = z11 + z13;           /* phase 5 */ | 
|  335     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ |  338     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ | 
|  336  |  339  | 
|  337     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ |  340     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */ | 
|  338     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ |  341     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */ | 
|  339     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ |  342     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */ | 
|  340  |  343  | 
|  341     tmp6 = tmp12 - tmp7;»       /* phase 2 */ |  344     tmp6 = tmp12 - tmp7;        /* phase 2 */ | 
|  342     tmp5 = tmp11 - tmp6; |  345     tmp5 = tmp11 - tmp6; | 
|  343     tmp4 = tmp10 + tmp5; |  346     tmp4 = tmp10 + tmp5; | 
|  344  |  347  | 
|  345     /* Final output stage: scale down by a factor of 8 and range-limit */ |  348     /* Final output stage: scale down by a factor of 8 and range-limit */ | 
|  346  |  349  | 
|  347     outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) |  350     outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) | 
|  348 »       »       »           & RANGE_MASK]; |  351                             & RANGE_MASK]; | 
|  349     outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) |  352     outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) | 
|  350 »       »       »           & RANGE_MASK]; |  353                             & RANGE_MASK]; | 
|  351     outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) |  354     outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) | 
|  352 »       »       »           & RANGE_MASK]; |  355                             & RANGE_MASK]; | 
|  353     outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) |  356     outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) | 
|  354 »       »       »           & RANGE_MASK]; |  357                             & RANGE_MASK]; | 
|  355     outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) |  358     outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) | 
|  356 »       »       »           & RANGE_MASK]; |  359                             & RANGE_MASK]; | 
|  357     outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) |  360     outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) | 
|  358 »       »       »           & RANGE_MASK]; |  361                             & RANGE_MASK]; | 
|  359     outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) |  362     outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) | 
|  360 »       »       »           & RANGE_MASK]; |  363                             & RANGE_MASK]; | 
|  361     outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) |  364     outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) | 
|  362 »       »       »           & RANGE_MASK]; |  365                             & RANGE_MASK]; | 
|  363  |  366  | 
|  364     wsptr += DCTSIZE;»  »       /* advance pointer to next row */ |  367     wsptr += DCTSIZE;           /* advance pointer to next row */ | 
|  365   } |  368   } | 
|  366 } |  369 } | 
|  367  |  370  | 
|  368 #endif /* DCT_IFAST_SUPPORTED */ |  371 #endif /* DCT_IFAST_SUPPORTED */ | 
| OLD | NEW |