jfdctint.c - Issue 1934113002: Update libjpeg_turbo to 1.4.90 from https://github.com/libjpeg-turbo/

Unified Diff: jfdctint.c

Issue 1934113002: Update libjpeg_turbo to 1.4.90 from https://github.com/libjpeg-turbo/ (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libjpeg_turbo.git@master

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: jfdctint.c

diff --git a/jfdctint.c b/jfdctint.c

index 0a78b64aee8ffd7fc6c3469495ec577a59d44ed1..73e0b59032c90a1da7e2b8f9bfc0fd9593019587 100644

--- a/jfdctint.c

+++ b/jfdctint.c

@@ -1,9 +1,12 @@

* jfdctint.c

+ * This file was part of the Independent JPEG Group's software.

- * This file is part of the Independent JPEG Group's software.

- * For conditions of distribution and use, see the accompanying README file.

+ * libjpeg-turbo Modifications:

+ * For conditions of distribution and use, see the accompanying README.ijg

+ * file.

* This file contains a slow-but-accurate integer implementation of the

* forward DCT (Discrete Cosine Transform).

@@ -26,7 +29,7 @@

#define JPEG_INTERNALS

#include "jinclude.h"

#include "jpeglib.h"

-#include "jdct.h" /* Private declarations for DCT subsystem */

+#include "jdct.h" /* Private declarations for DCT subsystem */

#ifdef DCT_ISLOW_SUPPORTED

@@ -67,7 +70,7 @@

* they are represented to better-than-integral precision. These outputs

* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word

* with the recommended scaling. (For 12-bit sample data, the intermediate

- * array is INT32 anyway.)

+ * array is JLONG anyway.)

* To avoid overflow of the 32-bit intermediate results in pass 2, we must

* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis

@@ -79,7 +82,7 @@

#define PASS1_BITS 2

#else

#define CONST_BITS 13

-#define PASS1_BITS 1 /* lose a little precision to avoid overflow */

+#define PASS1_BITS 1 /* lose a little precision to avoid overflow */

#endif

/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus

@@ -90,18 +93,18 @@

#if CONST_BITS == 13

-#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */

-#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */

-#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */

-#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */

-#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */

-#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */

-#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */

-#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */

-#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */

-#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */

-#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */

-#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */

+#define FIX_0_298631336 ((JLONG) 2446) /* FIX(0.298631336) */

+#define FIX_0_390180644 ((JLONG) 3196) /* FIX(0.390180644) */

+#define FIX_0_541196100 ((JLONG) 4433) /* FIX(0.541196100) */

+#define FIX_0_765366865 ((JLONG) 6270) /* FIX(0.765366865) */

+#define FIX_0_899976223 ((JLONG) 7373) /* FIX(0.899976223) */

+#define FIX_1_175875602 ((JLONG) 9633) /* FIX(1.175875602) */

+#define FIX_1_501321110 ((JLONG) 12299) /* FIX(1.501321110) */

+#define FIX_1_847759065 ((JLONG) 15137) /* FIX(1.847759065) */

+#define FIX_1_961570560 ((JLONG) 16069) /* FIX(1.961570560) */

+#define FIX_2_053119869 ((JLONG) 16819) /* FIX(2.053119869) */

+#define FIX_2_562915447 ((JLONG) 20995) /* FIX(2.562915447) */

+#define FIX_3_072711026 ((JLONG) 25172) /* FIX(3.072711026) */

#else

#define FIX_0_298631336 FIX(0.298631336)

#define FIX_0_390180644 FIX(0.390180644)

@@ -118,7 +121,7 @@

#endif

-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.

+/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.

* For 8-bit samples with the recommended scaling, all the variable

* and constant values involved are no more than 16 bits wide, so a

* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.

@@ -137,11 +140,11 @@

GLOBAL(void)

-jpeg_fdct_islow (DCTELEM * data)

+jpeg_fdct_islow (DCTELEM *data)

{

- INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;

- INT32 tmp10, tmp11, tmp12, tmp13;

- INT32 z1, z2, z3, z4, z5;

+ JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;

+ JLONG tmp10, tmp11, tmp12, tmp13;

+ JLONG z1, z2, z3, z4, z5;

DCTELEM *dataptr;

int ctr;

SHIFT_TEMPS

@@ -160,36 +163,36 @@ jpeg_fdct_islow (DCTELEM * data)

tmp5 = dataptr[2] - dataptr[5];

tmp3 = dataptr[3] + dataptr[4];

tmp4 = dataptr[3] - dataptr[4];

/* Even part per LL&M figure 1 --- note that published figure is faulty;

* rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */

tmp10 = tmp0 + tmp3;

tmp13 = tmp0 - tmp3;

tmp11 = tmp1 + tmp2;

tmp12 = tmp1 - tmp2;

- dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);

- dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);

+ dataptr[0] = (DCTELEM) LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS);

+ dataptr[4] = (DCTELEM) LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS);

z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);

dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),

- CONST_BITS-PASS1_BITS);

+ CONST_BITS-PASS1_BITS);

dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),

- CONST_BITS-PASS1_BITS);

+ CONST_BITS-PASS1_BITS);

/* Odd part per figure 8 --- note paper omits factor of sqrt(2).

* cK represents cos(K*pi/16).

* i0..i3 in the paper are tmp4..tmp7 here. */

z1 = tmp4 + tmp7;

z2 = tmp5 + tmp6;

z3 = tmp4 + tmp6;

z4 = tmp5 + tmp7;

z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */

tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */

tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */

tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */

@@ -198,16 +201,16 @@ jpeg_fdct_islow (DCTELEM * data)

z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */

z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */

z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */

z3 += z5;

z4 += z5;

dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);

dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);

dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);

dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);

- dataptr += DCTSIZE; /* advance pointer to next row */

+ dataptr += DCTSIZE; /* advance pointer to next row */

}

/* Pass 2: process columns.

@@ -225,36 +228,36 @@ jpeg_fdct_islow (DCTELEM * data)

tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];

tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];

tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];

/* Even part per LL&M figure 1 --- note that published figure is faulty;

* rotator "sqrt(2)*c1" should be "sqrt(2)*c6". */

tmp10 = tmp0 + tmp3;

tmp13 = tmp0 - tmp3;

tmp11 = tmp1 + tmp2;

tmp12 = tmp1 - tmp2;

dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);

dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);

z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);

dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),

- CONST_BITS+PASS1_BITS);

+ CONST_BITS+PASS1_BITS);

dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),

- CONST_BITS+PASS1_BITS);

+ CONST_BITS+PASS1_BITS);

/* Odd part per figure 8 --- note paper omits factor of sqrt(2).

* cK represents cos(K*pi/16).

* i0..i3 in the paper are tmp4..tmp7 here. */

z1 = tmp4 + tmp7;

z2 = tmp5 + tmp6;

z3 = tmp4 + tmp6;

z4 = tmp5 + tmp7;

z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */

tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */

tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */

tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */

@@ -263,20 +266,20 @@ jpeg_fdct_islow (DCTELEM * data)

z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */

z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */

z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */

z3 += z5;

z4 += z5;

dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,

- CONST_BITS+PASS1_BITS);

+ CONST_BITS+PASS1_BITS);

dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,

- CONST_BITS+PASS1_BITS);

+ CONST_BITS+PASS1_BITS);

dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,

- CONST_BITS+PASS1_BITS);

+ CONST_BITS+PASS1_BITS);

dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,

- CONST_BITS+PASS1_BITS);

- dataptr++; /* advance pointer to next column */

+ CONST_BITS+PASS1_BITS);

+ dataptr++; /* advance pointer to next column */

}

« jconfigint.h ('K') | « jfdctfst.c ('k') | jidctflt.c » ('j') | simd/jccolext-sse2-64.asm » ('J')