Index: google.patch |
diff --git a/google.patch b/google.patch |
index f12156b4eecc5f5d219844cce2c54c60b1e6d4f3..fcb53b443086670ad83f4abbac4187bb530ffeca 100644 |
--- a/google.patch |
+++ b/google.patch |
@@ -1,174 +1,9613 @@ |
-Index: jdmarker.c |
+Index: README |
=================================================================== |
---- jdmarker.c (revision 829) |
-+++ jdmarker.c (working copy) |
-@@ -910,7 +910,7 @@ |
+--- README (revision 829) |
++++ README (working copy) |
+@@ -1,26 +1,26 @@ |
++libjpeg-turbo note: This file has been modified by The libjpeg-turbo Project |
++to include only information relevant to libjpeg-turbo, to wordsmith certain |
++sections, and to remove impolitic language that existed in the libjpeg v8 |
++README. It is included only for reference. Please see README-turbo.txt for |
++information specific to libjpeg-turbo. |
++ |
++ |
+ The Independent JPEG Group's JPEG software |
+ ========================================== |
+ |
+-README for release 6b of 27-Mar-1998 |
+-==================================== |
++This distribution contains a release of the Independent JPEG Group's free JPEG |
++software. You are welcome to redistribute this software and to use it for any |
++purpose, subject to the conditions under LEGAL ISSUES, below. |
+ |
+-This distribution contains the sixth public release of the Independent JPEG |
+-Group's free JPEG software. You are welcome to redistribute this software and |
+-to use it for any purpose, subject to the conditions under LEGAL ISSUES, below. |
++This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone, |
++Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson, |
++Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers, |
++and other members of the Independent JPEG Group. |
+ |
+-Serious users of this software (particularly those incorporating it into |
+-larger programs) should contact IJG at jpeg-info@uunet.uu.net to be added to |
+-our electronic mailing list. Mailing list members are notified of updates |
+-and have a chance to participate in technical discussions, etc. |
++IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee |
++(also known as JPEG, together with ITU-T SG16). |
+ |
+-This software is the work of Tom Lane, Philip Gladstone, Jim Boucher, |
+-Lee Crocker, Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, |
+-Guido Vollbeding, Ge' Weijers, and other members of the Independent JPEG |
+-Group. |
+ |
+-IJG is not affiliated with the official ISO JPEG standards committee. |
+- |
+- |
+ DOCUMENTATION ROADMAP |
+ ===================== |
+ |
+@@ -30,7 +30,6 @@ |
+ LEGAL ISSUES Copyright, lack of warranty, terms of distribution. |
+ REFERENCES Where to learn more about JPEG. |
+ ARCHIVE LOCATIONS Where to find newer versions of this software. |
+-RELATED SOFTWARE Other stuff you should get. |
+ FILE FORMAT WARS Software *not* to get. |
+ TO DO Plans for future IJG releases. |
+ |
+@@ -37,20 +36,19 @@ |
+ Other documentation files in the distribution are: |
+ |
+ User documentation: |
+- install.doc How to configure and install the IJG software. |
+- usage.doc Usage instructions for cjpeg, djpeg, jpegtran, |
++ install.txt How to configure and install the IJG software. |
++ usage.txt Usage instructions for cjpeg, djpeg, jpegtran, |
+ rdjpgcom, and wrjpgcom. |
+- *.1 Unix-style man pages for programs (same info as usage.doc). |
+- wizard.doc Advanced usage instructions for JPEG wizards only. |
++ *.1 Unix-style man pages for programs (same info as usage.txt). |
++ wizard.txt Advanced usage instructions for JPEG wizards only. |
+ change.log Version-to-version change highlights. |
+ Programmer and internal documentation: |
+- libjpeg.doc How to use the JPEG library in your own programs. |
++ libjpeg.txt How to use the JPEG library in your own programs. |
+ example.c Sample code for calling the JPEG library. |
+- structure.doc Overview of the JPEG library's internal structure. |
+- filelist.doc Road map of IJG files. |
+- coderules.doc Coding style rules --- please read if you contribute code. |
++ structure.txt Overview of the JPEG library's internal structure. |
++ coderules.txt Coding style rules --- please read if you contribute code. |
+ |
+-Please read at least the files install.doc and usage.doc. Useful information |
++Please read at least the files install.txt and usage.txt. Some information |
+ can also be found in the JPEG FAQ (Frequently Asked Questions) article. See |
+ ARCHIVE LOCATIONS below to find out where to obtain the FAQ article. |
+ |
+@@ -62,24 +60,27 @@ |
+ OVERVIEW |
+ ======== |
+ |
+-This package contains C software to implement JPEG image compression and |
+-decompression. JPEG (pronounced "jay-peg") is a standardized compression |
+-method for full-color and gray-scale images. JPEG is intended for compressing |
+-"real-world" scenes; line drawings, cartoons and other non-realistic images |
+-are not its strong suit. JPEG is lossy, meaning that the output image is not |
+-exactly identical to the input image. Hence you must not use JPEG if you |
+-have to have identical output bits. However, on typical photographic images, |
+-very good compression levels can be obtained with no visible change, and |
+-remarkably high compression levels are possible if you can tolerate a |
+-low-quality image. For more details, see the references, or just experiment |
+-with various compression settings. |
++This package contains C software to implement JPEG image encoding, decoding, |
++and transcoding. JPEG (pronounced "jay-peg") is a standardized compression |
++method for full-color and gray-scale images. JPEG's strong suit is compressing |
++photographic images or other types of images that have smooth color and |
++brightness transitions between neighboring pixels. Images with sharp lines or |
++other abrupt features may not compress well with JPEG, and a higher JPEG |
++quality may have to be used to avoid visible compression artifacts with such |
++images. |
+ |
++JPEG is lossy, meaning that the output pixels are not necessarily identical to |
++the input pixels. However, on photographic content and other "smooth" images, |
++very good compression ratios can be obtained with no visible compression |
++artifacts, and extremely high compression ratios are possible if you are |
++willing to sacrifice image quality (by reducing the "quality" setting in the |
++compressor.) |
++ |
+ This software implements JPEG baseline, extended-sequential, and progressive |
+ compression processes. Provision is made for supporting all variants of these |
+ processes, although some uncommon parameter settings aren't implemented yet. |
+-For legal reasons, we are not distributing code for the arithmetic-coding |
+-variants of JPEG; see LEGAL ISSUES. We have made no provision for supporting |
+-the hierarchical or lossless processes defined in the standard. |
++We have made no provision for supporting the hierarchical or lossless |
++processes defined in the standard. |
+ |
+ We provide a set of library routines for reading and writing JPEG image files, |
+ plus two sample applications "cjpeg" and "djpeg", which use the library to |
+@@ -91,11 +92,12 @@ |
+ for example, the color quantization modules are not strictly part of JPEG |
+ decoding, but they are essential for output to colormapped file formats or |
+ colormapped displays. These extra functions can be compiled out of the |
+-library if not required for a particular application. We have also included |
+-"jpegtran", a utility for lossless transcoding between different JPEG |
+-processes, and "rdjpgcom" and "wrjpgcom", two simple applications for |
+-inserting and extracting textual comments in JFIF files. |
++library if not required for a particular application. |
+ |
++We have also included "jpegtran", a utility for lossless transcoding between |
++different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple |
++applications for inserting and extracting textual comments in JFIF files. |
++ |
+ The emphasis in designing this software has been on achieving portability and |
+ flexibility, while also making it fast enough to be useful. In particular, |
+ the software is not intended to be read as a tutorial on JPEG. (See the |
+@@ -127,7 +129,7 @@ |
+ fitness for a particular purpose. This software is provided "AS IS", and you, |
+ its user, assume the entire risk as to its quality and accuracy. |
+ |
+-This software is copyright (C) 1991-1998, Thomas G. Lane. |
++This software is copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. |
+ All Rights Reserved except as specified below. |
+ |
+ Permission is hereby granted to use, copy, modify, and distribute this |
+@@ -158,30 +160,12 @@ |
+ assumed by the product vendor. |
+ |
+ |
+-ansi2knr.c is included in this distribution by permission of L. Peter Deutsch, |
+-sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA. |
+-ansi2knr.c is NOT covered by the above copyright and conditions, but instead |
+-by the usual distribution terms of the Free Software Foundation; principally, |
+-that you must include source code if you redistribute it. (See the file |
+-ansi2knr.c for full details.) However, since ansi2knr.c is not needed as part |
+-of any program generated from the IJG code, this does not limit you more than |
+-the foregoing paragraphs do. |
+- |
+ The Unix configuration script "configure" was produced with GNU Autoconf. |
+ It is copyright by the Free Software Foundation but is freely distributable. |
+ The same holds for its supporting scripts (config.guess, config.sub, |
+-ltconfig, ltmain.sh). Another support script, install-sh, is copyright |
+-by M.I.T. but is also freely distributable. |
++ltmain.sh). Another support script, install-sh, is copyright by X Consortium |
++but is also freely distributable. |
+ |
+-It appears that the arithmetic coding option of the JPEG spec is covered by |
+-patents owned by IBM, AT&T, and Mitsubishi. Hence arithmetic coding cannot |
+-legally be used without obtaining one or more licenses. For this reason, |
+-support for arithmetic coding has been removed from the free JPEG software. |
+-(Since arithmetic coding provides only a marginal gain over the unpatented |
+-Huffman mode, it is unlikely that very many implementations will support it.) |
+-So far as we are aware, there are no patent restrictions on the remaining |
+-code. |
+- |
+ The IJG distribution formerly included code to read and write GIF files. |
+ To avoid entanglement with the Unisys LZW patent, GIF reading support has |
+ been removed altogether, and the GIF writer has been simplified to produce |
+@@ -198,7 +182,7 @@ |
+ REFERENCES |
+ ========== |
+ |
+-We highly recommend reading one or more of these references before trying to |
++We recommend reading one or more of these references before trying to |
+ understand the innards of the JPEG software. |
+ |
+ The best short technical introduction to the JPEG compression algorithm is |
+@@ -207,7 +191,7 @@ |
+ (Adjacent articles in that issue discuss MPEG motion picture compression, |
+ applications of JPEG, and related topics.) If you don't have the CACM issue |
+ handy, a PostScript file containing a revised version of Wallace's article is |
+-available at ftp://ftp.uu.net/graphics/jpeg/wallace.ps.gz. The file (actually |
++available at http://www.ijg.org/files/wallace.ps.gz. The file (actually |
+ a preprint for an article that appeared in IEEE Trans. Consumer Electronics) |
+ omits the sample images that appeared in CACM, but it includes corrections |
+ and some added material. Note: the Wallace article is copyright ACM and IEEE, |
+@@ -222,45 +206,29 @@ |
+ sample code is far from industrial-strength, but when you are ready to look |
+ at a full implementation, you've got one here... |
+ |
+-The best full description of JPEG is the textbook "JPEG Still Image Data |
+-Compression Standard" by William B. Pennebaker and Joan L. Mitchell, published |
+-by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. Price US$59.95, 638 pp. |
+-The book includes the complete text of the ISO JPEG standards (DIS 10918-1 |
+-and draft DIS 10918-2). This is by far the most complete exposition of JPEG |
+-in existence, and we highly recommend it. |
++The best currently available description of JPEG is the textbook "JPEG Still |
++Image Data Compression Standard" by William B. Pennebaker and Joan L. |
++Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. |
++Price US$59.95, 638 pp. The book includes the complete text of the ISO JPEG |
++standards (DIS 10918-1 and draft DIS 10918-2). |
+ |
+-The JPEG standard itself is not available electronically; you must order a |
+-paper copy through ISO or ITU. (Unless you feel a need to own a certified |
+-official copy, we recommend buying the Pennebaker and Mitchell book instead; |
+-it's much cheaper and includes a great deal of useful explanatory material.) |
+-In the USA, copies of the standard may be ordered from ANSI Sales at (212) |
+-642-4900, or from Global Engineering Documents at (800) 854-7179. (ANSI |
+-doesn't take credit card orders, but Global does.) It's not cheap: as of |
+-1992, ANSI was charging $95 for Part 1 and $47 for Part 2, plus 7% |
+-shipping/handling. The standard is divided into two parts, Part 1 being the |
+-actual specification, while Part 2 covers compliance testing methods. Part 1 |
+-is titled "Digital Compression and Coding of Continuous-tone Still Images, |
++The original JPEG standard is divided into two parts, Part 1 being the actual |
++specification, while Part 2 covers compliance testing methods. Part 1 is |
++titled "Digital Compression and Coding of Continuous-tone Still Images, |
+ Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS |
+ 10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of |
+ Continuous-tone Still Images, Part 2: Compliance testing" and has document |
+ numbers ISO/IEC IS 10918-2, ITU-T T.83. |
+ |
+-Some extensions to the original JPEG standard are defined in JPEG Part 3, |
+-a newer ISO standard numbered ISO/IEC IS 10918-3 and ITU-T T.84. IJG |
+-currently does not support any Part 3 extensions. |
+- |
+ The JPEG standard does not specify all details of an interchangeable file |
+ format. For the omitted details we follow the "JFIF" conventions, revision |
+-1.02. A copy of the JFIF spec is available from: |
+- Literature Department |
+- C-Cube Microsystems, Inc. |
+- 1778 McCarthy Blvd. |
+- Milpitas, CA 95035 |
+- phone (408) 944-6300, fax (408) 944-6314 |
+-A PostScript version of this document is available by FTP at |
+-ftp://ftp.uu.net/graphics/jpeg/jfif.ps.gz. There is also a plain text |
+-version at ftp://ftp.uu.net/graphics/jpeg/jfif.txt.gz, but it is missing |
+-the figures. |
++1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report |
++and thus received a formal publication status. It is available as a free |
++download in PDF format from |
++http://www.ecma-international.org/publications/techreports/E-TR-098.htm. |
++A PostScript version of the JFIF document is available at |
++http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at |
++http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures. |
+ |
+ The TIFF 6.0 file format specification can be obtained by FTP from |
+ ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme |
+@@ -267,37 +235,24 @@ |
+ found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems. |
+ IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6). |
+ Instead, we recommend the JPEG design proposed by TIFF Technical Note #2 |
+-(Compression tag 7). Copies of this Note can be obtained from ftp.sgi.com or |
+-from ftp://ftp.uu.net/graphics/jpeg/. It is expected that the next revision |
++(Compression tag 7). Copies of this Note can be obtained from |
++http://www.ijg.org/files/. It is expected that the next revision |
+ of the TIFF spec will replace the 6.0 JPEG design with the Note's design. |
+ Although IJG's own code does not support TIFF/JPEG, the free libtiff library |
+-uses our library to implement TIFF/JPEG per the Note. libtiff is available |
+-from ftp://ftp.sgi.com/graphics/tiff/. |
++uses our library to implement TIFF/JPEG per the Note. |
+ |
+ |
+ ARCHIVE LOCATIONS |
+ ================= |
+ |
+-The "official" archive site for this software is ftp.uu.net (Internet |
+-address 192.48.96.9). The most recent released version can always be found |
+-there in directory graphics/jpeg. This particular version will be archived |
+-as ftp://ftp.uu.net/graphics/jpeg/jpegsrc.v6b.tar.gz. If you don't have |
+-direct Internet access, UUNET's archives are also available via UUCP; contact |
+-help@uunet.uu.net for information on retrieving files that way. |
++The "official" archive site for this software is www.ijg.org. |
++The most recent released version can always be found there in |
++directory "files". This particular version will be archived as |
++http://www.ijg.org/files/jpegsrc.v8d.tar.gz, and in Windows-compatible |
++"zip" archive format as http://www.ijg.org/files/jpegsr8d.zip. |
+ |
+-Numerous Internet sites maintain copies of the UUNET files. However, only |
+-ftp.uu.net is guaranteed to have the latest official version. |
+- |
+-You can also obtain this software in DOS-compatible "zip" archive format from |
+-the SimTel archives (ftp://ftp.simtel.net/pub/simtelnet/msdos/graphics/), or |
+-on CompuServe in the Graphics Support forum (GO CIS:GRAPHSUP), library 12 |
+-"JPEG Tools". Again, these versions may sometimes lag behind the ftp.uu.net |
+-release. |
+- |
+-The JPEG FAQ (Frequently Asked Questions) article is a useful source of |
+-general information about JPEG. It is updated constantly and therefore is |
+-not included in this distribution. The FAQ is posted every two weeks to |
+-Usenet newsgroups comp.graphics.misc, news.answers, and other groups. |
++The JPEG FAQ (Frequently Asked Questions) article is a source of some |
++general information about JPEG. |
+ It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/ |
+ and other news.answers archive sites, including the official news.answers |
+ archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. |
+@@ -307,79 +262,21 @@ |
+ send usenet/news.answers/jpeg-faq/part2 |
+ |
+ |
+-RELATED SOFTWARE |
+-================ |
+- |
+-Numerous viewing and image manipulation programs now support JPEG. (Quite a |
+-few of them use this library to do so.) The JPEG FAQ described above lists |
+-some of the more popular free and shareware viewers, and tells where to |
+-obtain them on Internet. |
+- |
+-If you are on a Unix machine, we highly recommend Jef Poskanzer's free |
+-PBMPLUS software, which provides many useful operations on PPM-format image |
+-files. In particular, it can convert PPM images to and from a wide range of |
+-other formats, thus making cjpeg/djpeg considerably more useful. The latest |
+-version is distributed by the NetPBM group, and is available from numerous |
+-sites, notably ftp://wuarchive.wustl.edu/graphics/graphics/packages/NetPBM/. |
+-Unfortunately PBMPLUS/NETPBM is not nearly as portable as the IJG software is; |
+-you are likely to have difficulty making it work on any non-Unix machine. |
+- |
+-A different free JPEG implementation, written by the PVRG group at Stanford, |
+-is available from ftp://havefun.stanford.edu/pub/jpeg/. This program |
+-is designed for research and experimentation rather than production use; |
+-it is slower, harder to use, and less portable than the IJG code, but it |
+-is easier to read and modify. Also, the PVRG code supports lossless JPEG, |
+-which we do not. (On the other hand, it doesn't do progressive JPEG.) |
+- |
+- |
+ FILE FORMAT WARS |
+ ================ |
+ |
+-Some JPEG programs produce files that are not compatible with our library. |
+-The root of the problem is that the ISO JPEG committee failed to specify a |
+-concrete file format. Some vendors "filled in the blanks" on their own, |
+-creating proprietary formats that no one else could read. (For example, none |
+-of the early commercial JPEG implementations for the Macintosh were able to |
+-exchange compressed files.) |
++The ISO/IEC JTC1/SC29/WG1 standards committee (also known as JPEG, together |
++with ITU-T SG16) currently promotes different formats containing the name |
++"JPEG" which are incompatible with original DCT-based JPEG. IJG therefore does |
++not support these formats (see REFERENCES). Indeed, one of the original |
++reasons for developing this free software was to help force convergence on |
++common, interoperable format standards for JPEG files. |
++Don't use an incompatible file format! |
++(In any case, our decoder will remain capable of reading existing JPEG |
++image files indefinitely.) |
+ |
+-The file format we have adopted is called JFIF (see REFERENCES). This format |
+-has been agreed to by a number of major commercial JPEG vendors, and it has |
+-become the de facto standard. JFIF is a minimal or "low end" representation. |
+-We recommend the use of TIFF/JPEG (TIFF revision 6.0 as modified by TIFF |
+-Technical Note #2) for "high end" applications that need to record a lot of |
+-additional data about an image. TIFF/JPEG is fairly new and not yet widely |
+-supported, unfortunately. |
+ |
+-The upcoming JPEG Part 3 standard defines a file format called SPIFF. |
+-SPIFF is interoperable with JFIF, in the sense that most JFIF decoders should |
+-be able to read the most common variant of SPIFF. SPIFF has some technical |
+-advantages over JFIF, but its major claim to fame is simply that it is an |
+-official standard rather than an informal one. At this point it is unclear |
+-whether SPIFF will supersede JFIF or whether JFIF will remain the de-facto |
+-standard. IJG intends to support SPIFF once the standard is frozen, but we |
+-have not decided whether it should become our default output format or not. |
+-(In any case, our decoder will remain capable of reading JFIF indefinitely.) |
+- |
+-Various proprietary file formats incorporating JPEG compression also exist. |
+-We have little or no sympathy for the existence of these formats. Indeed, |
+-one of the original reasons for developing this free software was to help |
+-force convergence on common, open format standards for JPEG files. Don't |
+-use a proprietary file format! |
+- |
+- |
+ TO DO |
+ ===== |
+ |
+-The major thrust for v7 will probably be improvement of visual quality. |
+-The current method for scaling the quantization tables is known not to be |
+-very good at low Q values. We also intend to investigate block boundary |
+-smoothing, "poor man's variable quantization", and other means of improving |
+-quality-vs-file-size performance without sacrificing compatibility. |
+- |
+-In future versions, we are considering supporting some of the upcoming JPEG |
+-Part 3 extensions --- principally, variable quantization and the SPIFF file |
+-format. |
+- |
+-As always, speeding things up is of great interest. |
+- |
+-Please send bug reports, offers of help, etc. to jpeg-info@uunet.uu.net. |
++Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org. |
+Index: bmp.c |
+=================================================================== |
+--- bmp.c (revision 829) |
++++ bmp.c (working copy) |
+@@ -1,370 +1,274 @@ |
+-/* Copyright (C)2004 Landmark Graphics Corporation |
+- * Copyright (C)2005 Sun Microsystems, Inc. |
++/* |
++ * Copyright (C)2011 D. R. Commander. All Rights Reserved. |
+ * |
+- * This library is free software and may be redistributed and/or modified under |
+- * the terms of the wxWindows Library License, Version 3.1 or (at your option) |
+- * any later version. The full license is in the LICENSE.txt file included |
+- * with this distribution. |
++ * Redistribution and use in source and binary forms, with or without |
++ * modification, are permitted provided that the following conditions are met: |
+ * |
+- * This library is distributed in the hope that it will be useful, |
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+- * wxWindows Library License for more details. |
+-*/ |
++ * - Redistributions of source code must retain the above copyright notice, |
++ * this list of conditions and the following disclaimer. |
++ * - Redistributions in binary form must reproduce the above copyright notice, |
++ * this list of conditions and the following disclaimer in the documentation |
++ * and/or other materials provided with the distribution. |
++ * - Neither the name of the libjpeg-turbo Project nor the names of its |
++ * contributors may be used to endorse or promote products derived from this |
++ * software without specific prior written permission. |
++ * |
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", |
++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE |
++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
++ * POSSIBILITY OF SUCH DAMAGE. |
++ */ |
+ |
+-#include <fcntl.h> |
+-#include <sys/types.h> |
+-#include <sys/stat.h> |
+-#include <errno.h> |
+-#include <stdlib.h> |
+ #include <stdio.h> |
+ #include <string.h> |
+-#ifdef _WIN32 |
+- #include <io.h> |
+-#else |
+- #include <unistd.h> |
+-#endif |
+-#include "./rrutil.h" |
+-#include "./bmp.h" |
++#include <setjmp.h> |
++#include <errno.h> |
++#include "cdjpeg.h" |
++#include <jpeglib.h> |
++#include <jpegint.h> |
++#include "tjutil.h" |
++#include "bmp.h" |
+ |
+-#ifndef BI_BITFIELDS |
+-#define BI_BITFIELDS 3L |
+-#endif |
+-#ifndef BI_RGB |
+-#define BI_RGB 0L |
+-#endif |
+ |
+-#define BMPHDRSIZE 54 |
+-typedef struct _bmphdr |
+-{ |
+- unsigned short bfType; |
+- unsigned int bfSize; |
+- unsigned short bfReserved1, bfReserved2; |
+- unsigned int bfOffBits; |
++/* This duplicates the functionality of the VirtualGL bitmap library using |
++ the components from cjpeg and djpeg */ |
+ |
+- unsigned int biSize; |
+- int biWidth, biHeight; |
+- unsigned short biPlanes, biBitCount; |
+- unsigned int biCompression, biSizeImage; |
+- int biXPelsPerMeter, biYPelsPerMeter; |
+- unsigned int biClrUsed, biClrImportant; |
+-} bmphdr; |
+ |
+-static const char *__bmperr="No error"; |
++/* Error handling (based on example in example.c) */ |
+ |
+-static const int ps[BMPPIXELFORMATS]={3, 4, 3, 4, 4, 4}; |
+-static const int roffset[BMPPIXELFORMATS]={0, 0, 2, 2, 3, 1}; |
+-static const int goffset[BMPPIXELFORMATS]={1, 1, 1, 1, 2, 2}; |
+-static const int boffset[BMPPIXELFORMATS]={2, 2, 0, 0, 1, 3}; |
++static char errStr[JMSG_LENGTH_MAX]="No error"; |
+ |
+-#define _throw(m) {__bmperr=m; retcode=-1; goto finally;} |
+-#define _unix(f) {if((f)==-1) _throw(strerror(errno));} |
+-#define _catch(f) {if((f)==-1) {retcode=-1; goto finally;}} |
++struct my_error_mgr |
++{ |
++ struct jpeg_error_mgr pub; |
++ jmp_buf setjmp_buffer; |
++}; |
++typedef struct my_error_mgr *my_error_ptr; |
+ |
+-#define readme(fd, addr, size) \ |
+- if((bytesread=read(fd, addr, (size)))==-1) _throw(strerror(errno)); \ |
+- if(bytesread!=(size)) _throw("Read error"); |
+- |
+-void pixelconvert(unsigned char *srcbuf, enum BMPPIXELFORMAT srcformat, |
+- int srcpitch, unsigned char *dstbuf, enum BMPPIXELFORMAT dstformat, int dstpitch, |
+- int w, int h, int flip) |
++static void my_error_exit(j_common_ptr cinfo) |
+ { |
+- unsigned char *srcptr, *srcptr0, *dstptr, *dstptr0; |
+- int i, j; |
+- |
+- srcptr=flip? &srcbuf[srcpitch*(h-1)]:srcbuf; |
+- for(j=0, dstptr=dstbuf; j<h; j++, |
+- srcptr+=flip? -srcpitch:srcpitch, dstptr+=dstpitch) |
+- { |
+- for(i=0, srcptr0=srcptr, dstptr0=dstptr; i<w; i++, |
+- srcptr0+=ps[srcformat], dstptr0+=ps[dstformat]) |
+- { |
+- dstptr0[roffset[dstformat]]=srcptr0[roffset[srcformat]]; |
+- dstptr0[goffset[dstformat]]=srcptr0[goffset[srcformat]]; |
+- dstptr0[boffset[dstformat]]=srcptr0[boffset[srcformat]]; |
+- } |
+- } |
++ my_error_ptr myerr=(my_error_ptr)cinfo->err; |
++ (*cinfo->err->output_message)(cinfo); |
++ longjmp(myerr->setjmp_buffer, 1); |
+ } |
+ |
+-int loadppm(int *fd, unsigned char **buf, int *w, int *h, |
+- enum BMPPIXELFORMAT f, int align, int dstbottomup, int ascii) |
++/* Based on output_message() in jerror.c */ |
++ |
++static void my_output_message(j_common_ptr cinfo) |
+ { |
+- FILE *fs=NULL; int retcode=0, scalefactor, dstpitch; |
+- unsigned char *tempbuf=NULL; char temps[255], temps2[255]; |
+- int numread=0, totalread=0, pixel[3], i, j; |
++ (*cinfo->err->format_message)(cinfo, errStr); |
++} |
+ |
+- if((fs=fdopen(*fd, "r"))==NULL) _throw(strerror(errno)); |
++#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m); \ |
++ retval=-1; goto bailout;} |
++#define _throwunix(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s\n%s", m, \ |
++ strerror(errno)); retval=-1; goto bailout;} |
+ |
+- do |
+- { |
+- if(!fgets(temps, 255, fs)) _throw("Read error"); |
+- if(strlen(temps)==0 || temps[0]=='\n') continue; |
+- if(sscanf(temps, "%s", temps2)==1 && temps2[1]=='#') continue; |
+- switch(totalread) |
+- { |
+- case 0: |
+- if((numread=sscanf(temps, "%d %d %d", w, h, &scalefactor))==EOF) |
+- _throw("Read error"); |
+- break; |
+- case 1: |
+- if((numread=sscanf(temps, "%d %d", h, &scalefactor))==EOF) |
+- _throw("Read error"); |
+- break; |
+- case 2: |
+- if((numread=sscanf(temps, "%d", &scalefactor))==EOF) |
+- _throw("Read error"); |
+- break; |
+- } |
+- totalread+=numread; |
+- } while(totalread<3); |
+- if((*w)<1 || (*h)<1 || scalefactor<1) _throw("Corrupt PPM header"); |
+ |
+- dstpitch=(((*w)*ps[f])+(align-1))&(~(align-1)); |
+- if((*buf=(unsigned char *)malloc(dstpitch*(*h)))==NULL) |
+- _throw("Memory allocation error"); |
+- if(ascii) |
++static void pixelconvert(unsigned char *srcbuf, int srcpf, int srcbottomup, |
++ unsigned char *dstbuf, int dstpf, int dstbottomup, int w, int h) |
++{ |
++ unsigned char *srcptr=srcbuf, *srcptr2; |
++ int srcps=tjPixelSize[srcpf]; |
++ int srcstride=srcbottomup? -w*srcps:w*srcps; |
++ unsigned char *dstptr=dstbuf, *dstptr2; |
++ int dstps=tjPixelSize[dstpf]; |
++ int dststride=dstbottomup? -w*dstps:w*dstps; |
++ int row, col; |
++ |
++ if(srcbottomup) srcptr=&srcbuf[w*srcps*(h-1)]; |
++ if(dstbottomup) dstptr=&dstbuf[w*dstps*(h-1)]; |
++ for(row=0; row<h; row++, srcptr+=srcstride, dstptr+=dststride) |
+ { |
+- for(j=0; j<*h; j++) |
++ for(col=0, srcptr2=srcptr, dstptr2=dstptr; col<w; col++, srcptr2+=srcps, |
++ dstptr2+=dstps) |
+ { |
+- for(i=0; i<*w; i++) |
+- { |
+- if(fscanf(fs, "%d%d%d", &pixel[0], &pixel[1], &pixel[2])!=3) |
+- _throw("Read error"); |
+- (*buf)[j*dstpitch+i*ps[f]+roffset[f]]=(unsigned char)(pixel[0]*255/scalefactor); |
+- (*buf)[j*dstpitch+i*ps[f]+goffset[f]]=(unsigned char)(pixel[1]*255/scalefactor); |
+- (*buf)[j*dstpitch+i*ps[f]+boffset[f]]=(unsigned char)(pixel[2]*255/scalefactor); |
+- } |
++ dstptr2[tjRedOffset[dstpf]]=srcptr2[tjRedOffset[srcpf]]; |
++ dstptr2[tjGreenOffset[dstpf]]=srcptr2[tjGreenOffset[srcpf]]; |
++ dstptr2[tjBlueOffset[dstpf]]=srcptr2[tjBlueOffset[srcpf]]; |
+ } |
+ } |
+- else |
+- { |
+- if(scalefactor!=255) |
+- _throw("Binary PPMs must have 8-bit components"); |
+- if((tempbuf=(unsigned char *)malloc((*w)*(*h)*3))==NULL) |
+- _throw("Memory allocation error"); |
+- if(fread(tempbuf, (*w)*(*h)*3, 1, fs)!=1) _throw("Read error"); |
+- pixelconvert(tempbuf, BMP_RGB, (*w)*3, *buf, f, dstpitch, *w, *h, dstbottomup); |
+- } |
+- |
+- finally: |
+- if(fs) {fclose(fs); *fd=-1;} |
+- if(tempbuf) free(tempbuf); |
+- return retcode; |
+ } |
+ |
+ |
+ int loadbmp(char *filename, unsigned char **buf, int *w, int *h, |
+- enum BMPPIXELFORMAT f, int align, int dstbottomup) |
++ int dstpf, int bottomup) |
+ { |
+- int fd=-1, bytesread, srcpitch, srcbottomup=1, srcps, dstpitch, |
+- retcode=0; |
+- unsigned char *tempbuf=NULL; |
+- bmphdr bh; int flags=O_RDONLY; |
++ int retval=0, dstps, srcpf, tempc; |
++ struct jpeg_compress_struct cinfo; |
++ struct my_error_mgr jerr; |
++ cjpeg_source_ptr src; |
++ FILE *file=NULL; |
+ |
+- dstbottomup=dstbottomup? 1:0; |
+- #ifdef _WIN32 |
+- flags|=O_BINARY; |
+- #endif |
+- if(!filename || !buf || !w || !h || f<0 || f>BMPPIXELFORMATS-1 || align<1) |
+- _throw("invalid argument to loadbmp()"); |
+- if((align&(align-1))!=0) |
+- _throw("Alignment must be a power of 2"); |
+- _unix(fd=open(filename, flags)); |
++ memset(&cinfo, 0, sizeof(struct jpeg_compress_struct)); |
+ |
+- readme(fd, &bh.bfType, sizeof(unsigned short)); |
+- if(!littleendian()) bh.bfType=byteswap16(bh.bfType); |
++ if(!filename || !buf || !w || !h || dstpf<0 || dstpf>=TJ_NUMPF) |
++ _throw("loadbmp(): Invalid argument"); |
+ |
+- if(bh.bfType==0x3650) |
++ if((file=fopen(filename, "rb"))==NULL) |
++ _throwunix("loadbmp(): Cannot open input file"); |
++ |
++ cinfo.err=jpeg_std_error(&jerr.pub); |
++ jerr.pub.error_exit=my_error_exit; |
++ jerr.pub.output_message=my_output_message; |
++ |
++ if(setjmp(jerr.setjmp_buffer)) |
+ { |
+- _catch(loadppm(&fd, buf, w, h, f, align, dstbottomup, 0)); |
+- goto finally; |
++ /* If we get here, the JPEG code has signaled an error. */ |
++ retval=-1; goto bailout; |
+ } |
+- if(bh.bfType==0x3350) |
+- { |
+- _catch(loadppm(&fd, buf, w, h, f, align, dstbottomup, 1)); |
+- goto finally; |
+- } |
+ |
+- readme(fd, &bh.bfSize, sizeof(unsigned int)); |
+- readme(fd, &bh.bfReserved1, sizeof(unsigned short)); |
+- readme(fd, &bh.bfReserved2, sizeof(unsigned short)); |
+- readme(fd, &bh.bfOffBits, sizeof(unsigned int)); |
+- readme(fd, &bh.biSize, sizeof(unsigned int)); |
+- readme(fd, &bh.biWidth, sizeof(int)); |
+- readme(fd, &bh.biHeight, sizeof(int)); |
+- readme(fd, &bh.biPlanes, sizeof(unsigned short)); |
+- readme(fd, &bh.biBitCount, sizeof(unsigned short)); |
+- readme(fd, &bh.biCompression, sizeof(unsigned int)); |
+- readme(fd, &bh.biSizeImage, sizeof(unsigned int)); |
+- readme(fd, &bh.biXPelsPerMeter, sizeof(int)); |
+- readme(fd, &bh.biYPelsPerMeter, sizeof(int)); |
+- readme(fd, &bh.biClrUsed, sizeof(unsigned int)); |
+- readme(fd, &bh.biClrImportant, sizeof(unsigned int)); |
++ jpeg_create_compress(&cinfo); |
++ if((tempc=getc(file))<0 || ungetc(tempc, file)==EOF) |
++ _throwunix("loadbmp(): Could not read input file") |
++ else if(tempc==EOF) _throw("loadbmp(): Input file contains no data"); |
+ |
+- if(!littleendian()) |
++ if(tempc=='B') |
+ { |
+- bh.bfSize=byteswap(bh.bfSize); |
+- bh.bfOffBits=byteswap(bh.bfOffBits); |
+- bh.biSize=byteswap(bh.biSize); |
+- bh.biWidth=byteswap(bh.biWidth); |
+- bh.biHeight=byteswap(bh.biHeight); |
+- bh.biPlanes=byteswap16(bh.biPlanes); |
+- bh.biBitCount=byteswap16(bh.biBitCount); |
+- bh.biCompression=byteswap(bh.biCompression); |
+- bh.biSizeImage=byteswap(bh.biSizeImage); |
+- bh.biXPelsPerMeter=byteswap(bh.biXPelsPerMeter); |
+- bh.biYPelsPerMeter=byteswap(bh.biYPelsPerMeter); |
+- bh.biClrUsed=byteswap(bh.biClrUsed); |
+- bh.biClrImportant=byteswap(bh.biClrImportant); |
++ if((src=jinit_read_bmp(&cinfo))==NULL) |
++ _throw("loadbmp(): Could not initialize bitmap loader"); |
+ } |
++ else if(tempc=='P') |
++ { |
++ if((src=jinit_read_ppm(&cinfo))==NULL) |
++ _throw("loadbmp(): Could not initialize bitmap loader"); |
++ } |
++ else _throw("loadbmp(): Unsupported file type"); |
+ |
+- if(bh.bfType!=0x4d42 || bh.bfOffBits<BMPHDRSIZE |
+- || bh.biWidth<1 || bh.biHeight==0) |
+- _throw("Corrupt bitmap header"); |
+- if((bh.biBitCount!=24 && bh.biBitCount!=32) || bh.biCompression!=BI_RGB) |
+- _throw("Only uncompessed RGB bitmaps are supported"); |
++ src->input_file=file; |
++ (*src->start_input)(&cinfo, src); |
++ (*cinfo.mem->realize_virt_arrays)((j_common_ptr)&cinfo); |
+ |
+- *w=bh.biWidth; *h=bh.biHeight; srcps=bh.biBitCount/8; |
+- if(*h<0) {*h=-(*h); srcbottomup=0;} |
+- srcpitch=(((*w)*srcps)+3)&(~3); |
+- dstpitch=(((*w)*ps[f])+(align-1))&(~(align-1)); |
++ *w=cinfo.image_width; *h=cinfo.image_height; |
+ |
+- if(srcpitch*(*h)+bh.bfOffBits!=bh.bfSize) _throw("Corrupt bitmap header"); |
+- if((tempbuf=(unsigned char *)malloc(srcpitch*(*h)))==NULL |
+- || (*buf=(unsigned char *)malloc(dstpitch*(*h)))==NULL) |
+- _throw("Memory allocation error"); |
+- if(lseek(fd, (long)bh.bfOffBits, SEEK_SET)!=(long)bh.bfOffBits) |
+- _throw(strerror(errno)); |
+- _unix(bytesread=read(fd, tempbuf, srcpitch*(*h))); |
+- if(bytesread!=srcpitch*(*h)) _throw("Read error"); |
++ if(cinfo.input_components==1 && cinfo.in_color_space==JCS_RGB) |
++ srcpf=TJPF_GRAY; |
++ else srcpf=TJPF_RGB; |
+ |
+- pixelconvert(tempbuf, BMP_BGR, srcpitch, *buf, f, dstpitch, *w, *h, |
+- srcbottomup!=dstbottomup); |
++ dstps=tjPixelSize[dstpf]; |
++ if((*buf=(unsigned char *)malloc((*w)*(*h)*dstps))==NULL) |
++ _throw("loadbmp(): Memory allocation failure"); |
+ |
+- finally: |
+- if(tempbuf) free(tempbuf); |
+- if(fd!=-1) close(fd); |
+- return retcode; |
++ while(cinfo.next_scanline<cinfo.image_height) |
++ { |
++ int i, nlines=(*src->get_pixel_rows)(&cinfo, src); |
++ for(i=0; i<nlines; i++) |
++ { |
++ unsigned char *outbuf; int row; |
++ row=cinfo.next_scanline+i; |
++ if(bottomup) outbuf=&(*buf)[((*h)-row-1)*(*w)*dstps]; |
++ else outbuf=&(*buf)[row*(*w)*dstps]; |
++ pixelconvert(src->buffer[i], srcpf, 0, outbuf, dstpf, bottomup, *w, |
++ nlines); |
++ } |
++ cinfo.next_scanline+=nlines; |
++ } |
++ |
++ (*src->finish_input)(&cinfo, src); |
++ |
++ bailout: |
++ jpeg_destroy_compress(&cinfo); |
++ if(file) fclose(file); |
++ if(retval<0 && buf && *buf) {free(*buf); *buf=NULL;} |
++ return retval; |
+ } |
+ |
+-#define writeme(fd, addr, size) \ |
+- if((byteswritten=write(fd, addr, (size)))==-1) _throw(strerror(errno)); \ |
+- if(byteswritten!=(size)) _throw("Write error"); |
+ |
+-int saveppm(char *filename, unsigned char *buf, int w, int h, |
+- enum BMPPIXELFORMAT f, int srcpitch, int srcbottomup) |
++int savebmp(char *filename, unsigned char *buf, int w, int h, int srcpf, |
++ int bottomup) |
+ { |
+- FILE *fs=NULL; int retcode=0; |
+- unsigned char *tempbuf=NULL; |
++ int retval=0, srcps, dstpf; |
++ struct jpeg_decompress_struct dinfo; |
++ struct my_error_mgr jerr; |
++ djpeg_dest_ptr dst; |
++ FILE *file=NULL; |
++ char *ptr=NULL; |
+ |
+- if((fs=fopen(filename, "wb"))==NULL) _throw(strerror(errno)); |
+- if(fprintf(fs, "P6\n")<1) _throw("Write error"); |
+- if(fprintf(fs, "%d %d\n", w, h)<1) _throw("Write error"); |
+- if(fprintf(fs, "255\n")<1) _throw("Write error"); |
++ memset(&dinfo, 0, sizeof(struct jpeg_decompress_struct)); |
+ |
+- if((tempbuf=(unsigned char *)malloc(w*h*3))==NULL) |
+- _throw("Memory allocation error"); |
++ if(!filename || !buf || w<1 || h<1 || srcpf<0 || srcpf>=TJ_NUMPF) |
++ _throw("savebmp(): Invalid argument"); |
+ |
+- pixelconvert(buf, f, srcpitch, tempbuf, BMP_RGB, w*3, w, h, |
+- srcbottomup); |
++ if((file=fopen(filename, "wb"))==NULL) |
++ _throwunix("savebmp(): Cannot open output file"); |
+ |
+- if((fwrite(tempbuf, w*h*3, 1, fs))!=1) _throw("Write error"); |
++ dinfo.err=jpeg_std_error(&jerr.pub); |
++ jerr.pub.error_exit=my_error_exit; |
++ jerr.pub.output_message=my_output_message; |
+ |
+- finally: |
+- if(tempbuf) free(tempbuf); |
+- if(fs) fclose(fs); |
+- return retcode; |
+-} |
++ if(setjmp(jerr.setjmp_buffer)) |
++ { |
++ /* If we get here, the JPEG code has signaled an error. */ |
++ retval=-1; goto bailout; |
++ } |
+ |
+-int savebmp(char *filename, unsigned char *buf, int w, int h, |
+- enum BMPPIXELFORMAT f, int srcpitch, int srcbottomup) |
+-{ |
+- int fd=-1, byteswritten, dstpitch, retcode=0; |
+- int flags=O_RDWR|O_CREAT|O_TRUNC; |
+- unsigned char *tempbuf=NULL; char *temp; |
+- bmphdr bh; int mode; |
++ jpeg_create_decompress(&dinfo); |
++ if(srcpf==TJPF_GRAY) |
++ { |
++ dinfo.out_color_components=dinfo.output_components=1; |
++ dinfo.out_color_space=JCS_GRAYSCALE; |
++ } |
++ else |
++ { |
++ dinfo.out_color_components=dinfo.output_components=3; |
++ dinfo.out_color_space=JCS_RGB; |
++ } |
++ dinfo.image_width=w; dinfo.image_height=h; |
++ dinfo.global_state=DSTATE_READY; |
++ dinfo.scale_num=dinfo.scale_denom=1; |
+ |
+- #ifdef _WIN32 |
+- flags|=O_BINARY; mode=_S_IREAD|_S_IWRITE; |
+- #else |
+- mode=S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH; |
+- #endif |
+- if(!filename || !buf || w<1 || h<1 || f<0 || f>BMPPIXELFORMATS-1 || srcpitch<0) |
+- _throw("bad argument to savebmp()"); |
+- |
+- if(srcpitch==0) srcpitch=w*ps[f]; |
+- |
+- if((temp=strrchr(filename, '.'))!=NULL) |
++ ptr=strrchr(filename, '.'); |
++ if(ptr && !strcasecmp(ptr, ".bmp")) |
+ { |
+- if(!stricmp(temp, ".ppm")) |
+- return saveppm(filename, buf, w, h, f, srcpitch, srcbottomup); |
++ if((dst=jinit_write_bmp(&dinfo, 0))==NULL) |
++ _throw("savebmp(): Could not initialize bitmap writer"); |
+ } |
++ else |
++ { |
++ if((dst=jinit_write_ppm(&dinfo))==NULL) |
++ _throw("savebmp(): Could not initialize PPM writer"); |
++ } |
+ |
+- _unix(fd=open(filename, flags, mode)); |
+- dstpitch=((w*3)+3)&(~3); |
++ dst->output_file=file; |
++ (*dst->start_output)(&dinfo, dst); |
++ (*dinfo.mem->realize_virt_arrays)((j_common_ptr)&dinfo); |
+ |
+- bh.bfType=0x4d42; |
+- bh.bfSize=BMPHDRSIZE+dstpitch*h; |
+- bh.bfReserved1=0; bh.bfReserved2=0; |
+- bh.bfOffBits=BMPHDRSIZE; |
+- bh.biSize=40; |
+- bh.biWidth=w; bh.biHeight=h; |
+- bh.biPlanes=0; bh.biBitCount=24; |
+- bh.biCompression=BI_RGB; bh.biSizeImage=0; |
+- bh.biXPelsPerMeter=0; bh.biYPelsPerMeter=0; |
+- bh.biClrUsed=0; bh.biClrImportant=0; |
++ if(srcpf==TJPF_GRAY) dstpf=srcpf; |
++ else dstpf=TJPF_RGB; |
++ srcps=tjPixelSize[srcpf]; |
+ |
+- if(!littleendian()) |
++ while(dinfo.output_scanline<dinfo.output_height) |
+ { |
+- bh.bfType=byteswap16(bh.bfType); |
+- bh.bfSize=byteswap(bh.bfSize); |
+- bh.bfOffBits=byteswap(bh.bfOffBits); |
+- bh.biSize=byteswap(bh.biSize); |
+- bh.biWidth=byteswap(bh.biWidth); |
+- bh.biHeight=byteswap(bh.biHeight); |
+- bh.biPlanes=byteswap16(bh.biPlanes); |
+- bh.biBitCount=byteswap16(bh.biBitCount); |
+- bh.biCompression=byteswap(bh.biCompression); |
+- bh.biSizeImage=byteswap(bh.biSizeImage); |
+- bh.biXPelsPerMeter=byteswap(bh.biXPelsPerMeter); |
+- bh.biYPelsPerMeter=byteswap(bh.biYPelsPerMeter); |
+- bh.biClrUsed=byteswap(bh.biClrUsed); |
+- bh.biClrImportant=byteswap(bh.biClrImportant); |
++ int i, nlines=dst->buffer_height; |
++ for(i=0; i<nlines; i++) |
++ { |
++ unsigned char *inbuf; int row; |
++ row=dinfo.output_scanline+i; |
++ if(bottomup) inbuf=&buf[(h-row-1)*w*srcps]; |
++ else inbuf=&buf[row*w*srcps]; |
++ pixelconvert(inbuf, srcpf, bottomup, dst->buffer[i], dstpf, 0, w, |
++ nlines); |
++ } |
++ (*dst->put_pixel_rows)(&dinfo, dst, nlines); |
++ dinfo.output_scanline+=nlines; |
+ } |
+ |
+- writeme(fd, &bh.bfType, sizeof(unsigned short)); |
+- writeme(fd, &bh.bfSize, sizeof(unsigned int)); |
+- writeme(fd, &bh.bfReserved1, sizeof(unsigned short)); |
+- writeme(fd, &bh.bfReserved2, sizeof(unsigned short)); |
+- writeme(fd, &bh.bfOffBits, sizeof(unsigned int)); |
+- writeme(fd, &bh.biSize, sizeof(unsigned int)); |
+- writeme(fd, &bh.biWidth, sizeof(int)); |
+- writeme(fd, &bh.biHeight, sizeof(int)); |
+- writeme(fd, &bh.biPlanes, sizeof(unsigned short)); |
+- writeme(fd, &bh.biBitCount, sizeof(unsigned short)); |
+- writeme(fd, &bh.biCompression, sizeof(unsigned int)); |
+- writeme(fd, &bh.biSizeImage, sizeof(unsigned int)); |
+- writeme(fd, &bh.biXPelsPerMeter, sizeof(int)); |
+- writeme(fd, &bh.biYPelsPerMeter, sizeof(int)); |
+- writeme(fd, &bh.biClrUsed, sizeof(unsigned int)); |
+- writeme(fd, &bh.biClrImportant, sizeof(unsigned int)); |
++ (*dst->finish_output)(&dinfo, dst); |
+ |
+- if((tempbuf=(unsigned char *)malloc(dstpitch*h))==NULL) |
+- _throw("Memory allocation error"); |
+- |
+- pixelconvert(buf, f, srcpitch, tempbuf, BMP_BGR, dstpitch, w, h, |
+- !srcbottomup); |
+- |
+- if((byteswritten=write(fd, tempbuf, dstpitch*h))!=dstpitch*h) |
+- _throw(strerror(errno)); |
+- |
+- finally: |
+- if(tempbuf) free(tempbuf); |
+- if(fd!=-1) close(fd); |
+- return retcode; |
++ bailout: |
++ jpeg_destroy_decompress(&dinfo); |
++ if(file) fclose(file); |
++ return retval; |
+ } |
+ |
+ const char *bmpgeterr(void) |
+ { |
+- return __bmperr; |
++ return errStr; |
+ } |
+Index: bmp.h |
+=================================================================== |
+--- bmp.h (revision 829) |
++++ bmp.h (working copy) |
+@@ -1,48 +1,42 @@ |
+-/* Copyright (C)2004 Landmark Graphics Corporation |
+- * Copyright (C)2005 Sun Microsystems, Inc. |
++/* |
++ * Copyright (C)2011 D. R. Commander. All Rights Reserved. |
+ * |
+- * This library is free software and may be redistributed and/or modified under |
+- * the terms of the wxWindows Library License, Version 3.1 or (at your option) |
+- * any later version. The full license is in the LICENSE.txt file included |
+- * with this distribution. |
++ * Redistribution and use in source and binary forms, with or without |
++ * modification, are permitted provided that the following conditions are met: |
+ * |
+- * This library is distributed in the hope that it will be useful, |
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+- * wxWindows Library License for more details. |
+-*/ |
++ * - Redistributions of source code must retain the above copyright notice, |
++ * this list of conditions and the following disclaimer. |
++ * - Redistributions in binary form must reproduce the above copyright notice, |
++ * this list of conditions and the following disclaimer in the documentation |
++ * and/or other materials provided with the distribution. |
++ * - Neither the name of the libjpeg-turbo Project nor the names of its |
++ * contributors may be used to endorse or promote products derived from this |
++ * software without specific prior written permission. |
++ * |
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", |
++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE |
++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
++ * POSSIBILITY OF SUCH DAMAGE. |
++ */ |
+ |
+-// This provides rudimentary facilities for loading and saving true color |
+-// BMP and PPM files |
+- |
+ #ifndef __BMP_H__ |
+ #define __BMP_H__ |
+ |
+-#define BMPPIXELFORMATS 6 |
+-enum BMPPIXELFORMAT {BMP_RGB=0, BMP_RGBA, BMP_BGR, BMP_BGRA, BMP_ABGR, BMP_ARGB}; |
++#include "./turbojpeg.h" |
+ |
+-#ifdef __cplusplus |
+-extern "C" { |
+-#endif |
++int loadbmp(char *filename, unsigned char **buf, int *w, int *h, int pf, |
++ int bottomup); |
+ |
+-// This will load a Windows bitmap from a file and return a buffer with the |
+-// specified pixel format, scanline alignment, and orientation. The width and |
+-// height are returned in w and h. |
++int savebmp(char *filename, unsigned char *buf, int w, int h, int pf, |
++ int bottomup); |
+ |
+-int loadbmp(char *filename, unsigned char **buf, int *w, int *h, |
+- enum BMPPIXELFORMAT f, int align, int dstbottomup); |
+- |
+-// This will save a buffer with the specified pixel format, pitch, orientation, |
+-// width, and height as a 24-bit Windows bitmap or PPM (the filename determines |
+-// which format to use) |
+- |
+-int savebmp(char *filename, unsigned char *buf, int w, int h, |
+- enum BMPPIXELFORMAT f, int srcpitch, int srcbottomup); |
+- |
+ const char *bmpgeterr(void); |
+ |
+-#ifdef __cplusplus |
+-} |
+ #endif |
+- |
+-#endif |
+Index: cderror.h |
+=================================================================== |
+--- cderror.h (revision 829) |
++++ cderror.h (working copy) |
+@@ -2,6 +2,7 @@ |
+ * cderror.h |
+ * |
+ * Copyright (C) 1994-1997, Thomas G. Lane. |
++ * Modified 2009 by Guido Vollbeding. |
+ * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+@@ -45,6 +46,7 @@ |
+ JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1") |
+ JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB") |
+ JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported") |
++JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image") |
+ JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM") |
+ JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image") |
+ JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image") |
+Index: cdjpeg.h |
+=================================================================== |
+--- cdjpeg.h (revision 829) |
++++ cdjpeg.h (working copy) |
+@@ -104,6 +104,7 @@ |
+ #define jinit_write_targa jIWrTarga |
+ #define read_quant_tables RdQTables |
+ #define read_scan_script RdScnScript |
++#define set_quality_ratings SetQRates |
+ #define set_quant_slots SetQSlots |
+ #define set_sample_factors SetSFacts |
+ #define read_color_map RdCMap |
+@@ -131,8 +132,10 @@ |
+ /* cjpeg support routines (in rdswitch.c) */ |
+ |
+ EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename, |
+- int scale_factor, boolean force_baseline)); |
++ boolean force_baseline)); |
+ EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename)); |
++EXTERN(boolean) set_quality_ratings JPP((j_compress_ptr cinfo, char *arg, |
++ boolean force_baseline)); |
+ EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg)); |
+ EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg)); |
+ |
+Index: cjpeg.c |
+=================================================================== |
+--- cjpeg.c (revision 829) |
++++ cjpeg.c (working copy) |
+@@ -1,8 +1,11 @@ |
+ /* |
+ * cjpeg.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1998, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2003-2011 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2010, 2013, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains a command-line user interface for the JPEG compressor. |
+@@ -25,6 +28,7 @@ |
+ |
+ #include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ |
+ #include "jversion.h" /* for version message */ |
++#include "config.h" |
+ |
+ #ifdef USE_CCOMMAND /* command-line reader for Macintosh */ |
+ #ifdef __MWERKS__ |
+@@ -135,6 +139,7 @@ |
+ |
+ static const char * progname; /* program name for error messages */ |
+ static char * outfilename; /* for -outfile switch */ |
++boolean memdst; /* for -memdst switch */ |
+ |
+ |
+ LOCAL(void) |
+@@ -149,8 +154,9 @@ |
+ #endif |
+ |
+ fprintf(stderr, "Switches (names may be abbreviated):\n"); |
+- fprintf(stderr, " -quality N Compression quality (0..100; 5-95 is useful range)\n"); |
++ fprintf(stderr, " -quality N[,...] Compression quality (0..100; 5-95 is useful range)\n"); |
+ fprintf(stderr, " -grayscale Create monochrome JPEG file\n"); |
++ fprintf(stderr, " -rgb Create RGB JPEG file\n"); |
+ #ifdef ENTROPY_OPT_SUPPORTED |
+ fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n"); |
+ #endif |
+@@ -161,6 +167,9 @@ |
+ fprintf(stderr, " -targa Input file is Targa format (usually not needed)\n"); |
+ #endif |
+ fprintf(stderr, "Switches for advanced users:\n"); |
++#ifdef C_ARITH_CODING_SUPPORTED |
++ fprintf(stderr, " -arithmetic Use arithmetic coding\n"); |
++#endif |
+ #ifdef DCT_ISLOW_SUPPORTED |
+ fprintf(stderr, " -dct int Use integer DCT method%s\n", |
+ (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : "")); |
+@@ -179,11 +188,11 @@ |
+ #endif |
+ fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); |
+ fprintf(stderr, " -outfile name Specify name for output file\n"); |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++ fprintf(stderr, " -memdst Compress to memory instead of file (useful for benchmarking)\n"); |
++#endif |
+ fprintf(stderr, " -verbose or -debug Emit debug output\n"); |
+ fprintf(stderr, "Switches for wizards:\n"); |
+-#ifdef C_ARITH_CODING_SUPPORTED |
+- fprintf(stderr, " -arithmetic Use arithmetic coding\n"); |
+-#endif |
+ fprintf(stderr, " -baseline Force baseline quantization tables\n"); |
+ fprintf(stderr, " -qtables file Use quantization tables given in file\n"); |
+ fprintf(stderr, " -qslots N[,...] Set component quantization tables\n"); |
+@@ -209,10 +218,9 @@ |
+ { |
+ int argn; |
+ char * arg; |
+- int quality; /* -quality parameter */ |
+- int q_scale_factor; /* scaling percentage for -qtables */ |
+ boolean force_baseline; |
+ boolean simple_progressive; |
++ char * qualityarg = NULL; /* saves -quality parm if any */ |
+ char * qtablefile = NULL; /* saves -qtables filename if any */ |
+ char * qslotsarg = NULL; /* saves -qslots parm if any */ |
+ char * samplearg = NULL; /* saves -sample parm if any */ |
+@@ -219,15 +227,12 @@ |
+ char * scansarg = NULL; /* saves -scans parm if any */ |
+ |
+ /* Set up default JPEG parameters. */ |
+- /* Note that default -quality level need not, and does not, |
+- * match the default scaling for an explicit -qtables argument. |
+- */ |
+- quality = 75; /* default -quality value */ |
+- q_scale_factor = 100; /* default to no scaling for -qtables */ |
++ |
+ force_baseline = FALSE; /* by default, allow 16-bit quantizers */ |
+ simple_progressive = FALSE; |
+ is_targa = FALSE; |
+ outfilename = NULL; |
++ memdst = FALSE; |
+ cinfo->err->trace_level = 0; |
+ |
+ /* Scan command line options, adjust parameters */ |
+@@ -277,8 +282,11 @@ |
+ static boolean printed_version = FALSE; |
+ |
+ if (! printed_version) { |
+- fprintf(stderr, "Independent JPEG Group's CJPEG, version %s\n%s\n", |
+- JVERSION, JCOPYRIGHT); |
++ fprintf(stderr, "%s version %s (build %s)\n", |
++ PACKAGE_NAME, VERSION, BUILD); |
++ fprintf(stderr, "%s\n\n", JCOPYRIGHT); |
++ fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n", |
++ JVERSION); |
+ printed_version = TRUE; |
+ } |
+ cinfo->err->trace_level++; |
+@@ -287,6 +295,10 @@ |
+ /* Force a monochrome JPEG file to be generated. */ |
+ jpeg_set_colorspace(cinfo, JCS_GRAYSCALE); |
+ |
++ } else if (keymatch(arg, "rgb", 3)) { |
++ /* Force an RGB JPEG file to be generated. */ |
++ jpeg_set_colorspace(cinfo, JCS_RGB); |
++ |
+ } else if (keymatch(arg, "maxmemory", 3)) { |
+ /* Maximum memory in Kb (or Mb with 'm'). */ |
+ long lval; |
+@@ -305,7 +317,7 @@ |
+ #ifdef ENTROPY_OPT_SUPPORTED |
+ cinfo->optimize_coding = TRUE; |
+ #else |
+- fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n", |
++ fprintf(stderr, "%s: sorry, entropy optimization was not compiled in\n", |
+ progname); |
+ exit(EXIT_FAILURE); |
+ #endif |
+@@ -322,19 +334,26 @@ |
+ simple_progressive = TRUE; |
+ /* We must postpone execution until num_components is known. */ |
+ #else |
+- fprintf(stderr, "%s: sorry, progressive output was not compiled\n", |
++ fprintf(stderr, "%s: sorry, progressive output was not compiled in\n", |
+ progname); |
+ exit(EXIT_FAILURE); |
+ #endif |
+ |
++ } else if (keymatch(arg, "memdst", 2)) { |
++ /* Use in-memory destination manager */ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++ memdst = TRUE; |
++#else |
++ fprintf(stderr, "%s: sorry, in-memory destination manager was not compiled in\n", |
++ progname); |
++ exit(EXIT_FAILURE); |
++#endif |
++ |
+ } else if (keymatch(arg, "quality", 1)) { |
+- /* Quality factor (quantization table scaling factor). */ |
++ /* Quality ratings (quantization table scaling factors). */ |
+ if (++argn >= argc) /* advance to next argument */ |
+ usage(); |
+- if (sscanf(argv[argn], "%d", &quality) != 1) |
+- usage(); |
+- /* Change scale factor in case -qtables is present. */ |
+- q_scale_factor = jpeg_quality_scaling(quality); |
++ qualityarg = argv[argn]; |
+ |
+ } else if (keymatch(arg, "qslots", 2)) { |
+ /* Quantization table slot numbers. */ |
+@@ -382,7 +401,7 @@ |
+ * default sampling factors. |
+ */ |
+ |
+- } else if (keymatch(arg, "scans", 2)) { |
++ } else if (keymatch(arg, "scans", 4)) { |
+ /* Set scan script. */ |
+ #ifdef C_MULTISCAN_FILES_SUPPORTED |
+ if (++argn >= argc) /* advance to next argument */ |
+@@ -390,7 +409,7 @@ |
+ scansarg = argv[argn]; |
+ /* We must postpone reading the file in case -progressive appears. */ |
+ #else |
+- fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n", |
++ fprintf(stderr, "%s: sorry, multi-scan output was not compiled in\n", |
+ progname); |
+ exit(EXIT_FAILURE); |
+ #endif |
+@@ -422,11 +441,12 @@ |
+ |
+ /* Set quantization tables for selected quality. */ |
+ /* Some or all may be overridden if -qtables is present. */ |
+- jpeg_set_quality(cinfo, quality, force_baseline); |
++ if (qualityarg != NULL) /* process -quality if it was present */ |
++ if (! set_quality_ratings(cinfo, qualityarg, force_baseline)) |
++ usage(); |
+ |
+ if (qtablefile != NULL) /* process -qtables if it was present */ |
+- if (! read_quant_tables(cinfo, qtablefile, |
+- q_scale_factor, force_baseline)) |
++ if (! read_quant_tables(cinfo, qtablefile, force_baseline)) |
+ usage(); |
+ |
+ if (qslotsarg != NULL) /* process -qslots if it was present */ |
+@@ -468,7 +488,9 @@ |
+ int file_index; |
+ cjpeg_source_ptr src_mgr; |
+ FILE * input_file; |
+- FILE * output_file; |
++ FILE * output_file = NULL; |
++ unsigned char *outbuffer = NULL; |
++ unsigned long outsize = 0; |
+ JDIMENSION num_scanlines; |
+ |
+ /* On Mac, fetch a command line. */ |
+@@ -511,20 +533,22 @@ |
+ file_index = parse_switches(&cinfo, argc, argv, 0, FALSE); |
+ |
+ #ifdef TWO_FILE_COMMANDLINE |
+- /* Must have either -outfile switch or explicit output file name */ |
+- if (outfilename == NULL) { |
+- if (file_index != argc-2) { |
+- fprintf(stderr, "%s: must name one input and one output file\n", |
+- progname); |
+- usage(); |
++ if (!memdst) { |
++ /* Must have either -outfile switch or explicit output file name */ |
++ if (outfilename == NULL) { |
++ if (file_index != argc-2) { |
++ fprintf(stderr, "%s: must name one input and one output file\n", |
++ progname); |
++ usage(); |
++ } |
++ outfilename = argv[file_index+1]; |
++ } else { |
++ if (file_index != argc-1) { |
++ fprintf(stderr, "%s: must name one input and one output file\n", |
++ progname); |
++ usage(); |
++ } |
+ } |
+- outfilename = argv[file_index+1]; |
+- } else { |
+- if (file_index != argc-1) { |
+- fprintf(stderr, "%s: must name one input and one output file\n", |
+- progname); |
+- usage(); |
+- } |
+ } |
+ #else |
+ /* Unix style: expect zero or one file name */ |
+@@ -551,7 +575,7 @@ |
+ fprintf(stderr, "%s: can't open %s\n", progname, outfilename); |
+ exit(EXIT_FAILURE); |
+ } |
+- } else { |
++ } else if (!memdst) { |
+ /* default output file is stdout */ |
+ output_file = write_stdout(); |
} |
+@@ -574,7 +598,12 @@ |
+ file_index = parse_switches(&cinfo, argc, argv, 0, TRUE); |
+ |
+ /* Specify data destination for compression */ |
+- jpeg_stdio_dest(&cinfo, output_file); |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++ if (memdst) |
++ jpeg_mem_dest(&cinfo, &outbuffer, &outsize); |
++ else |
++#endif |
++ jpeg_stdio_dest(&cinfo, output_file); |
+ |
+ /* Start compressor */ |
+ jpeg_start_compress(&cinfo, TRUE); |
+@@ -593,7 +622,7 @@ |
+ /* Close files, if we opened them */ |
+ if (input_file != stdin) |
+ fclose(input_file); |
+- if (output_file != stdout) |
++ if (output_file != stdout && output_file != NULL) |
+ fclose(output_file); |
+ |
+ #ifdef PROGRESS_REPORT |
+@@ -600,6 +629,12 @@ |
+ end_progress_monitor((j_common_ptr) &cinfo); |
+ #endif |
- if (cinfo->marker->discarded_bytes != 0) { |
-- WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c); |
-+ TRACEMS2(cinfo, 1, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c); |
- cinfo->marker->discarded_bytes = 0; |
++ if (memdst) { |
++ fprintf(stderr, "Compressed size: %lu bytes\n", outsize); |
++ if (outbuffer != NULL) |
++ free(outbuffer); |
++ } |
++ |
+ /* All done. */ |
+ exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS); |
+ return 0; /* suppress no-return-value warnings */ |
+Index: djpeg.c |
+=================================================================== |
+--- djpeg.c (revision 829) |
++++ djpeg.c (working copy) |
+@@ -1,8 +1,11 @@ |
+ /* |
+ * djpeg.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2010-2011, 2013-2015, D. R. Commander. |
++ * Copyright (C) 2015, Google, Inc. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains a command-line user interface for the JPEG decompressor. |
+@@ -25,6 +28,7 @@ |
+ |
+ #include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ |
+ #include "jversion.h" /* for version message */ |
++#include "config.h" |
+ |
+ #include <ctype.h> /* to declare isprint() */ |
+ |
+@@ -84,6 +88,10 @@ |
+ |
+ static const char * progname; /* program name for error messages */ |
+ static char * outfilename; /* for -outfile switch */ |
++boolean memsrc; /* for -memsrc switch */ |
++boolean strip, skip; |
++JDIMENSION startY, endY; |
++#define INPUT_BUF_SIZE 4096 |
+ |
+ |
+ LOCAL(void) |
+@@ -101,6 +109,7 @@ |
+ fprintf(stderr, " -colors N Reduce image to no more than N colors\n"); |
+ fprintf(stderr, " -fast Fast, low-quality processing\n"); |
+ fprintf(stderr, " -grayscale Force grayscale output\n"); |
++ fprintf(stderr, " -rgb Force RGB output\n"); |
+ #ifdef IDCT_SCALING_SUPPORTED |
+ fprintf(stderr, " -scale M/N Scale output image by fraction M/N, eg, 1/8\n"); |
+ #endif |
+@@ -153,6 +162,12 @@ |
+ #endif |
+ fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); |
+ fprintf(stderr, " -outfile name Specify name for output file\n"); |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++ fprintf(stderr, " -memsrc Load input file into memory before decompressing\n"); |
++#endif |
++ |
++ fprintf(stderr, " -skip Y0,Y1 Decode all rows except those between Y0 and Y1 (inclusive)\n"); |
++ fprintf(stderr, " -strip Y0,Y1 Decode only rows between Y0 and Y1 (inclusive)\n"); |
+ fprintf(stderr, " -verbose or -debug Emit debug output\n"); |
+ exit(EXIT_FAILURE); |
+ } |
+@@ -176,6 +191,9 @@ |
+ /* Set up default JPEG parameters. */ |
+ requested_fmt = DEFAULT_FMT; /* set default output file format */ |
+ outfilename = NULL; |
++ memsrc = FALSE; |
++ strip = FALSE; |
++ skip = FALSE; |
+ cinfo->err->trace_level = 0; |
+ |
+ /* Scan command line options, adjust parameters */ |
+@@ -240,8 +258,11 @@ |
+ static boolean printed_version = FALSE; |
+ |
+ if (! printed_version) { |
+- fprintf(stderr, "Independent JPEG Group's DJPEG, version %s\n%s\n", |
+- JVERSION, JCOPYRIGHT); |
++ fprintf(stderr, "%s version %s (build %s)\n", |
++ PACKAGE_NAME, VERSION, BUILD); |
++ fprintf(stderr, "%s\n\n", JCOPYRIGHT); |
++ fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n", |
++ JVERSION); |
+ printed_version = TRUE; |
+ } |
+ cinfo->err->trace_level++; |
+@@ -263,6 +284,10 @@ |
+ /* Force monochrome output. */ |
+ cinfo->out_color_space = JCS_GRAYSCALE; |
+ |
++ } else if (keymatch(arg, "rgb", 2)) { |
++ /* Force RGB output. */ |
++ cinfo->out_color_space = JCS_RGB; |
++ |
+ } else if (keymatch(arg, "map", 3)) { |
+ /* Quantize to a color map taken from an input file. */ |
+ if (++argn >= argc) /* advance to next argument */ |
+@@ -314,6 +339,16 @@ |
+ usage(); |
+ outfilename = argv[argn]; /* save it away for later use */ |
+ |
++ } else if (keymatch(arg, "memsrc", 2)) { |
++ /* Use in-memory source manager */ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++ memsrc = TRUE; |
++#else |
++ fprintf(stderr, "%s: sorry, in-memory source manager was not compiled in\n", |
++ progname); |
++ exit(EXIT_FAILURE); |
++#endif |
++ |
+ } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) { |
+ /* PPM/PGM output format. */ |
+ requested_fmt = FMT_PPM; |
+@@ -322,7 +357,7 @@ |
+ /* RLE output format. */ |
+ requested_fmt = FMT_RLE; |
+ |
+- } else if (keymatch(arg, "scale", 1)) { |
++ } else if (keymatch(arg, "scale", 2)) { |
+ /* Scale the output image by a fraction M/N. */ |
+ if (++argn >= argc) /* advance to next argument */ |
+ usage(); |
+@@ -330,6 +365,20 @@ |
+ &cinfo->scale_num, &cinfo->scale_denom) != 2) |
+ usage(); |
+ |
++ } else if (keymatch(arg, "strip", 2)) { |
++ if (++argn >= argc) |
++ usage(); |
++ if (sscanf(argv[argn], "%d,%d", &startY, &endY) != 2 || startY > endY) |
++ usage(); |
++ strip = TRUE; |
++ |
++ } else if (keymatch(arg, "skip", 2)) { |
++ if (++argn >= argc) |
++ usage(); |
++ if (sscanf(argv[argn], "%d,%d", &startY, &endY) != 2 || startY > endY) |
++ usage(); |
++ skip = TRUE; |
++ |
+ } else if (keymatch(arg, "targa", 1)) { |
+ /* Targa output format. */ |
+ requested_fmt = FMT_TARGA; |
+@@ -432,6 +481,8 @@ |
+ djpeg_dest_ptr dest_mgr = NULL; |
+ FILE * input_file; |
+ FILE * output_file; |
++ unsigned char *inbuffer = NULL; |
++ unsigned long insize = 0; |
+ JDIMENSION num_scanlines; |
+ |
+ /* On Mac, fetch a command line. */ |
+@@ -455,7 +506,7 @@ |
+ * APP12 is used by some digital camera makers for textual info, |
+ * so we provide the ability to display it as text. |
+ * If you like, additional APPn marker types can be selected for display, |
+- * but don't try to override APP0 or APP14 this way (see libjpeg.doc). |
++ * but don't try to override APP0 or APP14 this way (see libjpeg.txt). |
+ */ |
+ jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker); |
+ jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker); |
+@@ -526,7 +577,30 @@ |
+ #endif |
+ |
+ /* Specify data source for decompression */ |
+- jpeg_stdio_src(&cinfo, input_file); |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++ if (memsrc) { |
++ size_t nbytes; |
++ do { |
++ inbuffer = (unsigned char *)realloc(inbuffer, insize + INPUT_BUF_SIZE); |
++ if (inbuffer == NULL) { |
++ fprintf(stderr, "%s: memory allocation failure\n", progname); |
++ exit(EXIT_FAILURE); |
++ } |
++ nbytes = JFREAD(input_file, &inbuffer[insize], INPUT_BUF_SIZE); |
++ if (nbytes < INPUT_BUF_SIZE && ferror(input_file)) { |
++ if (file_index < argc) |
++ fprintf(stderr, "%s: can't read from %s\n", progname, |
++ argv[file_index]); |
++ else |
++ fprintf(stderr, "%s: can't read from stdin\n", progname); |
++ } |
++ insize += (unsigned long)nbytes; |
++ } while (nbytes == INPUT_BUF_SIZE); |
++ fprintf(stderr, "Compressed size: %lu bytes\n", insize); |
++ jpeg_mem_src(&cinfo, inbuffer, insize); |
++ } else |
++#endif |
++ jpeg_stdio_src(&cinfo, input_file); |
+ |
+ /* Read file header, set default decompression parameters */ |
+ (void) jpeg_read_header(&cinfo, TRUE); |
+@@ -575,14 +649,64 @@ |
+ /* Start decompressor */ |
+ (void) jpeg_start_decompress(&cinfo); |
+ |
+- /* Write output file header */ |
+- (*dest_mgr->start_output) (&cinfo, dest_mgr); |
++ /* Strip decode */ |
++ if (strip || skip) { |
++ JDIMENSION tmp; |
+ |
+- /* Process data */ |
+- while (cinfo.output_scanline < cinfo.output_height) { |
+- num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, |
+- dest_mgr->buffer_height); |
+- (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); |
++ /* Check for valid endY. We cannot check this value until after |
++ * jpeg_start_decompress() is called. Note that we have already verified |
++ * that startY <= endY. |
++ */ |
++ if (endY > cinfo.output_height - 1) { |
++ fprintf(stderr, "%s: strip %d-%d exceeds image height %d\n", progname, |
++ startY, endY, cinfo.output_height); |
++ exit(EXIT_FAILURE); |
++ } |
++ |
++ /* Write output file header. This is a hack to ensure that the destination |
++ * manager creates an image of the proper size for the partial decode. |
++ */ |
++ tmp = cinfo.output_height; |
++ cinfo.output_height = endY - startY + 1; |
++ if (skip) |
++ cinfo.output_height = tmp - cinfo.output_height; |
++ (*dest_mgr->start_output) (&cinfo, dest_mgr); |
++ cinfo.output_height = tmp; |
++ |
++ /* Process data */ |
++ if (skip) { |
++ while (cinfo.output_scanline < startY) { |
++ num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, |
++ dest_mgr->buffer_height); |
++ (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); |
++ } |
++ jpeg_skip_scanlines(&cinfo, endY - startY + 1); |
++ while (cinfo.output_scanline < cinfo.output_height) { |
++ num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, |
++ dest_mgr->buffer_height); |
++ (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); |
++ } |
++ } else { |
++ jpeg_skip_scanlines(&cinfo, startY); |
++ while (cinfo.output_scanline <= endY) { |
++ num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, |
++ dest_mgr->buffer_height); |
++ (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); |
++ } |
++ jpeg_skip_scanlines(&cinfo, cinfo.output_height - endY + 1); |
++ } |
++ |
++ /* Normal full image decode */ |
++ } else { |
++ /* Write output file header */ |
++ (*dest_mgr->start_output) (&cinfo, dest_mgr); |
++ |
++ /* Process data */ |
++ while (cinfo.output_scanline < cinfo.output_height) { |
++ num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer, |
++ dest_mgr->buffer_height); |
++ (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines); |
++ } |
} |
-@@ -944,7 +944,144 @@ |
- return TRUE; |
+ #ifdef PROGRESS_REPORT |
+@@ -610,6 +734,9 @@ |
+ end_progress_monitor((j_common_ptr) &cinfo); |
+ #endif |
+ |
++ if (memsrc && inbuffer != NULL) |
++ free(inbuffer); |
++ |
+ /* All done. */ |
+ exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS); |
+ return 0; /* suppress no-return-value warnings */ |
+Index: jcapimin.c |
+=================================================================== |
+--- jcapimin.c (revision 829) |
++++ jcapimin.c (working copy) |
+@@ -2,6 +2,7 @@ |
+ * jcapimin.c |
+ * |
+ * Copyright (C) 1994-1998, Thomas G. Lane. |
++ * Modified 2003-2010 by Guido Vollbeding. |
+ * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+@@ -63,8 +64,12 @@ |
+ |
+ cinfo->comp_info = NULL; |
+ |
+- for (i = 0; i < NUM_QUANT_TBLS; i++) |
++ for (i = 0; i < NUM_QUANT_TBLS; i++) { |
+ cinfo->quant_tbl_ptrs[i] = NULL; |
++#if JPEG_LIB_VERSION >= 70 |
++ cinfo->q_scale_factor[i] = 100; |
++#endif |
++ } |
+ |
+ for (i = 0; i < NUM_HUFF_TBLS; i++) { |
+ cinfo->dc_huff_tbl_ptrs[i] = NULL; |
+@@ -71,6 +76,13 @@ |
+ cinfo->ac_huff_tbl_ptrs[i] = NULL; |
+ } |
+ |
++#if JPEG_LIB_VERSION >= 80 |
++ /* Must do it here for emit_dqt in case jpeg_write_tables is used */ |
++ cinfo->block_size = DCTSIZE; |
++ cinfo->natural_order = jpeg_natural_order; |
++ cinfo->lim_Se = DCTSIZE2-1; |
++#endif |
++ |
+ cinfo->script_space = NULL; |
+ |
+ cinfo->input_gamma = 1.0; /* in case application forgets */ |
+Index: jccolor.c |
+=================================================================== |
+--- jccolor.c (revision 829) |
++++ jccolor.c (working copy) |
+@@ -1,10 +1,11 @@ |
+ /* |
+ * jccolor.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1996, Thomas G. Lane. |
++ * libjpeg-turbo Modifications: |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * Copyright 2009 D. R. Commander |
+- * This file is part of the Independent JPEG Group's software. |
++ * Copyright (C) 2009-2012, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains input colorspace conversion routines. |
+@@ -14,6 +15,7 @@ |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
+ #include "jsimd.h" |
++#include "config.h" |
+ |
+ |
+ /* Private subobject */ |
+@@ -81,6 +83,111 @@ |
+ #define TABLE_SIZE (8*(MAXJSAMPLE+1)) |
+ |
+ |
++/* Include inline routines for colorspace extensions */ |
++ |
++#include "jccolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++ |
++#define RGB_RED EXT_RGB_RED |
++#define RGB_GREEN EXT_RGB_GREEN |
++#define RGB_BLUE EXT_RGB_BLUE |
++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
++#define rgb_ycc_convert_internal extrgb_ycc_convert_internal |
++#define rgb_gray_convert_internal extrgb_gray_convert_internal |
++#define rgb_rgb_convert_internal extrgb_rgb_convert_internal |
++#include "jccolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef rgb_ycc_convert_internal |
++#undef rgb_gray_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_RGBX_RED |
++#define RGB_GREEN EXT_RGBX_GREEN |
++#define RGB_BLUE EXT_RGBX_BLUE |
++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
++#define rgb_ycc_convert_internal extrgbx_ycc_convert_internal |
++#define rgb_gray_convert_internal extrgbx_gray_convert_internal |
++#define rgb_rgb_convert_internal extrgbx_rgb_convert_internal |
++#include "jccolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef rgb_ycc_convert_internal |
++#undef rgb_gray_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_BGR_RED |
++#define RGB_GREEN EXT_BGR_GREEN |
++#define RGB_BLUE EXT_BGR_BLUE |
++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
++#define rgb_ycc_convert_internal extbgr_ycc_convert_internal |
++#define rgb_gray_convert_internal extbgr_gray_convert_internal |
++#define rgb_rgb_convert_internal extbgr_rgb_convert_internal |
++#include "jccolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef rgb_ycc_convert_internal |
++#undef rgb_gray_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_BGRX_RED |
++#define RGB_GREEN EXT_BGRX_GREEN |
++#define RGB_BLUE EXT_BGRX_BLUE |
++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
++#define rgb_ycc_convert_internal extbgrx_ycc_convert_internal |
++#define rgb_gray_convert_internal extbgrx_gray_convert_internal |
++#define rgb_rgb_convert_internal extbgrx_rgb_convert_internal |
++#include "jccolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef rgb_ycc_convert_internal |
++#undef rgb_gray_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_XBGR_RED |
++#define RGB_GREEN EXT_XBGR_GREEN |
++#define RGB_BLUE EXT_XBGR_BLUE |
++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
++#define rgb_ycc_convert_internal extxbgr_ycc_convert_internal |
++#define rgb_gray_convert_internal extxbgr_gray_convert_internal |
++#define rgb_rgb_convert_internal extxbgr_rgb_convert_internal |
++#include "jccolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef rgb_ycc_convert_internal |
++#undef rgb_gray_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_XRGB_RED |
++#define RGB_GREEN EXT_XRGB_GREEN |
++#define RGB_BLUE EXT_XRGB_BLUE |
++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
++#define rgb_ycc_convert_internal extxrgb_ycc_convert_internal |
++#define rgb_gray_convert_internal extxrgb_gray_convert_internal |
++#define rgb_rgb_convert_internal extxrgb_rgb_convert_internal |
++#include "jccolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef rgb_ycc_convert_internal |
++#undef rgb_gray_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++ |
+ /* |
+ * Initialize for RGB->YCC colorspace conversion. |
+ */ |
+@@ -119,14 +226,6 @@ |
+ |
+ /* |
+ * Convert some rows of samples to the JPEG colorspace. |
+- * |
+- * Note that we change from the application's interleaved-pixel format |
+- * to our internal noninterleaved, one-plane-per-component format. |
+- * The input buffer is therefore three times as wide as the output buffer. |
+- * |
+- * A starting row offset is provided only for the output buffer. The caller |
+- * can easily adjust the passed input_buf value to accommodate any row |
+- * offset required on that side. |
+ */ |
+ |
+ METHODDEF(void) |
+@@ -134,43 +233,39 @@ |
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
+ JDIMENSION output_row, int num_rows) |
+ { |
+- my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; |
+- register int r, g, b; |
+- register INT32 * ctab = cconvert->rgb_ycc_tab; |
+- register JSAMPROW inptr; |
+- register JSAMPROW outptr0, outptr1, outptr2; |
+- register JDIMENSION col; |
+- JDIMENSION num_cols = cinfo->image_width; |
+- |
+- while (--num_rows >= 0) { |
+- inptr = *input_buf++; |
+- outptr0 = output_buf[0][output_row]; |
+- outptr1 = output_buf[1][output_row]; |
+- outptr2 = output_buf[2][output_row]; |
+- output_row++; |
+- for (col = 0; col < num_cols; col++) { |
+- r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]); |
+- g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]); |
+- b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]); |
+- inptr += rgb_pixelsize[cinfo->in_color_space]; |
+- /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations |
+- * must be too; we do not need an explicit range-limiting operation. |
+- * Hence the value being shifted is never negative, and we don't |
+- * need the general RIGHT_SHIFT macro. |
+- */ |
+- /* Y */ |
+- outptr0[col] = (JSAMPLE) |
+- ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) |
+- >> SCALEBITS); |
+- /* Cb */ |
+- outptr1[col] = (JSAMPLE) |
+- ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF]) |
+- >> SCALEBITS); |
+- /* Cr */ |
+- outptr2[col] = (JSAMPLE) |
+- ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF]) |
+- >> SCALEBITS); |
+- } |
++ switch (cinfo->in_color_space) { |
++ case JCS_EXT_RGB: |
++ extrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ extrgbx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGR: |
++ extbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ extbgrx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ extxbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ extxrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ default: |
++ rgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
+ } |
} |
-+#ifdef MOTION_JPEG_SUPPORTED |
+@@ -180,9 +275,6 @@ |
-+/* The default Huffman tables used by motion JPEG frames. When a motion JPEG |
-+ * frame does not have DHT tables, we should use the huffman tables suggested by |
-+ * the JPEG standard. Each of these tables represents a member of the JHUFF_TBLS |
-+ * struct so we can just copy it to the according JHUFF_TBLS member. |
+ /* |
+ * Convert some rows of samples to the JPEG colorspace. |
+- * This version handles RGB->grayscale conversion, which is the same |
+- * as the RGB->Y portion of RGB->YCbCr. |
+- * We assume rgb_ycc_start has been called (we only use the Y tables). |
+ */ |
+ |
+ METHODDEF(void) |
+@@ -190,28 +282,85 @@ |
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
+ JDIMENSION output_row, int num_rows) |
+ { |
+- my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; |
+- register int r, g, b; |
+- register INT32 * ctab = cconvert->rgb_ycc_tab; |
+- register JSAMPROW inptr; |
+- register JSAMPROW outptr; |
+- register JDIMENSION col; |
+- JDIMENSION num_cols = cinfo->image_width; |
++ switch (cinfo->in_color_space) { |
++ case JCS_EXT_RGB: |
++ extrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ extrgbx_gray_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGR: |
++ extbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ extbgrx_gray_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ extxbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ extxrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ default: |
++ rgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ } |
++} |
+ |
+- while (--num_rows >= 0) { |
+- inptr = *input_buf++; |
+- outptr = output_buf[0][output_row]; |
+- output_row++; |
+- for (col = 0; col < num_cols; col++) { |
+- r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]); |
+- g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]); |
+- b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]); |
+- inptr += rgb_pixelsize[cinfo->in_color_space]; |
+- /* Y */ |
+- outptr[col] = (JSAMPLE) |
+- ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) |
+- >> SCALEBITS); |
+- } |
++ |
++/* |
++ * Extended RGB to plain RGB conversion |
+ */ |
-+/* DC table 0 */ |
-+LOCAL(const unsigned char) mjpg_dc0_bits[] = { |
-+ 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, |
-+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
++ |
++METHODDEF(void) |
++rgb_rgb_convert (j_compress_ptr cinfo, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows) |
++{ |
++ switch (cinfo->in_color_space) { |
++ case JCS_EXT_RGB: |
++ extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGR: |
++ extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
++ default: |
++ rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row, |
++ num_rows); |
++ break; |
+ } |
+ } |
+ |
+@@ -377,6 +526,10 @@ |
+ case JCS_EXT_BGRX: |
+ case JCS_EXT_XBGR: |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_RGBA: |
++ case JCS_EXT_BGRA: |
++ case JCS_EXT_ABGR: |
++ case JCS_EXT_ARGB: |
+ if (cinfo->input_components != rgb_pixelsize[cinfo->in_color_space]) |
+ ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE); |
+ break; |
+@@ -411,9 +564,17 @@ |
+ cinfo->in_color_space == JCS_EXT_BGR || |
+ cinfo->in_color_space == JCS_EXT_BGRX || |
+ cinfo->in_color_space == JCS_EXT_XBGR || |
+- cinfo->in_color_space == JCS_EXT_XRGB) { |
+- cconvert->pub.start_pass = rgb_ycc_start; |
+- cconvert->pub.color_convert = rgb_gray_convert; |
++ cinfo->in_color_space == JCS_EXT_XRGB || |
++ cinfo->in_color_space == JCS_EXT_RGBA || |
++ cinfo->in_color_space == JCS_EXT_BGRA || |
++ cinfo->in_color_space == JCS_EXT_ABGR || |
++ cinfo->in_color_space == JCS_EXT_ARGB) { |
++ if (jsimd_can_rgb_gray()) |
++ cconvert->pub.color_convert = jsimd_rgb_gray_convert; |
++ else { |
++ cconvert->pub.start_pass = rgb_ycc_start; |
++ cconvert->pub.color_convert = rgb_gray_convert; |
++ } |
+ } else if (cinfo->in_color_space == JCS_YCbCr) |
+ cconvert->pub.color_convert = grayscale_convert; |
+ else |
+@@ -421,17 +582,25 @@ |
+ break; |
+ |
+ case JCS_RGB: |
+- case JCS_EXT_RGB: |
+- case JCS_EXT_RGBX: |
+- case JCS_EXT_BGR: |
+- case JCS_EXT_BGRX: |
+- case JCS_EXT_XBGR: |
+- case JCS_EXT_XRGB: |
+ if (cinfo->num_components != 3) |
+ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE); |
+- if (cinfo->in_color_space == cinfo->jpeg_color_space && |
+- rgb_pixelsize[cinfo->in_color_space] == 3) |
++ if (rgb_red[cinfo->in_color_space] == 0 && |
++ rgb_green[cinfo->in_color_space] == 1 && |
++ rgb_blue[cinfo->in_color_space] == 2 && |
++ rgb_pixelsize[cinfo->in_color_space] == 3) |
+ cconvert->pub.color_convert = null_convert; |
++ else if (cinfo->in_color_space == JCS_RGB || |
++ cinfo->in_color_space == JCS_EXT_RGB || |
++ cinfo->in_color_space == JCS_EXT_RGBX || |
++ cinfo->in_color_space == JCS_EXT_BGR || |
++ cinfo->in_color_space == JCS_EXT_BGRX || |
++ cinfo->in_color_space == JCS_EXT_XBGR || |
++ cinfo->in_color_space == JCS_EXT_XRGB || |
++ cinfo->in_color_space == JCS_EXT_RGBA || |
++ cinfo->in_color_space == JCS_EXT_BGRA || |
++ cinfo->in_color_space == JCS_EXT_ABGR || |
++ cinfo->in_color_space == JCS_EXT_ARGB) |
++ cconvert->pub.color_convert = rgb_rgb_convert; |
+ else |
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); |
+ break; |
+@@ -445,7 +614,11 @@ |
+ cinfo->in_color_space == JCS_EXT_BGR || |
+ cinfo->in_color_space == JCS_EXT_BGRX || |
+ cinfo->in_color_space == JCS_EXT_XBGR || |
+- cinfo->in_color_space == JCS_EXT_XRGB) { |
++ cinfo->in_color_space == JCS_EXT_XRGB || |
++ cinfo->in_color_space == JCS_EXT_RGBA || |
++ cinfo->in_color_space == JCS_EXT_BGRA || |
++ cinfo->in_color_space == JCS_EXT_ABGR || |
++ cinfo->in_color_space == JCS_EXT_ARGB) { |
+ if (jsimd_can_rgb_ycc()) |
+ cconvert->pub.color_convert = jsimd_rgb_ycc_convert; |
+ else { |
+Index: jcdctmgr.c |
+=================================================================== |
+--- jcdctmgr.c (revision 829) |
++++ jcdctmgr.c (working copy) |
+@@ -1,10 +1,12 @@ |
+ /* |
+ * jcdctmgr.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1996, Thomas G. Lane. |
++ * libjpeg-turbo Modifications: |
+ * Copyright (C) 1999-2006, MIYASAKA Masaru. |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * This file is part of the Independent JPEG Group's software. |
++ * Copyright (C) 2011 D. R. Commander |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains the forward-DCT management logic. |
+@@ -39,6 +41,8 @@ |
+ (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
+ FAST_FLOAT * workspace)); |
+ |
++METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *); |
++ |
+ typedef struct { |
+ struct jpeg_forward_dct pub; /* public fields */ |
+ |
+@@ -73,7 +77,7 @@ |
+ * Find the highest bit in an integer through binary search. |
+ */ |
+ LOCAL(int) |
+-fls (UINT16 val) |
++flss (UINT16 val) |
+ { |
+ int bit; |
+ |
+@@ -160,7 +164,7 @@ |
+ * of in a consecutive manner, yet again in order to allow SIMD |
+ * routines. |
+ */ |
+-LOCAL(void) |
++LOCAL(int) |
+ compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) |
+ { |
+ UDCTELEM2 fq, fr; |
+@@ -167,7 +171,7 @@ |
+ UDCTELEM c; |
+ int b, r; |
+ |
+- b = fls(divisor) - 1; |
++ b = flss(divisor) - 1; |
+ r = sizeof(DCTELEM) * 8 + b; |
+ |
+ fq = ((UDCTELEM2)1 << r) / divisor; |
+@@ -179,7 +183,7 @@ |
+ /* fq will be one bit too large to fit in DCTELEM, so adjust */ |
+ fq >>= 1; |
+ r--; |
+- } else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */ |
++ } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */ |
+ c++; |
+ } else { /* fractional part is > 0.5 */ |
+ fq++; |
+@@ -189,6 +193,9 @@ |
+ dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */ |
+ dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */ |
+ dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */ |
++ |
++ if(r <= 16) return 0; |
++ else return 1; |
+ } |
+ |
+ /* |
+@@ -232,7 +239,9 @@ |
+ } |
+ dtbl = fdct->divisors[qtblno]; |
+ for (i = 0; i < DCTSIZE2; i++) { |
+- compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]); |
++ if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) |
++ && fdct->quantize == jsimd_quantize) |
++ fdct->quantize = quantize; |
+ } |
+ break; |
+ #endif |
+@@ -266,10 +275,12 @@ |
+ } |
+ dtbl = fdct->divisors[qtblno]; |
+ for (i = 0; i < DCTSIZE2; i++) { |
+- compute_reciprocal( |
++ if(!compute_reciprocal( |
+ DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], |
+ (INT32) aanscales[i]), |
+- CONST_BITS-3), &dtbl[i]); |
++ CONST_BITS-3), &dtbl[i]) |
++ && fdct->quantize == jsimd_quantize) |
++ fdct->quantize = quantize; |
+ } |
+ } |
+ break; |
+Index: jchuff.c |
+=================================================================== |
+--- jchuff.c (revision 829) |
++++ jchuff.c (working copy) |
+@@ -1,8 +1,10 @@ |
+ /* |
+ * jchuff.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2009-2011, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains Huffman entropy encoding routines. |
+@@ -14,21 +16,6 @@ |
+ * permanent JPEG objects only upon successful completion of an MCU. |
+ */ |
+ |
+-/* Modifications: |
+- * Copyright (C)2007 Sun Microsystems, Inc. |
+- * Copyright (C)2009 D. R. Commander |
+- * |
+- * This library is free software and may be redistributed and/or modified under |
+- * the terms of the wxWindows Library License, Version 3.1 or (at your option) |
+- * any later version. The full license is in the LICENSE.txt file included |
+- * with this distribution. |
+- * |
+- * This library is distributed in the hope that it will be useful, |
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+- * wxWindows Library License for more details. |
+- */ |
+- |
+ #define JPEG_INTERNALS |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
+@@ -35,13 +22,42 @@ |
+ #include "jchuff.h" /* Declarations shared with jcphuff.c */ |
+ #include <limits.h> |
+ |
+-static unsigned char jpeg_first_bit_table[65536]; |
+-int jpeg_first_bit_table_init=0; |
++/* |
++ * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be |
++ * used for bit counting rather than the lookup table. This will reduce the |
++ * memory footprint by 64k, which is important for some mobile applications |
++ * that create many isolated instances of libjpeg-turbo (web browsers, for |
++ * instance.) This may improve performance on some mobile platforms as well. |
++ * This feature is enabled by default only on ARM processors, because some x86 |
++ * chips have a slow implementation of bsr, and the use of clz/bsr cannot be |
++ * shown to have a significant performance impact even on the x86 chips that |
++ * have a fast implementation of it. When building for ARMv6, you can |
++ * explicitly disable the use of clz/bsr by adding -mthumb to the compiler |
++ * flags (this defines __thumb__). |
++ */ |
+ |
++/* NOTE: Both GCC and Clang define __GNUC__ */ |
++#if defined __GNUC__ && defined __arm__ |
++#if !defined __thumb__ || defined __thumb2__ |
++#define USE_CLZ_INTRINSIC |
++#endif |
++#endif |
++ |
++#ifdef USE_CLZ_INTRINSIC |
++#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) |
++#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) |
++#else |
++static unsigned char jpeg_nbits_table[65536]; |
++static int jpeg_nbits_table_init = 0; |
++#define JPEG_NBITS(x) (jpeg_nbits_table[x]) |
++#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) |
++#endif |
++ |
+ #ifndef min |
+ #define min(a,b) ((a)<(b)?(a):(b)) |
+ #endif |
+ |
++ |
+ /* Expanded entropy encoder object for Huffman encoding. |
+ * |
+ * The savable_state subrecord contains fields that change within an MCU, |
+@@ -49,7 +65,7 @@ |
+ */ |
+ |
+ typedef struct { |
+- long put_buffer; /* current bit-accumulation buffer */ |
++ size_t put_buffer; /* current bit-accumulation buffer */ |
+ int put_bits; /* # of bits now in it */ |
+ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ |
+ } savable_state; |
+@@ -181,7 +197,6 @@ |
+ } |
+ |
+ /* Initialize bit buffer to empty */ |
+- |
+ entropy->saved.put_buffer = 0; |
+ entropy->saved.put_bits = 0; |
+ |
+@@ -285,14 +300,16 @@ |
+ dtbl->ehufsi[i] = huffsize[p]; |
+ } |
+ |
+- if(!jpeg_first_bit_table_init) { |
++#ifndef USE_CLZ_INTRINSIC |
++ if(!jpeg_nbits_table_init) { |
+ for(i = 0; i < 65536; i++) { |
+- int bit = 0, val = i; |
+- while (val) {val >>= 1; bit++;} |
+- jpeg_first_bit_table[i] = bit; |
++ int nbits = 0, temp = i; |
++ while (temp) {temp >>= 1; nbits++;} |
++ jpeg_nbits_table[i] = nbits; |
+ } |
+- jpeg_first_bit_table_init = 1; |
++ jpeg_nbits_table_init = 1; |
+ } |
++#endif |
+ } |
+ |
+ |
+@@ -312,8 +329,6 @@ |
+ { |
+ struct jpeg_destination_mgr * dest = state->cinfo->dest; |
+ |
+- dest->free_in_buffer = state->free_in_buffer; |
+- |
+ if (! (*dest->empty_output_buffer) (state->cinfo)) |
+ return FALSE; |
+ /* After a successful buffer dump, must reset buffer pointers */ |
+@@ -325,178 +340,133 @@ |
+ |
+ /* Outputting bits to the file */ |
+ |
+-/* Only the right 24 bits of put_buffer are used; the valid bits are |
+- * left-justified in this part. At most 16 bits can be passed to emit_bits |
+- * in one call, and we never retain more than 7 bits in put_buffer |
+- * between calls, so 24 bits are sufficient. |
++/* These macros perform the same task as the emit_bits() function in the |
++ * original libjpeg code. In addition to reducing overhead by explicitly |
++ * inlining the code, additional performance is achieved by taking into |
++ * account the size of the bit buffer and waiting until it is almost full |
++ * before emptying it. This mostly benefits 64-bit platforms, since 6 |
++ * bytes can be stored in a 64-bit bit buffer before it has to be emptied. |
+ */ |
+ |
+-/***************************************************************/ |
+- |
+-#define EMIT_BYTE() { \ |
+- if (0xFF == (*buffer++ = (unsigned char)(put_buffer >> (put_bits -= 8)))) \ |
+- *buffer++ = 0; \ |
++#define EMIT_BYTE() { \ |
++ JOCTET c; \ |
++ put_bits -= 8; \ |
++ c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \ |
++ *buffer++ = c; \ |
++ if (c == 0xFF) /* need to stuff a zero byte? */ \ |
++ *buffer++ = 0; \ |
+ } |
+ |
+-/***************************************************************/ |
++#define PUT_BITS(code, size) { \ |
++ put_bits += size; \ |
++ put_buffer = (put_buffer << size) | code; \ |
++} |
+ |
+-#define DUMP_BITS_(code, size) { \ |
+- put_bits += size; \ |
+- put_buffer = (put_buffer << size) | code; \ |
+- if (put_bits > 7) \ |
+- while(put_bits > 7) \ |
+- EMIT_BYTE() \ |
+- } |
+- |
+-/***************************************************************/ |
+- |
+-#define CHECKBUF15() { \ |
+- if (put_bits > 15) { \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- } \ |
++#define CHECKBUF15() { \ |
++ if (put_bits > 15) { \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ } \ |
+ } |
+ |
+-#define CHECKBUF47() { \ |
+- if (put_bits > 47) { \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- } \ |
++#define CHECKBUF31() { \ |
++ if (put_bits > 31) { \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ } \ |
+ } |
+ |
+-#define CHECKBUF31() { \ |
+- if (put_bits > 31) { \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- EMIT_BYTE() \ |
+- } \ |
++#define CHECKBUF47() { \ |
++ if (put_bits > 47) { \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ EMIT_BYTE() \ |
++ } \ |
+ } |
+ |
+-/***************************************************************/ |
++#if __WORDSIZE==64 || defined(_WIN64) |
+ |
+-#define DUMP_BITS_NOCHECK(code, size) { \ |
+- put_bits += size; \ |
+- put_buffer = (put_buffer << size) | code; \ |
+- } |
++#define EMIT_BITS(code, size) { \ |
++ CHECKBUF47() \ |
++ PUT_BITS(code, size) \ |
++} |
+ |
+-#if __WORDSIZE==64 |
+- |
+-#define DUMP_BITS(code, size) { \ |
+- CHECKBUF47() \ |
+- put_bits += size; \ |
+- put_buffer = (put_buffer << size) | code; \ |
++#define EMIT_CODE(code, size) { \ |
++ temp2 &= (((INT32) 1)<<nbits) - 1; \ |
++ CHECKBUF31() \ |
++ PUT_BITS(code, size) \ |
++ PUT_BITS(temp2, nbits) \ |
+ } |
+ |
+ #else |
+ |
+-#define DUMP_BITS(code, size) { \ |
+- put_bits += size; \ |
+- put_buffer = (put_buffer << size) | code; \ |
+- CHECKBUF15() \ |
+- } |
++#define EMIT_BITS(code, size) { \ |
++ PUT_BITS(code, size) \ |
++ CHECKBUF15() \ |
++} |
+ |
+-#endif |
+- |
+-/***************************************************************/ |
+- |
+-#define DUMP_SINGLE_VALUE(ht, codevalue) { \ |
+- size = ht->ehufsi[codevalue]; \ |
+- code = ht->ehufco[codevalue]; \ |
+- \ |
+- DUMP_BITS(code, size) \ |
++#define EMIT_CODE(code, size) { \ |
++ temp2 &= (((INT32) 1)<<nbits) - 1; \ |
++ PUT_BITS(code, size) \ |
++ CHECKBUF15() \ |
++ PUT_BITS(temp2, nbits) \ |
++ CHECKBUF15() \ |
+ } |
+ |
+-/***************************************************************/ |
+- |
+-#define DUMP_VALUE_SLOW(ht, codevalue, t, nbits) { \ |
+- size = ht->ehufsi[codevalue]; \ |
+- code = ht->ehufco[codevalue]; \ |
+- t &= ~(-1 << nbits); \ |
+- DUMP_BITS_NOCHECK(code, size) \ |
+- CHECKBUF15() \ |
+- DUMP_BITS_NOCHECK(t, nbits) \ |
+- CHECKBUF15() \ |
+- } |
+- |
+-int _max=0; |
+- |
+-#if __WORDSIZE==64 |
+- |
+-#define DUMP_VALUE(ht, codevalue, t, nbits) { \ |
+- size = ht->ehufsi[codevalue]; \ |
+- code = ht->ehufco[codevalue]; \ |
+- t &= ~(-1 << nbits); \ |
+- CHECKBUF31() \ |
+- DUMP_BITS_NOCHECK(code, size) \ |
+- DUMP_BITS_NOCHECK(t, nbits) \ |
+- } |
+- |
+-#else |
+- |
+-#define DUMP_VALUE(ht, codevalue, t, nbits) { \ |
+- size = ht->ehufsi[codevalue]; \ |
+- code = ht->ehufco[codevalue]; \ |
+- t &= ~(-1 << nbits); \ |
+- DUMP_BITS_NOCHECK(code, size) \ |
+- CHECKBUF15() \ |
+- DUMP_BITS_NOCHECK(t, nbits) \ |
+- CHECKBUF15() \ |
+- } |
+- |
+ #endif |
+ |
+-/***************************************************************/ |
+ |
+ #define BUFSIZE (DCTSIZE2 * 2) |
+ |
+-#define LOAD_BUFFER() { \ |
+- if (state->free_in_buffer < BUFSIZE) { \ |
+- localbuf = 1; \ |
+- buffer = _buffer; \ |
+- } \ |
+- else buffer = state->next_output_byte; \ |
++#define LOAD_BUFFER() { \ |
++ if (state->free_in_buffer < BUFSIZE) { \ |
++ localbuf = 1; \ |
++ buffer = _buffer; \ |
++ } \ |
++ else buffer = state->next_output_byte; \ |
+ } |
+ |
+-#define STORE_BUFFER() { \ |
+- if (localbuf) { \ |
+- bytes = buffer - _buffer; \ |
+- buffer = _buffer; \ |
+- while (bytes > 0) { \ |
+- bytestocopy = min(bytes, state->free_in_buffer); \ |
+- MEMCOPY(state->next_output_byte, buffer, bytestocopy); \ |
+- state->next_output_byte += bytestocopy; \ |
+- buffer += bytestocopy; \ |
+- state->free_in_buffer -= bytestocopy; \ |
+- if (state->free_in_buffer == 0) \ |
+- if (! dump_buffer(state)) return FALSE; \ |
+- bytes -= bytestocopy; \ |
+- } \ |
+- } \ |
+- else { \ |
+- state->free_in_buffer -= (buffer - state->next_output_byte); \ |
+- state->next_output_byte = buffer; \ |
+- } \ |
++#define STORE_BUFFER() { \ |
++ if (localbuf) { \ |
++ bytes = buffer - _buffer; \ |
++ buffer = _buffer; \ |
++ while (bytes > 0) { \ |
++ bytestocopy = min(bytes, state->free_in_buffer); \ |
++ MEMCOPY(state->next_output_byte, buffer, bytestocopy); \ |
++ state->next_output_byte += bytestocopy; \ |
++ buffer += bytestocopy; \ |
++ state->free_in_buffer -= bytestocopy; \ |
++ if (state->free_in_buffer == 0) \ |
++ if (! dump_buffer(state)) return FALSE; \ |
++ bytes -= bytestocopy; \ |
++ } \ |
++ } \ |
++ else { \ |
++ state->free_in_buffer -= (buffer - state->next_output_byte); \ |
++ state->next_output_byte = buffer; \ |
++ } \ |
+ } |
+ |
+-/***************************************************************/ |
+ |
+ LOCAL(boolean) |
+ flush_bits (working_state * state) |
+ { |
+- unsigned char _buffer[BUFSIZE], *buffer; |
+- long put_buffer; int put_bits; |
+- int bytes, bytestocopy, localbuf = 0; |
++ JOCTET _buffer[BUFSIZE], *buffer; |
++ size_t put_buffer; int put_bits; |
++ size_t bytes, bytestocopy; int localbuf = 0; |
+ |
+ put_buffer = state->cur.put_buffer; |
+ put_bits = state->cur.put_bits; |
+ LOAD_BUFFER() |
+ |
+- DUMP_BITS_(0x7F, 7) |
++ /* fill any partial byte with ones */ |
++ PUT_BITS(0x7F, 7) |
++ while (put_bits >= 8) EMIT_BYTE() |
+ |
+ state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ |
+ state->cur.put_bits = 0; |
+@@ -505,6 +475,7 @@ |
+ return TRUE; |
+ } |
+ |
++ |
+ /* Encode a single block's worth of coefficients */ |
+ |
+ LOCAL(boolean) |
+@@ -511,13 +482,13 @@ |
+ encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val, |
+ c_derived_tbl *dctbl, c_derived_tbl *actbl) |
+ { |
+- int temp, temp2; |
++ int temp, temp2, temp3; |
+ int nbits; |
+- int r, sflag, size, code; |
+- unsigned char _buffer[BUFSIZE], *buffer; |
+- long put_buffer; int put_bits; |
++ int r, code, size; |
++ JOCTET _buffer[BUFSIZE], *buffer; |
++ size_t put_buffer; int put_bits; |
+ int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0]; |
+- int bytes, bytestocopy, localbuf = 0; |
++ size_t bytes, bytestocopy; int localbuf = 0; |
+ |
+ put_buffer = state->cur.put_buffer; |
+ put_bits = state->cur.put_bits; |
+@@ -527,50 +498,88 @@ |
+ |
+ temp = temp2 = block[0] - last_dc_val; |
+ |
+- sflag = temp >> 31; |
+- temp -= ((temp + temp) & sflag); |
+- temp2 += sflag; |
+- nbits = jpeg_first_bit_table[temp]; |
+- DUMP_VALUE_SLOW(dctbl, nbits, temp2, nbits) |
++ /* This is a well-known technique for obtaining the absolute value without a |
++ * branch. It is derived from an assembly language technique presented in |
++ * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by |
++ * Agner Fog. |
++ */ |
++ temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); |
++ temp ^= temp3; |
++ temp -= temp3; |
+ |
++ /* For a negative input, want temp2 = bitwise complement of abs(input) */ |
++ /* This code assumes we are on a two's complement machine */ |
++ temp2 += temp3; |
++ |
++ /* Find the number of bits needed for the magnitude of the coefficient */ |
++ nbits = JPEG_NBITS(temp); |
++ |
++ /* Emit the Huffman-coded symbol for the number of bits */ |
++ code = dctbl->ehufco[nbits]; |
++ size = dctbl->ehufsi[nbits]; |
++ PUT_BITS(code, size) |
++ CHECKBUF15() |
++ |
++ /* Mask off any bits of temp2 above its low nbits bits */ |
++ temp2 &= (((INT32) 1)<<nbits) - 1; |
++ |
++ /* Emit that number of bits of the value, if positive, */ |
++ /* or the complement of its magnitude, if negative. */ |
++ PUT_BITS(temp2, nbits) |
++ CHECKBUF15() |
++ |
+ /* Encode the AC coefficients per section F.1.2.2 */ |
+ |
+ r = 0; /* r = run length of zeros */ |
+ |
+-#define innerloop(order) { \ |
+- temp2 = *(JCOEF*)((unsigned char*)block + order); \ |
+- if(temp2 == 0) r++; \ |
+- else { \ |
+- temp = (JCOEF)temp2; \ |
+- sflag = temp >> 31; \ |
+- temp = (temp ^ sflag) - sflag; \ |
+- temp2 += sflag; \ |
+- nbits = jpeg_first_bit_table[temp]; \ |
+- for(; r > 15; r -= 16) DUMP_BITS(code_0xf0, size_0xf0) \ |
+- sflag = (r << 4) + nbits; \ |
+- DUMP_VALUE(actbl, sflag, temp2, nbits) \ |
++/* Manually unroll the k loop to eliminate the counter variable. This |
++ * improves performance greatly on systems with a limited number of |
++ * registers (such as x86). |
++ */ |
++#define kloop(jpeg_natural_order_of_k) { \ |
++ if ((temp = block[jpeg_natural_order_of_k]) == 0) { \ |
++ r++; \ |
++ } else { \ |
++ temp2 = temp; \ |
++ /* Branch-less absolute value, bitwise complement, etc., same as above */ \ |
++ temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \ |
++ temp ^= temp3; \ |
++ temp -= temp3; \ |
++ temp2 += temp3; \ |
++ nbits = JPEG_NBITS_NONZERO(temp); \ |
++ /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ |
++ while (r > 15) { \ |
++ EMIT_BITS(code_0xf0, size_0xf0) \ |
++ r -= 16; \ |
++ } \ |
++ /* Emit Huffman symbol for run length / number of bits */ \ |
++ temp3 = (r << 4) + nbits; \ |
++ code = actbl->ehufco[temp3]; \ |
++ size = actbl->ehufsi[temp3]; \ |
++ EMIT_CODE(code, size) \ |
+ r = 0; \ |
+- }} |
++ } \ |
++} |
+ |
+- innerloop(2*1); innerloop(2*8); innerloop(2*16); innerloop(2*9); |
+- innerloop(2*2); innerloop(2*3); innerloop(2*10); innerloop(2*17); |
+- innerloop(2*24); innerloop(2*32); innerloop(2*25); innerloop(2*18); |
+- innerloop(2*11); innerloop(2*4); innerloop(2*5); innerloop(2*12); |
+- innerloop(2*19); innerloop(2*26); innerloop(2*33); innerloop(2*40); |
+- innerloop(2*48); innerloop(2*41); innerloop(2*34); innerloop(2*27); |
+- innerloop(2*20); innerloop(2*13); innerloop(2*6); innerloop(2*7); |
+- innerloop(2*14); innerloop(2*21); innerloop(2*28); innerloop(2*35); |
+- innerloop(2*42); innerloop(2*49); innerloop(2*56); innerloop(2*57); |
+- innerloop(2*50); innerloop(2*43); innerloop(2*36); innerloop(2*29); |
+- innerloop(2*22); innerloop(2*15); innerloop(2*23); innerloop(2*30); |
+- innerloop(2*37); innerloop(2*44); innerloop(2*51); innerloop(2*58); |
+- innerloop(2*59); innerloop(2*52); innerloop(2*45); innerloop(2*38); |
+- innerloop(2*31); innerloop(2*39); innerloop(2*46); innerloop(2*53); |
+- innerloop(2*60); innerloop(2*61); innerloop(2*54); innerloop(2*47); |
+- innerloop(2*55); innerloop(2*62); innerloop(2*63); |
++ /* One iteration for each value in jpeg_natural_order[] */ |
++ kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3); |
++ kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18); |
++ kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26); |
++ kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27); |
++ kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21); |
++ kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57); |
++ kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15); |
++ kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58); |
++ kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39); |
++ kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47); |
++ kloop(55); kloop(62); kloop(63); |
+ |
+ /* If the last coef(s) were zero, emit an end-of-block code */ |
+- if (r > 0) DUMP_SINGLE_VALUE(actbl, 0x0) |
++ if (r > 0) { |
++ code = actbl->ehufco[0]; |
++ size = actbl->ehufsi[0]; |
++ EMIT_BITS(code, size) |
++ } |
+ |
+ state->cur.put_buffer = put_buffer; |
+ state->cur.put_bits = put_bits; |
+Index: jcinit.c |
+=================================================================== |
+--- jcinit.c (revision 829) |
++++ jcinit.c (working copy) |
+@@ -42,7 +42,11 @@ |
+ jinit_forward_dct(cinfo); |
+ /* Entropy encoding: either Huffman or arithmetic coding. */ |
+ if (cinfo->arith_code) { |
++#ifdef C_ARITH_CODING_SUPPORTED |
++ jinit_arith_encoder(cinfo); |
++#else |
+ ERREXIT(cinfo, JERR_ARITH_NOTIMPL); |
++#endif |
+ } else { |
+ if (cinfo->progressive_mode) { |
+ #ifdef C_PROGRESSIVE_SUPPORTED |
+Index: jcmainct.c |
+=================================================================== |
+--- jcmainct.c (revision 829) |
++++ jcmainct.c (working copy) |
+@@ -68,32 +68,32 @@ |
+ METHODDEF(void) |
+ start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode) |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ |
+ /* Do nothing in raw-data mode. */ |
+ if (cinfo->raw_data_in) |
+ return; |
+ |
+- main->cur_iMCU_row = 0; /* initialize counters */ |
+- main->rowgroup_ctr = 0; |
+- main->suspended = FALSE; |
+- main->pass_mode = pass_mode; /* save mode for use by process_data */ |
++ main_ptr->cur_iMCU_row = 0; /* initialize counters */ |
++ main_ptr->rowgroup_ctr = 0; |
++ main_ptr->suspended = FALSE; |
++ main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */ |
+ |
+ switch (pass_mode) { |
+ case JBUF_PASS_THRU: |
+ #ifdef FULL_MAIN_BUFFER_SUPPORTED |
+- if (main->whole_image[0] != NULL) |
++ if (main_ptr->whole_image[0] != NULL) |
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); |
+ #endif |
+- main->pub.process_data = process_data_simple_main; |
++ main_ptr->pub.process_data = process_data_simple_main; |
+ break; |
+ #ifdef FULL_MAIN_BUFFER_SUPPORTED |
+ case JBUF_SAVE_SOURCE: |
+ case JBUF_CRANK_DEST: |
+ case JBUF_SAVE_AND_PASS: |
+- if (main->whole_image[0] == NULL) |
++ if (main_ptr->whole_image[0] == NULL) |
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); |
+- main->pub.process_data = process_data_buffer_main; |
++ main_ptr->pub.process_data = process_data_buffer_main; |
+ break; |
+ #endif |
+ default: |
+@@ -114,14 +114,14 @@ |
+ JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, |
+ JDIMENSION in_rows_avail) |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ |
+- while (main->cur_iMCU_row < cinfo->total_iMCU_rows) { |
++ while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) { |
+ /* Read input data if we haven't filled the main buffer yet */ |
+- if (main->rowgroup_ctr < DCTSIZE) |
++ if (main_ptr->rowgroup_ctr < DCTSIZE) |
+ (*cinfo->prep->pre_process_data) (cinfo, |
+ input_buf, in_row_ctr, in_rows_avail, |
+- main->buffer, &main->rowgroup_ctr, |
++ main_ptr->buffer, &main_ptr->rowgroup_ctr, |
+ (JDIMENSION) DCTSIZE); |
+ |
+ /* If we don't have a full iMCU row buffered, return to application for |
+@@ -128,11 +128,11 @@ |
+ * more data. Note that preprocessor will always pad to fill the iMCU row |
+ * at the bottom of the image. |
+ */ |
+- if (main->rowgroup_ctr != DCTSIZE) |
++ if (main_ptr->rowgroup_ctr != DCTSIZE) |
+ return; |
+ |
+ /* Send the completed row to the compressor */ |
+- if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) { |
++ if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) { |
+ /* If compressor did not consume the whole row, then we must need to |
+ * suspend processing and return to the application. In this situation |
+ * we pretend we didn't yet consume the last input row; otherwise, if |
+@@ -139,9 +139,9 @@ |
+ * it happened to be the last row of the image, the application would |
+ * think we were done. |
+ */ |
+- if (! main->suspended) { |
++ if (! main_ptr->suspended) { |
+ (*in_row_ctr)--; |
+- main->suspended = TRUE; |
++ main_ptr->suspended = TRUE; |
+ } |
+ return; |
+ } |
+@@ -148,12 +148,12 @@ |
+ /* We did finish the row. Undo our little suspension hack if a previous |
+ * call suspended; then mark the main buffer empty. |
+ */ |
+- if (main->suspended) { |
++ if (main_ptr->suspended) { |
+ (*in_row_ctr)++; |
+- main->suspended = FALSE; |
++ main_ptr->suspended = FALSE; |
+ } |
+- main->rowgroup_ctr = 0; |
+- main->cur_iMCU_row++; |
++ main_ptr->rowgroup_ctr = 0; |
++ main_ptr->cur_iMCU_row++; |
+ } |
+ } |
+ |
+@@ -170,25 +170,25 @@ |
+ JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, |
+ JDIMENSION in_rows_avail) |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ int ci; |
+ jpeg_component_info *compptr; |
+- boolean writing = (main->pass_mode != JBUF_CRANK_DEST); |
++ boolean writing = (main_ptr->pass_mode != JBUF_CRANK_DEST); |
+ |
+- while (main->cur_iMCU_row < cinfo->total_iMCU_rows) { |
++ while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) { |
+ /* Realign the virtual buffers if at the start of an iMCU row. */ |
+- if (main->rowgroup_ctr == 0) { |
++ if (main_ptr->rowgroup_ctr == 0) { |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+- main->buffer[ci] = (*cinfo->mem->access_virt_sarray) |
+- ((j_common_ptr) cinfo, main->whole_image[ci], |
+- main->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE), |
++ main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray) |
++ ((j_common_ptr) cinfo, main_ptr->whole_image[ci], |
++ main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE), |
+ (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing); |
+ } |
+ /* In a read pass, pretend we just read some source data. */ |
+ if (! writing) { |
+ *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE; |
+- main->rowgroup_ctr = DCTSIZE; |
++ main_ptr->rowgroup_ctr = DCTSIZE; |
+ } |
+ } |
+ |
+@@ -197,16 +197,16 @@ |
+ if (writing) { |
+ (*cinfo->prep->pre_process_data) (cinfo, |
+ input_buf, in_row_ctr, in_rows_avail, |
+- main->buffer, &main->rowgroup_ctr, |
++ main_ptr->buffer, &main_ptr->rowgroup_ctr, |
+ (JDIMENSION) DCTSIZE); |
+ /* Return to application if we need more data to fill the iMCU row. */ |
+- if (main->rowgroup_ctr < DCTSIZE) |
++ if (main_ptr->rowgroup_ctr < DCTSIZE) |
+ return; |
+ } |
+ |
+ /* Emit data, unless this is a sink-only pass. */ |
+- if (main->pass_mode != JBUF_SAVE_SOURCE) { |
+- if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) { |
++ if (main_ptr->pass_mode != JBUF_SAVE_SOURCE) { |
++ if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) { |
+ /* If compressor did not consume the whole row, then we must need to |
+ * suspend processing and return to the application. In this situation |
+ * we pretend we didn't yet consume the last input row; otherwise, if |
+@@ -213,9 +213,9 @@ |
+ * it happened to be the last row of the image, the application would |
+ * think we were done. |
+ */ |
+- if (! main->suspended) { |
++ if (! main_ptr->suspended) { |
+ (*in_row_ctr)--; |
+- main->suspended = TRUE; |
++ main_ptr->suspended = TRUE; |
+ } |
+ return; |
+ } |
+@@ -222,15 +222,15 @@ |
+ /* We did finish the row. Undo our little suspension hack if a previous |
+ * call suspended; then mark the main buffer empty. |
+ */ |
+- if (main->suspended) { |
++ if (main_ptr->suspended) { |
+ (*in_row_ctr)++; |
+- main->suspended = FALSE; |
++ main_ptr->suspended = FALSE; |
+ } |
+ } |
+ |
+ /* If get here, we are done with this iMCU row. Mark buffer empty. */ |
+- main->rowgroup_ctr = 0; |
+- main->cur_iMCU_row++; |
++ main_ptr->rowgroup_ctr = 0; |
++ main_ptr->cur_iMCU_row++; |
+ } |
+ } |
+ |
+@@ -244,15 +244,15 @@ |
+ GLOBAL(void) |
+ jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer) |
+ { |
+- my_main_ptr main; |
++ my_main_ptr main_ptr; |
+ int ci; |
+ jpeg_component_info *compptr; |
+ |
+- main = (my_main_ptr) |
++ main_ptr = (my_main_ptr) |
+ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
+ SIZEOF(my_main_controller)); |
+- cinfo->main = (struct jpeg_c_main_controller *) main; |
+- main->pub.start_pass = start_pass_main; |
++ cinfo->main = (struct jpeg_c_main_controller *) main_ptr; |
++ main_ptr->pub.start_pass = start_pass_main; |
+ |
+ /* We don't need to create a buffer in raw-data mode. */ |
+ if (cinfo->raw_data_in) |
+@@ -267,7 +267,7 @@ |
+ /* Note we pad the bottom to a multiple of the iMCU height */ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+- main->whole_image[ci] = (*cinfo->mem->request_virt_sarray) |
++ main_ptr->whole_image[ci] = (*cinfo->mem->request_virt_sarray) |
+ ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, |
+ compptr->width_in_blocks * DCTSIZE, |
+ (JDIMENSION) jround_up((long) compptr->height_in_blocks, |
+@@ -279,12 +279,12 @@ |
+ #endif |
+ } else { |
+ #ifdef FULL_MAIN_BUFFER_SUPPORTED |
+- main->whole_image[0] = NULL; /* flag for no virtual arrays */ |
++ main_ptr->whole_image[0] = NULL; /* flag for no virtual arrays */ |
+ #endif |
+ /* Allocate a strip buffer for each component */ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+- main->buffer[ci] = (*cinfo->mem->alloc_sarray) |
++ main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray) |
+ ((j_common_ptr) cinfo, JPOOL_IMAGE, |
+ compptr->width_in_blocks * DCTSIZE, |
+ (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); |
+Index: jcmarker.c |
+=================================================================== |
+--- jcmarker.c (revision 829) |
++++ jcmarker.c (working copy) |
+@@ -1,8 +1,11 @@ |
+ /* |
+ * jcmarker.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1998, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2003-2010 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2010, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains routines to write JPEG datastream markers. |
+@@ -11,6 +14,7 @@ |
+ #define JPEG_INTERNALS |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
++#include "jpegcomp.h" |
+ |
+ |
+ typedef enum { /* JPEG marker codes */ |
+@@ -18,24 +22,24 @@ |
+ M_SOF1 = 0xc1, |
+ M_SOF2 = 0xc2, |
+ M_SOF3 = 0xc3, |
+- |
++ |
+ M_SOF5 = 0xc5, |
+ M_SOF6 = 0xc6, |
+ M_SOF7 = 0xc7, |
+- |
++ |
+ M_JPG = 0xc8, |
+ M_SOF9 = 0xc9, |
+ M_SOF10 = 0xca, |
+ M_SOF11 = 0xcb, |
+- |
++ |
+ M_SOF13 = 0xcd, |
+ M_SOF14 = 0xce, |
+ M_SOF15 = 0xcf, |
+- |
++ |
+ M_DHT = 0xc4, |
+- |
++ |
+ M_DAC = 0xcc, |
+- |
++ |
+ M_RST0 = 0xd0, |
+ M_RST1 = 0xd1, |
+ M_RST2 = 0xd2, |
+@@ -44,7 +48,7 @@ |
+ M_RST5 = 0xd5, |
+ M_RST6 = 0xd6, |
+ M_RST7 = 0xd7, |
+- |
++ |
+ M_SOI = 0xd8, |
+ M_EOI = 0xd9, |
+ M_SOS = 0xda, |
+@@ -53,7 +57,7 @@ |
+ M_DRI = 0xdd, |
+ M_DHP = 0xde, |
+ M_EXP = 0xdf, |
+- |
++ |
+ M_APP0 = 0xe0, |
+ M_APP1 = 0xe1, |
+ M_APP2 = 0xe2, |
+@@ -70,13 +74,13 @@ |
+ M_APP13 = 0xed, |
+ M_APP14 = 0xee, |
+ M_APP15 = 0xef, |
+- |
++ |
+ M_JPG0 = 0xf0, |
+ M_JPG13 = 0xfd, |
+ M_COM = 0xfe, |
+- |
++ |
+ M_TEM = 0x01, |
+- |
++ |
+ M_ERROR = 0x100 |
+ } JPEG_MARKER; |
+ |
+@@ -229,33 +233,39 @@ |
+ char ac_in_use[NUM_ARITH_TBLS]; |
+ int length, i; |
+ jpeg_component_info *compptr; |
+- |
++ |
+ for (i = 0; i < NUM_ARITH_TBLS; i++) |
+ dc_in_use[i] = ac_in_use[i] = 0; |
+- |
++ |
+ for (i = 0; i < cinfo->comps_in_scan; i++) { |
+ compptr = cinfo->cur_comp_info[i]; |
+- dc_in_use[compptr->dc_tbl_no] = 1; |
+- ac_in_use[compptr->ac_tbl_no] = 1; |
++ /* DC needs no table for refinement scan */ |
++ if (cinfo->Ss == 0 && cinfo->Ah == 0) |
++ dc_in_use[compptr->dc_tbl_no] = 1; |
++ /* AC needs no table when not present */ |
++ if (cinfo->Se) |
++ ac_in_use[compptr->ac_tbl_no] = 1; |
+ } |
+- |
++ |
+ length = 0; |
+ for (i = 0; i < NUM_ARITH_TBLS; i++) |
+ length += dc_in_use[i] + ac_in_use[i]; |
+- |
+- emit_marker(cinfo, M_DAC); |
+- |
+- emit_2bytes(cinfo, length*2 + 2); |
+- |
+- for (i = 0; i < NUM_ARITH_TBLS; i++) { |
+- if (dc_in_use[i]) { |
+- emit_byte(cinfo, i); |
+- emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); |
++ |
++ if (length) { |
++ emit_marker(cinfo, M_DAC); |
++ |
++ emit_2bytes(cinfo, length*2 + 2); |
++ |
++ for (i = 0; i < NUM_ARITH_TBLS; i++) { |
++ if (dc_in_use[i]) { |
++ emit_byte(cinfo, i); |
++ emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); |
++ } |
++ if (ac_in_use[i]) { |
++ emit_byte(cinfo, i + 0x10); |
++ emit_byte(cinfo, cinfo->arith_ac_K[i]); |
++ } |
+ } |
+- if (ac_in_use[i]) { |
+- emit_byte(cinfo, i + 0x10); |
+- emit_byte(cinfo, cinfo->arith_ac_K[i]); |
+- } |
+ } |
+ #endif /* C_ARITH_CODING_SUPPORTED */ |
+ } |
+@@ -285,13 +295,13 @@ |
+ emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */ |
+ |
+ /* Make sure image isn't bigger than SOF field can handle */ |
+- if ((long) cinfo->image_height > 65535L || |
+- (long) cinfo->image_width > 65535L) |
++ if ((long) cinfo->_jpeg_height > 65535L || |
++ (long) cinfo->_jpeg_width > 65535L) |
+ ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535); |
+ |
+ emit_byte(cinfo, cinfo->data_precision); |
+- emit_2bytes(cinfo, (int) cinfo->image_height); |
+- emit_2bytes(cinfo, (int) cinfo->image_width); |
++ emit_2bytes(cinfo, (int) cinfo->_jpeg_height); |
++ emit_2bytes(cinfo, (int) cinfo->_jpeg_width); |
+ |
+ emit_byte(cinfo, cinfo->num_components); |
+ |
+@@ -320,22 +330,16 @@ |
+ for (i = 0; i < cinfo->comps_in_scan; i++) { |
+ compptr = cinfo->cur_comp_info[i]; |
+ emit_byte(cinfo, compptr->component_id); |
+- td = compptr->dc_tbl_no; |
+- ta = compptr->ac_tbl_no; |
+- if (cinfo->progressive_mode) { |
+- /* Progressive mode: only DC or only AC tables are used in one scan; |
+- * furthermore, Huffman coding of DC refinement uses no table at all. |
+- * We emit 0 for unused field(s); this is recommended by the P&M text |
+- * but does not seem to be specified in the standard. |
+- */ |
+- if (cinfo->Ss == 0) { |
+- ta = 0; /* DC scan */ |
+- if (cinfo->Ah != 0 && !cinfo->arith_code) |
+- td = 0; /* no DC table either */ |
+- } else { |
+- td = 0; /* AC scan */ |
+- } |
+- } |
++ |
++ /* We emit 0 for unused field(s); this is recommended by the P&M text |
++ * but does not seem to be specified in the standard. |
++ */ |
++ |
++ /* DC needs no table for refinement scan */ |
++ td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0; |
++ /* AC needs no table when not present */ |
++ ta = cinfo->Se ? compptr->ac_tbl_no : 0; |
++ |
+ emit_byte(cinfo, (td << 4) + ta); |
+ } |
+ |
+@@ -529,7 +533,10 @@ |
+ |
+ /* Emit the proper SOF marker */ |
+ if (cinfo->arith_code) { |
+- emit_sof(cinfo, M_SOF9); /* SOF code for arithmetic coding */ |
++ if (cinfo->progressive_mode) |
++ emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */ |
++ else |
++ emit_sof(cinfo, M_SOF9); /* SOF code for sequential arithmetic */ |
+ } else { |
+ if (cinfo->progressive_mode) |
+ emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */ |
+@@ -566,19 +573,12 @@ |
+ */ |
+ for (i = 0; i < cinfo->comps_in_scan; i++) { |
+ compptr = cinfo->cur_comp_info[i]; |
+- if (cinfo->progressive_mode) { |
+- /* Progressive mode: only DC or only AC tables are used in one scan */ |
+- if (cinfo->Ss == 0) { |
+- if (cinfo->Ah == 0) /* DC needs no table for refinement scan */ |
+- emit_dht(cinfo, compptr->dc_tbl_no, FALSE); |
+- } else { |
+- emit_dht(cinfo, compptr->ac_tbl_no, TRUE); |
+- } |
+- } else { |
+- /* Sequential mode: need both DC and AC tables */ |
++ /* DC needs no table for refinement scan */ |
++ if (cinfo->Ss == 0 && cinfo->Ah == 0) |
+ emit_dht(cinfo, compptr->dc_tbl_no, FALSE); |
++ /* AC needs no table when not present */ |
++ if (cinfo->Se) |
+ emit_dht(cinfo, compptr->ac_tbl_no, TRUE); |
+- } |
+ } |
+ } |
+ |
+Index: jcmaster.c |
+=================================================================== |
+--- jcmaster.c (revision 829) |
++++ jcmaster.c (working copy) |
+@@ -1,8 +1,11 @@ |
+ /* |
+ * jcmaster.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2003-2010 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2010, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains master control logic for the JPEG compressor. |
+@@ -14,6 +17,7 @@ |
+ #define JPEG_INTERNALS |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
++#include "jpegcomp.h" |
+ |
+ |
+ /* Private state */ |
+@@ -42,8 +46,28 @@ |
+ * Support routines that do various essential calculations. |
+ */ |
+ |
++#if JPEG_LIB_VERSION >= 70 |
++/* |
++ * Compute JPEG image dimensions and related values. |
++ * NOTE: this is exported for possible use by application. |
++ * Hence it mustn't do anything that can't be done twice. |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo) |
++/* Set the JPEG dimensions and minimum DCT scaled sizes for the unscaled case */ |
++{ |
++ /* Hardwire it to "no scaling" */ |
++ cinfo->jpeg_width = cinfo->image_width; |
++ cinfo->jpeg_height = cinfo->image_height; |
++ cinfo->min_DCT_h_scaled_size = DCTSIZE; |
++ cinfo->min_DCT_v_scaled_size = DCTSIZE; |
++} |
++#endif |
++ |
++ |
+ LOCAL(void) |
+-initial_setup (j_compress_ptr cinfo) |
++initial_setup (j_compress_ptr cinfo, boolean transcode_only) |
+ /* Do computations that are needed before master selection phase */ |
+ { |
+ int ci; |
+@@ -51,14 +75,21 @@ |
+ long samplesperrow; |
+ JDIMENSION jd_samplesperrow; |
+ |
++#if JPEG_LIB_VERSION >= 70 |
++#if JPEG_LIB_VERSION >= 80 |
++ if (!transcode_only) |
++#endif |
++ jpeg_calc_jpeg_dimensions(cinfo); |
++#endif |
++ |
+ /* Sanity check on image dimensions */ |
+- if (cinfo->image_height <= 0 || cinfo->image_width <= 0 |
++ if (cinfo->_jpeg_height <= 0 || cinfo->_jpeg_width <= 0 |
+ || cinfo->num_components <= 0 || cinfo->input_components <= 0) |
+ ERREXIT(cinfo, JERR_EMPTY_IMAGE); |
+ |
+ /* Make sure image isn't bigger than I can handle */ |
+- if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION || |
+- (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION) |
++ if ((long) cinfo->_jpeg_height > (long) JPEG_MAX_DIMENSION || |
++ (long) cinfo->_jpeg_width > (long) JPEG_MAX_DIMENSION) |
+ ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION); |
+ |
+ /* Width of an input scanline must be representable as JDIMENSION. */ |
+@@ -96,20 +127,24 @@ |
+ /* Fill in the correct component_index value; don't rely on application */ |
+ compptr->component_index = ci; |
+ /* For compression, we never do DCT scaling. */ |
++#if JPEG_LIB_VERSION >= 70 |
++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE; |
++#else |
+ compptr->DCT_scaled_size = DCTSIZE; |
++#endif |
+ /* Size in DCT blocks */ |
+ compptr->width_in_blocks = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, |
++ jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor, |
+ (long) (cinfo->max_h_samp_factor * DCTSIZE)); |
+ compptr->height_in_blocks = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, |
++ jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor, |
+ (long) (cinfo->max_v_samp_factor * DCTSIZE)); |
+ /* Size in samples */ |
+ compptr->downsampled_width = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, |
++ jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor, |
+ (long) cinfo->max_h_samp_factor); |
+ compptr->downsampled_height = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, |
++ jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor, |
+ (long) cinfo->max_v_samp_factor); |
+ /* Mark component needed (this flag isn't actually used for compression) */ |
+ compptr->component_needed = TRUE; |
+@@ -119,7 +154,7 @@ |
+ * main controller will call coefficient controller). |
+ */ |
+ cinfo->total_iMCU_rows = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_height, |
++ jdiv_round_up((long) cinfo->_jpeg_height, |
+ (long) (cinfo->max_v_samp_factor*DCTSIZE)); |
+ } |
+ |
+@@ -347,10 +382,10 @@ |
+ |
+ /* Overall image size in MCUs */ |
+ cinfo->MCUs_per_row = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_width, |
++ jdiv_round_up((long) cinfo->_jpeg_width, |
+ (long) (cinfo->max_h_samp_factor*DCTSIZE)); |
+ cinfo->MCU_rows_in_scan = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_height, |
++ jdiv_round_up((long) cinfo->_jpeg_height, |
+ (long) (cinfo->max_v_samp_factor*DCTSIZE)); |
+ |
+ cinfo->blocks_in_MCU = 0; |
+@@ -554,7 +589,7 @@ |
+ master->pub.is_last_pass = FALSE; |
+ |
+ /* Validate parameters, determine derived values */ |
+- initial_setup(cinfo); |
++ initial_setup(cinfo, transcode_only); |
+ |
+ if (cinfo->scan_info != NULL) { |
+ #ifdef C_MULTISCAN_FILES_SUPPORTED |
+@@ -567,7 +602,7 @@ |
+ cinfo->num_scans = 1; |
+ } |
+ |
+- if (cinfo->progressive_mode) /* TEMPORARY HACK ??? */ |
++ if (cinfo->progressive_mode && !cinfo->arith_code) /* TEMPORARY HACK ??? */ |
+ cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */ |
+ |
+ /* Initialize my private state */ |
+Index: jcparam.c |
+=================================================================== |
+--- jcparam.c (revision 829) |
++++ jcparam.c (working copy) |
+@@ -1,9 +1,11 @@ |
+ /* |
+ * jcparam.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1998, Thomas G. Lane. |
+- * Copyright (C) 2009, D. R. Commander. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2003-2008 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2009-2011, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains optional default-setting code for the JPEG compressor. |
+@@ -61,7 +63,50 @@ |
+ } |
+ |
+ |
++/* These are the sample quantization tables given in JPEG spec section K.1. |
++ * The spec says that the values given produce "good" quality, and |
++ * when divided by 2, "very good" quality. |
++ */ |
++static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = { |
++ 16, 11, 10, 16, 24, 40, 51, 61, |
++ 12, 12, 14, 19, 26, 58, 60, 55, |
++ 14, 13, 16, 24, 40, 57, 69, 56, |
++ 14, 17, 22, 29, 51, 87, 80, 62, |
++ 18, 22, 37, 56, 68, 109, 103, 77, |
++ 24, 35, 55, 64, 81, 104, 113, 92, |
++ 49, 64, 78, 87, 103, 121, 120, 101, |
++ 72, 92, 95, 98, 112, 100, 103, 99 |
++}; |
++static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { |
++ 17, 18, 24, 47, 99, 99, 99, 99, |
++ 18, 21, 26, 66, 99, 99, 99, 99, |
++ 24, 26, 56, 99, 99, 99, 99, 99, |
++ 47, 66, 99, 99, 99, 99, 99, 99, |
++ 99, 99, 99, 99, 99, 99, 99, 99, |
++ 99, 99, 99, 99, 99, 99, 99, 99, |
++ 99, 99, 99, 99, 99, 99, 99, 99, |
++ 99, 99, 99, 99, 99, 99, 99, 99 |
++}; |
++ |
++ |
++#if JPEG_LIB_VERSION >= 70 |
+ GLOBAL(void) |
++jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) |
++/* Set or change the 'quality' (quantization) setting, using default tables |
++ * and straight percentage-scaling quality scales. |
++ * This entry point allows different scalings for luminance and chrominance. |
++ */ |
++{ |
++ /* Set up two quantization tables using the specified scaling */ |
++ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, |
++ cinfo->q_scale_factor[0], force_baseline); |
++ jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, |
++ cinfo->q_scale_factor[1], force_baseline); |
++} |
++#endif |
++ |
++ |
++GLOBAL(void) |
+ jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, |
+ boolean force_baseline) |
+ /* Set or change the 'quality' (quantization) setting, using default tables |
+@@ -70,31 +115,6 @@ |
+ * applications that insist on a linear percentage scaling. |
+ */ |
+ { |
+- /* These are the sample quantization tables given in JPEG spec section K.1. |
+- * The spec says that the values given produce "good" quality, and |
+- * when divided by 2, "very good" quality. |
+- */ |
+- static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = { |
+- 16, 11, 10, 16, 24, 40, 51, 61, |
+- 12, 12, 14, 19, 26, 58, 60, 55, |
+- 14, 13, 16, 24, 40, 57, 69, 56, |
+- 14, 17, 22, 29, 51, 87, 80, 62, |
+- 18, 22, 37, 56, 68, 109, 103, 77, |
+- 24, 35, 55, 64, 81, 104, 113, 92, |
+- 49, 64, 78, 87, 103, 121, 120, 101, |
+- 72, 92, 95, 98, 112, 100, 103, 99 |
+- }; |
+- static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { |
+- 17, 18, 24, 47, 99, 99, 99, 99, |
+- 18, 21, 26, 66, 99, 99, 99, 99, |
+- 24, 26, 56, 99, 99, 99, 99, 99, |
+- 47, 66, 99, 99, 99, 99, 99, 99, |
+- 99, 99, 99, 99, 99, 99, 99, 99, |
+- 99, 99, 99, 99, 99, 99, 99, 99, |
+- 99, 99, 99, 99, 99, 99, 99, 99, |
+- 99, 99, 99, 99, 99, 99, 99, 99 |
+- }; |
+- |
+ /* Set up two quantization tables using the specified scaling */ |
+ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, |
+ scale_factor, force_baseline); |
+@@ -285,6 +305,10 @@ |
+ |
+ /* Initialize everything not dependent on the color space */ |
+ |
++#if JPEG_LIB_VERSION >= 70 |
++ cinfo->scale_num = 1; /* 1:1 scaling */ |
++ cinfo->scale_denom = 1; |
++#endif |
+ cinfo->data_precision = BITS_IN_JSAMPLE; |
+ /* Set up two quantization tables using default quality of 75 */ |
+ jpeg_set_quality(cinfo, 75, TRUE); |
+@@ -321,6 +345,11 @@ |
+ /* By default, use the simpler non-cosited sampling alignment */ |
+ cinfo->CCIR601_sampling = FALSE; |
+ |
++#if JPEG_LIB_VERSION >= 70 |
++ /* By default, apply fancy downsampling */ |
++ cinfo->do_fancy_downsampling = TRUE; |
++#endif |
++ |
+ /* No input smoothing */ |
+ cinfo->smoothing_factor = 0; |
+ |
+@@ -370,6 +399,10 @@ |
+ case JCS_EXT_BGRX: |
+ case JCS_EXT_XBGR: |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_RGBA: |
++ case JCS_EXT_BGRA: |
++ case JCS_EXT_ABGR: |
++ case JCS_EXT_ARGB: |
+ jpeg_set_colorspace(cinfo, JCS_YCbCr); |
+ break; |
+ case JCS_YCbCr: |
+Index: jctrans.c |
+=================================================================== |
+--- jctrans.c (revision 829) |
++++ jctrans.c (working copy) |
+@@ -2,6 +2,7 @@ |
+ * jctrans.c |
+ * |
+ * Copyright (C) 1995-1998, Thomas G. Lane. |
++ * Modified 2000-2009 by Guido Vollbeding. |
+ * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+@@ -76,6 +77,12 @@ |
+ dstinfo->image_height = srcinfo->image_height; |
+ dstinfo->input_components = srcinfo->num_components; |
+ dstinfo->in_color_space = srcinfo->jpeg_color_space; |
++#if JPEG_LIB_VERSION >= 70 |
++ dstinfo->jpeg_width = srcinfo->output_width; |
++ dstinfo->jpeg_height = srcinfo->output_height; |
++ dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size; |
++ dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size; |
++#endif |
+ /* Initialize all parameters to default values */ |
+ jpeg_set_defaults(dstinfo); |
+ /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB. |
+@@ -167,7 +174,11 @@ |
+ |
+ /* Entropy encoding: either Huffman or arithmetic coding. */ |
+ if (cinfo->arith_code) { |
++#ifdef C_ARITH_CODING_SUPPORTED |
++ jinit_arith_encoder(cinfo); |
++#else |
+ ERREXIT(cinfo, JERR_ARITH_NOTIMPL); |
++#endif |
+ } else { |
+ if (cinfo->progressive_mode) { |
+ #ifdef C_PROGRESSIVE_SUPPORTED |
+Index: jdapistd.c |
+=================================================================== |
+--- jdapistd.c (revision 829) |
++++ jdapistd.c (working copy) |
+@@ -1,8 +1,11 @@ |
+ /* |
+ * jdapistd.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1996, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2010, 2015, D. R. Commander. |
++ * Copyright (C) 2015, Google, Inc. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains application interface code for the decompression half |
+@@ -14,9 +17,10 @@ |
+ * whole decompression library into a transcoder. |
+ */ |
+ |
+-#define JPEG_INTERNALS |
+-#include "jinclude.h" |
+-#include "jpeglib.h" |
++#include "jdmainct.h" |
++#include "jdcoefct.h" |
++#include "jdsample.h" |
++#include "jmemsys.h" |
+ |
+ |
+ /* Forward declarations */ |
+@@ -176,7 +180,236 @@ |
+ } |
+ |
+ |
++ |
++/* Dummy color convert function used by jpeg_skip_scanlines() */ |
++LOCAL(void) |
++noop_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, |
++ JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows) |
++{ |
++} |
++ |
++ |
+ /* |
++ * In some cases, it is best to call jpeg_read_scanlines() and discard the |
++ * output, rather than skipping the scanlines, because this allows us to |
++ * maintain the internal state of the context-based upsampler. In these cases, |
++ * we set up and tear down a dummy color converter in order to avoid valgrind |
++ * errors and to achieve the best possible performance. |
++ */ |
++LOCAL(void) |
++read_and_discard_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) |
++{ |
++ JDIMENSION n; |
++ void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, |
++ JDIMENSION input_row, JSAMPARRAY output_buf, |
++ int num_rows); |
++ |
++ color_convert = cinfo->cconvert->color_convert; /* save the real converter */ |
++ cinfo->cconvert->color_convert = noop_convert; /* install the do-nothing stand-in */ |
++ |
++ for (n = 0; n < num_lines; n++) |
++ jpeg_read_scanlines(cinfo, NULL, 1); /* one line per call, NULL output buffer */ |
++ |
++ cinfo->cconvert->color_convert = color_convert; /* restore the real converter */ |
++} |
++ |
++/* |
++ * Called by jpeg_skip_scanlines(). This partially skips a decompress block by |
++ * incrementing the rowgroup counter. |
++ */ |
++ |
++LOCAL(void) |
++increment_simple_rowgroup_ctr (j_decompress_ptr cinfo, JDIMENSION rows) |
++{ |
++ JDIMENSION rows_left; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
++ |
++ /* Increment the counter to the next row group after the skipped rows. */ |
++ main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor; |
++ |
++ /* Partially skipping a row group would involve modifying the internal state |
++ * of the upsampler, so read the remaining rows into a dummy buffer instead. |
++ */ |
++ rows_left = rows % cinfo->max_v_samp_factor; |
++ cinfo->output_scanline += rows - rows_left; |
++ |
++ read_and_discard_scanlines(cinfo, rows_left); |
++} |
++ |
++/* |
++ * Skips some scanlines of data from the JPEG decompressor. |
++ * |
++ * The return value will be the number of lines actually skipped. If skipping |
++ * num_lines would move beyond the end of the image, then the actual number of |
++ * lines remaining in the image is returned. Otherwise, the return value will |
++ * be equal to num_lines. |
++ * |
++ * Refer to libjpeg.txt for more information. |
++ */ |
++ |
++GLOBAL(JDIMENSION) |
++jpeg_skip_scanlines (j_decompress_ptr cinfo, JDIMENSION num_lines) |
++{ |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
++ my_coef_ptr coef = (my_coef_ptr) cinfo->coef; |
++ my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
++ JDIMENSION i, x; |
++ int y; |
++ JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row; |
++ JDIMENSION lines_to_skip, lines_to_read; |
++ |
++ if (cinfo->global_state != DSTATE_SCANNING) |
++ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); |
++ |
++ /* Do not skip past the bottom of the image; report the lines remaining. */ |
++ if (cinfo->output_scanline + num_lines >= cinfo->output_height) { |
++ num_lines = cinfo->output_height - cinfo->output_scanline; |
++ cinfo->output_scanline = cinfo->output_height; |
++ return num_lines; |
++ } |
++ if (num_lines == 0) |
++ return 0; |
++ |
++ lines_per_iMCU_row = cinfo->_min_DCT_scaled_size * cinfo->max_v_samp_factor; |
++ lines_left_in_iMCU_row = |
++ (lines_per_iMCU_row - (cinfo->output_scanline % lines_per_iMCU_row)) % |
++ lines_per_iMCU_row; |
++ lines_after_iMCU_row = num_lines - lines_left_in_iMCU_row; |
++ |
++ /* Skip the lines remaining in the current iMCU row. When upsampling |
++ * requires context rows, we need the previous and next rows in order to read |
++ * the current row. This adds some complexity. |
++ */ |
++ if (cinfo->upsample->need_context_rows) { |
++ /* If the skipped lines would not move us past the current iMCU row, we |
++ * read the lines and ignore them. There might be a faster way of doing |
++ * this, but we are facing increasing complexity for diminishing returns. |
++ * The increasing complexity would be a by-product of meddling with the |
++ * state machine used to skip context rows. Near the end of an iMCU row, |
++ * the next iMCU row may have already been entropy-decoded. In this unique |
++ * case, we will read the next iMCU row if we cannot skip past it as well. |
++ */ |
++ if ((num_lines < lines_left_in_iMCU_row + 1) || |
++ (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full && |
++ lines_after_iMCU_row < lines_per_iMCU_row + 1)) { |
++ read_and_discard_scanlines(cinfo, num_lines); |
++ return num_lines; |
++ } |
++ |
++ /* If the next iMCU row has already been entropy-decoded, make sure that |
++ * we do not skip too far. |
++ */ |
++ if (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full) { |
++ cinfo->output_scanline += lines_left_in_iMCU_row + lines_per_iMCU_row; |
++ lines_after_iMCU_row -= lines_per_iMCU_row; |
++ } else { |
++ cinfo->output_scanline += lines_left_in_iMCU_row; |
++ } |
++ |
++ /* If we have just completed the first block, adjust the buffer pointers */ |
++ if (main_ptr->iMCU_row_ctr == 0 || |
++ (main_ptr->iMCU_row_ctr == 1 && lines_left_in_iMCU_row > 2)) |
++ set_wraparound_pointers(cinfo); |
++ main_ptr->buffer_full = FALSE; |
++ main_ptr->rowgroup_ctr = 0; |
++ main_ptr->context_state = CTX_PREPARE_FOR_IMCU; |
++ upsample->next_row_out = cinfo->max_v_samp_factor; |
++ upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; |
++ } |
++ |
++ /* Skipping is much simpler when context rows are not required. */ |
++ else { |
++ if (num_lines < lines_left_in_iMCU_row) { |
++ increment_simple_rowgroup_ctr(cinfo, num_lines); |
++ return num_lines; |
++ } else { |
++ cinfo->output_scanline += lines_left_in_iMCU_row; |
++ main_ptr->buffer_full = FALSE; |
++ main_ptr->rowgroup_ctr = 0; |
++ upsample->next_row_out = cinfo->max_v_samp_factor; |
++ upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; |
++ } |
++ } |
++ |
++ /* Calculate how many full iMCU rows we can skip. */ |
++ if (cinfo->upsample->need_context_rows) |
++ lines_to_skip = ((lines_after_iMCU_row - 1) / lines_per_iMCU_row) * |
++ lines_per_iMCU_row; |
++ else |
++ lines_to_skip = (lines_after_iMCU_row / lines_per_iMCU_row) * |
++ lines_per_iMCU_row; |
++ /* Calculate the number of lines that remain to be skipped after skipping all |
++ * of the full iMCU rows that we can. We will not read these lines unless we |
++ * have to. |
++ */ |
++ lines_to_read = lines_after_iMCU_row - lines_to_skip; |
++ |
++ /* For images requiring multiple scans (progressive, non-interleaved, etc.), |
++ * all of the entropy decoding occurs in jpeg_start_decompress(), assuming |
++ * that the input data source is non-suspending. This makes skipping easy. |
++ */ |
++ if (cinfo->inputctl->has_multiple_scans) { |
++ if (cinfo->upsample->need_context_rows) { |
++ cinfo->output_scanline += lines_to_skip; |
++ cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row; |
++ main_ptr->iMCU_row_ctr += lines_after_iMCU_row / lines_per_iMCU_row; |
++ /* It is complex to properly move to the middle of a context block, so |
++ * read the remaining lines instead of skipping them. |
++ */ |
++ read_and_discard_scanlines(cinfo, lines_to_read); |
++ } else { |
++ cinfo->output_scanline += lines_to_skip; |
++ cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row; |
++ increment_simple_rowgroup_ctr(cinfo, lines_to_read); |
++ } |
++ upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; |
++ return num_lines; |
++ } |
++ |
++ /* Skip the iMCU rows that we can safely skip. */ |
++ for (i = 0; i < lines_to_skip; i += lines_per_iMCU_row) { |
++ for (y = 0; y < coef->MCU_rows_per_iMCU_row; y++) { |
++ for (x = 0; x < cinfo->MCUs_per_row; x++) { |
++ /* Calling decode_mcu() with a NULL pointer causes it to discard the |
++ * decoded coefficients. This is ~5% faster for large subsets, but |
++ * it's tough to tell a difference for smaller images. |
++ */ |
++ (*cinfo->entropy->decode_mcu) (cinfo, NULL); |
++ } |
++ } |
++ cinfo->input_iMCU_row++; |
++ cinfo->output_iMCU_row++; |
++ if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) |
++ start_iMCU_row(cinfo); |
++ else |
++ (*cinfo->inputctl->finish_input_pass) (cinfo); |
++ } |
++ cinfo->output_scanline += lines_to_skip; |
++ |
++ if (cinfo->upsample->need_context_rows) { |
++ /* Context-based upsampling keeps track of iMCU rows. */ |
++ main_ptr->iMCU_row_ctr += lines_to_skip / lines_per_iMCU_row; |
++ |
++ /* It is complex to properly move to the middle of a context block, so |
++ * read the remaining lines instead of skipping them. |
++ */ |
++ read_and_discard_scanlines(cinfo, lines_to_read); |
++ } else { |
++ increment_simple_rowgroup_ctr(cinfo, lines_to_read); |
++ } |
++ |
++ /* Since skipping lines involves skipping the upsampling step, the value of |
++ * "rows_to_go" will become invalid unless we set it here. NOTE: This is a |
++ * bit odd, since "rows_to_go" seems to be redundantly keeping track of |
++ * output_scanline. |
++ */ |
++ upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline; |
++ |
++ /* Always skip the requested number of lines. */ |
++ return num_lines; |
++} |
++ |
++/* |
+ * Alternate entry point to read raw data. |
+ * Processes exactly one iMCU row per call, unless suspended. |
+ */ |
+@@ -202,7 +435,7 @@ |
+ } |
+ |
+ /* Verify that at least one iMCU row can be returned. */ |
+- lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size; |
++ lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size; |
+ if (max_lines < lines_per_iMCU_row) |
+ ERREXIT(cinfo, JERR_BUFFER_SIZE); |
+ |
+Index: jdatadst.c |
+=================================================================== |
+--- jdatadst.c (revision 829) |
++++ jdatadst.c (working copy) |
+@@ -1,14 +1,17 @@ |
+ /* |
+ * jdatadst.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1996, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2009-2012 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2013, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains compression data destination routines for the case of |
+- * emitting JPEG data to a file (or any stdio stream). While these routines |
+- * are sufficient for most applications, some will want to use a different |
+- * destination manager. |
++ * emitting JPEG data to memory or to a file (or any stdio stream). |
++ * While these routines are sufficient for most applications, |
++ * some will want to use a different destination manager. |
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of |
+ * JOCTETs into 8-bit-wide elements on external storage. If char is wider |
+ * than 8 bits on your machine, you may need to do some tweaking. |
+@@ -19,7 +22,12 @@ |
+ #include "jpeglib.h" |
+ #include "jerror.h" |
+ |
++#ifndef HAVE_STDLIB_H /* <stdlib.h> should declare malloc(),free() */ |
++extern void * malloc JPP((size_t size)); |
++extern void free JPP((void *ptr)); |
++#endif |
+ |
++ |
+ /* Expanded data destination object for stdio output */ |
+ |
+ typedef struct { |
+@@ -34,6 +42,23 @@ |
+ #define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */ |
+ |
+ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++/* Expanded data destination object for memory output */ |
++ |
++typedef struct { |
++ struct jpeg_destination_mgr pub; /* public fields */ |
++ |
++ unsigned char ** outbuffer; /* target buffer */ |
++ unsigned long * outsize; |
++ unsigned char * newbuffer; /* newly allocated buffer */ |
++ JOCTET * buffer; /* start of buffer */ |
++ size_t bufsize; |
++} my_mem_destination_mgr; |
++ |
++typedef my_mem_destination_mgr * my_mem_dest_ptr; |
++#endif |
++ |
++ |
+ /* |
+ * Initialize destination --- called by jpeg_start_compress |
+ * before any data is actually written. |
+@@ -53,7 +78,15 @@ |
+ dest->pub.free_in_buffer = OUTPUT_BUF_SIZE; |
+ } |
+ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++METHODDEF(void) |
++init_mem_destination (j_compress_ptr cinfo) |
++{ |
++ /* no work necessary here */ |
++} |
++#endif |
+ |
++ |
+ /* |
+ * Empty the output buffer --- called whenever buffer fills up. |
+ * |
+@@ -92,7 +125,39 @@ |
+ return TRUE; |
+ } |
+ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++METHODDEF(boolean) |
++empty_mem_output_buffer (j_compress_ptr cinfo) |
++{ |
++ size_t nextsize; |
++ JOCTET * nextbuffer; |
++ my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; |
+ |
++ /* Try to allocate a new buffer of twice the current size */ |
++ nextsize = dest->bufsize * 2; /* NOTE(review): doubling is not checked for size_t overflow */ |
++ nextbuffer = (JOCTET *) malloc(nextsize); |
++ |
++ if (nextbuffer == NULL) |
++ ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); |
++ |
++ MEMCOPY(nextbuffer, dest->buffer, dest->bufsize); /* carry over the whole old buffer */ |
++ |
++ if (dest->newbuffer != NULL) /* release the previous library-allocated buffer, if any */ |
++ free(dest->newbuffer); |
++ |
++ dest->newbuffer = nextbuffer; |
++ |
++ dest->pub.next_output_byte = nextbuffer + dest->bufsize; /* resume right after the copied data */ |
++ dest->pub.free_in_buffer = dest->bufsize; /* the newly added half is free */ |
++ |
++ dest->buffer = nextbuffer; |
++ dest->bufsize = nextsize; |
++ |
++ return TRUE; |
++} |
++#endif |
++ |
++ |
+ /* |
+ * Terminate destination --- called by jpeg_finish_compress |
+ * after all data has been written. Usually needs to flush buffer. |
+@@ -119,7 +184,18 @@ |
+ ERREXIT(cinfo, JERR_FILE_WRITE); |
+ } |
+ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++METHODDEF(void) |
++term_mem_destination (j_compress_ptr cinfo) |
++{ |
++ my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; |
+ |
++ *dest->outbuffer = dest->buffer; |
++ *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer); |
++} |
++#endif |
++ |
++ |
+ /* |
+ * Prepare for output to a stdio stream. |
+ * The caller must have already opened the stream, and is responsible |
+@@ -149,3 +225,55 @@ |
+ dest->pub.term_destination = term_destination; |
+ dest->outfile = outfile; |
+ } |
++ |
++ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++/* |
++ * Prepare for output to a memory buffer. |
++ * The caller may supply its own initial buffer of appropriate size. |
++ * Otherwise, or when the actual data output exceeds the given size, |
++ * the library adapts the buffer size as necessary. |
++ * The standard library functions malloc/free are used for allocating |
++ * larger memory, so the buffer is available to the application after |
++ * finishing compression, and then the application is responsible for |
++ * freeing the requested memory. |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_mem_dest (j_compress_ptr cinfo, |
++ unsigned char ** outbuffer, unsigned long * outsize) |
++{ |
++ my_mem_dest_ptr dest; |
++ |
++ if (outbuffer == NULL || outsize == NULL) /* sanity check */ |
++ ERREXIT(cinfo, JERR_BUFFER_SIZE); |
++ |
++ /* The destination object is made permanent so that multiple JPEG images |
++ * can be written to the same buffer without re-executing jpeg_mem_dest. |
++ */ |
++ if (cinfo->dest == NULL) { /* first time for this JPEG object? */ |
++ cinfo->dest = (struct jpeg_destination_mgr *) |
++ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, |
++ SIZEOF(my_mem_destination_mgr)); |
++ } |
++ |
++ dest = (my_mem_dest_ptr) cinfo->dest; /* NOTE(review): an existing dest is reused without checking it is a my_mem_destination_mgr -- confirm */ |
++ dest->pub.init_destination = init_mem_destination; |
++ dest->pub.empty_output_buffer = empty_mem_output_buffer; |
++ dest->pub.term_destination = term_mem_destination; |
++ dest->outbuffer = outbuffer; |
++ dest->outsize = outsize; |
++ dest->newbuffer = NULL; /* nothing allocated by the library yet */ |
++ |
++ if (*outbuffer == NULL || *outsize == 0) { |
++ /* Allocate an initial buffer of OUTPUT_BUF_SIZE bytes and publish it via *outbuffer */ |
++ dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE); |
++ if (dest->newbuffer == NULL) |
++ ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); |
++ *outsize = OUTPUT_BUF_SIZE; |
++ } |
++ |
++ dest->pub.next_output_byte = dest->buffer = *outbuffer; /* start writing at the buffer's first byte */ |
++ dest->pub.free_in_buffer = dest->bufsize = *outsize; /* the whole buffer is free */ |
++} |
++#endif |
+Index: jdatasrc.c |
+=================================================================== |
+--- jdatasrc.c (revision 829) |
++++ jdatasrc.c (working copy) |
+@@ -1,14 +1,17 @@ |
+ /* |
+ * jdatasrc.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1996, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2009-2011 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2013, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains decompression data source routines for the case of |
+- * reading JPEG data from a file (or any stdio stream). While these routines |
+- * are sufficient for most applications, some will want to use a different |
+- * source manager. |
++ * reading JPEG data from memory or from a file (or any stdio stream). |
++ * While these routines are sufficient for most applications, |
++ * some will want to use a different source manager. |
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of |
+ * JOCTETs from 8-bit-wide elements on external storage. If char is wider |
+ * than 8 bits on your machine, you may need to do some tweaking. |
+@@ -52,7 +55,15 @@ |
+ src->start_of_file = TRUE; |
+ } |
+ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++METHODDEF(void) |
++init_mem_source (j_decompress_ptr cinfo) |
++{ |
++ /* no work necessary here */ |
++} |
++#endif |
+ |
++ |
+ /* |
+ * Fill the input buffer --- called whenever buffer is emptied. |
+ * |
+@@ -111,7 +122,30 @@ |
+ return TRUE; |
+ } |
+ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++METHODDEF(boolean) |
++fill_mem_input_buffer (j_decompress_ptr cinfo) |
++{ |
++ static const JOCTET mybuffer[4] = { |
++ (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0 |
++ }; |
+ |
++ /* The whole JPEG data is expected to reside in the supplied memory |
++ * buffer, so any request for more data beyond the given buffer size |
++ * is answered with a JWRN_JPEG_EOF warning instead of more input. |
++ */ |
++ WARNMS(cinfo, JWRN_JPEG_EOF); |
++ |
++ /* Insert a fake EOI marker (the first two bytes of mybuffer) */ |
++ |
++ cinfo->src->next_input_byte = mybuffer; |
++ cinfo->src->bytes_in_buffer = 2; |
++ |
++ return TRUE; |
++} |
++#endif |
++ |
++ |
+ /* |
+ * Skip data --- used to skip over a potentially large amount of |
+ * uninteresting data (such as an APPn marker). |
+@@ -127,7 +161,7 @@ |
+ METHODDEF(void) |
+ skip_input_data (j_decompress_ptr cinfo, long num_bytes) |
+ { |
+- my_src_ptr src = (my_src_ptr) cinfo->src; |
++ struct jpeg_source_mgr * src = cinfo->src; |
+ |
+ /* Just a dumb implementation for now. Could use fseek() except |
+ * it doesn't work on pipes. Not clear that being smart is worth |
+@@ -134,15 +168,15 @@ |
+ * any trouble anyway --- large skips are infrequent. |
+ */ |
+ if (num_bytes > 0) { |
+- while (num_bytes > (long) src->pub.bytes_in_buffer) { |
+- num_bytes -= (long) src->pub.bytes_in_buffer; |
+- (void) fill_input_buffer(cinfo); |
++ while (num_bytes > (long) src->bytes_in_buffer) { |
++ num_bytes -= (long) src->bytes_in_buffer; |
++ (void) (*src->fill_input_buffer) (cinfo); |
+ /* note we assume that fill_input_buffer will never return FALSE, |
+ * so suspension need not be handled. |
+ */ |
+ } |
+- src->pub.next_input_byte += (size_t) num_bytes; |
+- src->pub.bytes_in_buffer -= (size_t) num_bytes; |
++ src->next_input_byte += (size_t) num_bytes; |
++ src->bytes_in_buffer -= (size_t) num_bytes; |
+ } |
+ } |
+ |
+@@ -210,3 +244,40 @@ |
+ src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */ |
+ src->pub.next_input_byte = NULL; /* until buffer loaded */ |
+ } |
++ |
++ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++/* |
++ * Prepare for input from a supplied memory buffer. |
++ * The buffer must contain the whole JPEG data. |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_mem_src (j_decompress_ptr cinfo, |
++ unsigned char * inbuffer, unsigned long insize) |
++{ |
++ struct jpeg_source_mgr * src; |
++ |
++ if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */ |
++ ERREXIT(cinfo, JERR_INPUT_EMPTY); |
++ |
++ /* The source object is made permanent so that a series of JPEG images |
++ * can be read from the same buffer by calling jpeg_mem_src only before |
++ * the first one. |
++ */ |
++ if (cinfo->src == NULL) { /* first time for this JPEG object? */ |
++ cinfo->src = (struct jpeg_source_mgr *) |
++ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, |
++ SIZEOF(struct jpeg_source_mgr)); |
++ } |
++ |
++ src = cinfo->src; |
++ src->init_source = init_mem_source; |
++ src->fill_input_buffer = fill_mem_input_buffer; |
++ src->skip_input_data = skip_input_data; |
++ src->resync_to_restart = jpeg_resync_to_restart; /* use default method */ |
++ src->term_source = term_source; |
++ src->bytes_in_buffer = (size_t) insize; |
++ src->next_input_byte = (JOCTET *) inbuffer; |
++} |
++#endif |
+Index: jdcoefct.c |
+=================================================================== |
+--- jdcoefct.c (revision 829) |
++++ jdcoefct.c (working copy) |
+@@ -1,8 +1,11 @@ |
+ /* |
+ * jdcoefct.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1997, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++ * Copyright (C) 2010, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains the coefficient buffer controller for decompression. |
+@@ -14,56 +17,10 @@ |
+ * Also, the input side (only) is used when reading a file for transcoding. |
+ */ |
+ |
+-#define JPEG_INTERNALS |
+-#include "jinclude.h" |
+-#include "jpeglib.h" |
++#include "jdcoefct.h" |
++#include "jpegcomp.h" |
+ |
+-/* Block smoothing is only applicable for progressive JPEG, so: */ |
+-#ifndef D_PROGRESSIVE_SUPPORTED |
+-#undef BLOCK_SMOOTHING_SUPPORTED |
+-#endif |
+ |
+-/* Private buffer controller object */ |
+- |
+-typedef struct { |
+- struct jpeg_d_coef_controller pub; /* public fields */ |
+- |
+- /* These variables keep track of the current location of the input side. */ |
+- /* cinfo->input_iMCU_row is also used for this. */ |
+- JDIMENSION MCU_ctr; /* counts MCUs processed in current row */ |
+- int MCU_vert_offset; /* counts MCU rows within iMCU row */ |
+- int MCU_rows_per_iMCU_row; /* number of such rows needed */ |
+- |
+- /* The output side's location is represented by cinfo->output_iMCU_row. */ |
+- |
+- /* In single-pass modes, it's sufficient to buffer just one MCU. |
+- * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks, |
+- * and let the entropy decoder write into that workspace each time. |
+- * (On 80x86, the workspace is FAR even though it's not really very big; |
+- * this is to keep the module interfaces unchanged when a large coefficient |
+- * buffer is necessary.) |
+- * In multi-pass modes, this array points to the current MCU's blocks |
+- * within the virtual arrays; it is used only by the input side. |
+- */ |
+- JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU]; |
+- |
+- /* Temporary workspace for one MCU */ |
+- JCOEF * workspace; |
+- |
+-#ifdef D_MULTISCAN_FILES_SUPPORTED |
+- /* In multi-pass modes, we need a virtual block array for each component. */ |
+- jvirt_barray_ptr whole_image[MAX_COMPONENTS]; |
+-#endif |
+- |
+-#ifdef BLOCK_SMOOTHING_SUPPORTED |
+- /* When doing block smoothing, we latch coefficient Al values here */ |
+- int * coef_bits_latch; |
+-#define SAVED_COEFS 6 /* we save coef_bits[0..5] */ |
+-#endif |
+-} my_coef_controller; |
+- |
+-typedef my_coef_controller * my_coef_ptr; |
+- |
+ /* Forward declarations */ |
+ METHODDEF(int) decompress_onepass |
+ JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf)); |
+@@ -78,30 +35,6 @@ |
+ #endif |
+ |
+ |
+-LOCAL(void) |
+-start_iMCU_row (j_decompress_ptr cinfo) |
+-/* Reset within-iMCU-row counters for a new row (input side) */ |
+-{ |
+- my_coef_ptr coef = (my_coef_ptr) cinfo->coef; |
+- |
+- /* In an interleaved scan, an MCU row is the same as an iMCU row. |
+- * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows. |
+- * But at the bottom of the image, process only what's left. |
+- */ |
+- if (cinfo->comps_in_scan > 1) { |
+- coef->MCU_rows_per_iMCU_row = 1; |
+- } else { |
+- if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1)) |
+- coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor; |
+- else |
+- coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height; |
+- } |
+- |
+- coef->MCU_ctr = 0; |
+- coef->MCU_vert_offset = 0; |
+-} |
+- |
+- |
+ /* |
+ * Initialize for an input processing pass. |
+ */ |
+@@ -190,7 +123,7 @@ |
+ useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width |
+ : compptr->last_col_width; |
+ output_ptr = output_buf[compptr->component_index] + |
+- yoffset * compptr->DCT_scaled_size; |
++ yoffset * compptr->_DCT_scaled_size; |
+ start_col = MCU_col_num * compptr->MCU_sample_width; |
+ for (yindex = 0; yindex < compptr->MCU_height; yindex++) { |
+ if (cinfo->input_iMCU_row < last_iMCU_row || |
+@@ -200,11 +133,11 @@ |
+ (*inverse_DCT) (cinfo, compptr, |
+ (JCOEFPTR) coef->MCU_buffer[blkn+xindex], |
+ output_ptr, output_col); |
+- output_col += compptr->DCT_scaled_size; |
++ output_col += compptr->_DCT_scaled_size; |
+ } |
+ } |
+ blkn += compptr->MCU_width; |
+- output_ptr += compptr->DCT_scaled_size; |
++ output_ptr += compptr->_DCT_scaled_size; |
+ } |
+ } |
+ } |
+@@ -365,9 +298,9 @@ |
+ (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr, |
+ output_ptr, output_col); |
+ buffer_ptr++; |
+- output_col += compptr->DCT_scaled_size; |
++ output_col += compptr->_DCT_scaled_size; |
+ } |
+- output_ptr += compptr->DCT_scaled_size; |
++ output_ptr += compptr->_DCT_scaled_size; |
+ } |
+ } |
+ |
+@@ -660,9 +593,9 @@ |
+ DC4 = DC5; DC5 = DC6; |
+ DC7 = DC8; DC8 = DC9; |
+ buffer_ptr++, prev_block_row++, next_block_row++; |
+- output_col += compptr->DCT_scaled_size; |
++ output_col += compptr->_DCT_scaled_size; |
+ } |
+- output_ptr += compptr->DCT_scaled_size; |
++ output_ptr += compptr->_DCT_scaled_size; |
+ } |
+ } |
+ |
+Index: jdcolor.c |
+=================================================================== |
+--- jdcolor.c (revision 829) |
++++ jdcolor.c (working copy) |
+@@ -1,10 +1,12 @@ |
+ /* |
+ * jdcolor.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
++ * Modified 2011 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * Copyright (C) 2009, D. R. Commander. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Copyright (C) 2009, 2011-2012, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains output colorspace conversion routines. |
+@@ -14,6 +16,7 @@ |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
+ #include "jsimd.h" |
++#include "config.h" |
+ |
+ |
+ /* Private subobject */ |
+@@ -26,6 +29,9 @@ |
+ int * Cb_b_tab; /* => table for Cb to B conversion */ |
+ INT32 * Cr_g_tab; /* => table for Cr to G conversion */ |
+ INT32 * Cb_g_tab; /* => table for Cb to G conversion */ |
++ |
++ /* Private state for RGB->Y conversion */ |
++ INT32 * rgb_y_tab; /* => table for RGB to Y conversion */ |
+ } my_color_deconverter; |
+ |
+ typedef my_color_deconverter * my_cconvert_ptr; |
+@@ -32,14 +38,19 @@ |
+ |
+ |
+ /**************** YCbCr -> RGB conversion: most common case **************/ |
++/**************** RGB -> Y conversion: less common case **************/ |
+ |
+ /* |
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are |
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5. |
+ * The conversion equations to be implemented are therefore |
++ * |
+ * R = Y + 1.40200 * Cr |
+ * G = Y - 0.34414 * Cb - 0.71414 * Cr |
+ * B = Y + 1.77200 * Cb |
++ * |
++ * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B |
++ * |
+ * where Cb and Cr represent the incoming values less CENTERJSAMPLE. |
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.) |
+ * |
+@@ -64,7 +75,132 @@ |
+ #define ONE_HALF ((INT32) 1 << (SCALEBITS-1)) |
+ #define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5)) |
+ |
++/* We allocate one big table for RGB->Y conversion and divide it up into |
++ * three parts, instead of doing three alloc_small requests. This lets us |
++ * use a single table base address, which can be held in a register in the |
++ * inner loops on many machines (more than can hold all three addresses, |
++ * anyway). |
++ */ |
+ |
++#define R_Y_OFF 0 /* offset to R => Y section */ |
++#define G_Y_OFF (1*(MAXJSAMPLE+1)) /* offset to G => Y section */ |
++#define B_Y_OFF (2*(MAXJSAMPLE+1)) /* etc. */ |
++#define TABLE_SIZE (3*(MAXJSAMPLE+1)) |
++ |
++ |
++/* Include inline routines for colorspace extensions */ |
++ |
++#include "jdcolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++ |
++#define RGB_RED EXT_RGB_RED |
++#define RGB_GREEN EXT_RGB_GREEN |
++#define RGB_BLUE EXT_RGB_BLUE |
++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
++#define ycc_rgb_convert_internal ycc_extrgb_convert_internal |
++#define gray_rgb_convert_internal gray_extrgb_convert_internal |
++#define rgb_rgb_convert_internal rgb_extrgb_convert_internal |
++#include "jdcolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef ycc_rgb_convert_internal |
++#undef gray_rgb_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_RGBX_RED |
++#define RGB_GREEN EXT_RGBX_GREEN |
++#define RGB_BLUE EXT_RGBX_BLUE |
++#define RGB_ALPHA 3 |
++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
++#define ycc_rgb_convert_internal ycc_extrgbx_convert_internal |
++#define gray_rgb_convert_internal gray_extrgbx_convert_internal |
++#define rgb_rgb_convert_internal rgb_extrgbx_convert_internal |
++#include "jdcolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_ALPHA |
++#undef RGB_PIXELSIZE |
++#undef ycc_rgb_convert_internal |
++#undef gray_rgb_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_BGR_RED |
++#define RGB_GREEN EXT_BGR_GREEN |
++#define RGB_BLUE EXT_BGR_BLUE |
++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
++#define ycc_rgb_convert_internal ycc_extbgr_convert_internal |
++#define gray_rgb_convert_internal gray_extbgr_convert_internal |
++#define rgb_rgb_convert_internal rgb_extbgr_convert_internal |
++#include "jdcolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef ycc_rgb_convert_internal |
++#undef gray_rgb_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_BGRX_RED |
++#define RGB_GREEN EXT_BGRX_GREEN |
++#define RGB_BLUE EXT_BGRX_BLUE |
++#define RGB_ALPHA 3 |
++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
++#define ycc_rgb_convert_internal ycc_extbgrx_convert_internal |
++#define gray_rgb_convert_internal gray_extbgrx_convert_internal |
++#define rgb_rgb_convert_internal rgb_extbgrx_convert_internal |
++#include "jdcolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_ALPHA |
++#undef RGB_PIXELSIZE |
++#undef ycc_rgb_convert_internal |
++#undef gray_rgb_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_XBGR_RED |
++#define RGB_GREEN EXT_XBGR_GREEN |
++#define RGB_BLUE EXT_XBGR_BLUE |
++#define RGB_ALPHA 0 |
++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
++#define ycc_rgb_convert_internal ycc_extxbgr_convert_internal |
++#define gray_rgb_convert_internal gray_extxbgr_convert_internal |
++#define rgb_rgb_convert_internal rgb_extxbgr_convert_internal |
++#include "jdcolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_ALPHA |
++#undef RGB_PIXELSIZE |
++#undef ycc_rgb_convert_internal |
++#undef gray_rgb_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++#define RGB_RED EXT_XRGB_RED |
++#define RGB_GREEN EXT_XRGB_GREEN |
++#define RGB_BLUE EXT_XRGB_BLUE |
++#define RGB_ALPHA 0 |
++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
++#define ycc_rgb_convert_internal ycc_extxrgb_convert_internal |
++#define gray_rgb_convert_internal gray_extxrgb_convert_internal |
++#define rgb_rgb_convert_internal rgb_extxrgb_convert_internal |
++#include "jdcolext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_ALPHA |
++#undef RGB_PIXELSIZE |
++#undef ycc_rgb_convert_internal |
++#undef gray_rgb_convert_internal |
++#undef rgb_rgb_convert_internal |
++ |
++ |
+ /* |
+ * Initialize tables for YCC->RGB colorspace conversion. |
+ */ |
+@@ -110,13 +246,6 @@ |
+ |
+ /* |
+ * Convert some rows of samples to the output colorspace. |
+- * |
+- * Note that we change from noninterleaved, one-plane-per-component format |
+- * to interleaved-pixel format. The output buffer is therefore three times |
+- * as wide as the input buffer. |
+- * A starting row offset is provided only for the input buffer. The caller |
+- * can easily adjust the passed output_buf value to accommodate any row |
+- * offset required on that side. |
+ */ |
+ |
+ METHODDEF(void) |
+@@ -124,19 +253,86 @@ |
+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
+ JSAMPARRAY output_buf, int num_rows) |
+ { |
++ switch (cinfo->out_color_space) { |
++ case JCS_EXT_RGB: |
++ ycc_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ ycc_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGR: |
++ ycc_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ ycc_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ ycc_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ ycc_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ default: |
++ ycc_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ } |
++} |
++ |
++ |
++/**************** Cases other than YCbCr -> RGB **************/ |
++ |
++ |
++/* |
++ * Initialize for RGB->grayscale colorspace conversion. |
++ */ |
++ |
++LOCAL(void) |
++build_rgb_y_table (j_decompress_ptr cinfo) |
++{ |
+ my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; |
+- register int y, cb, cr; |
++ INT32 * rgb_y_tab; |
++ INT32 i; |
++ |
++ /* Allocate and fill in the conversion tables. */ |
++ cconvert->rgb_y_tab = rgb_y_tab = (INT32 *) |
++ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
++ (TABLE_SIZE * SIZEOF(INT32))); |
++ |
++ for (i = 0; i <= MAXJSAMPLE; i++) { |
++ rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i; |
++ rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i; |
++ rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF; |
++ } |
++} |
++ |
++ |
++/* |
++ * Convert RGB to grayscale. |
++ */ |
++ |
++METHODDEF(void) |
++rgb_gray_convert (j_decompress_ptr cinfo, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows) |
++{ |
++ my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; |
++ register int r, g, b; |
++ register INT32 * ctab = cconvert->rgb_y_tab; |
+ register JSAMPROW outptr; |
+ register JSAMPROW inptr0, inptr1, inptr2; |
+ register JDIMENSION col; |
+ JDIMENSION num_cols = cinfo->output_width; |
+- /* copy these pointers into registers if possible */ |
+- register JSAMPLE * range_limit = cinfo->sample_range_limit; |
+- register int * Crrtab = cconvert->Cr_r_tab; |
+- register int * Cbbtab = cconvert->Cb_b_tab; |
+- register INT32 * Crgtab = cconvert->Cr_g_tab; |
+- register INT32 * Cbgtab = cconvert->Cb_g_tab; |
+- SHIFT_TEMPS |
+ |
+ while (--num_rows >= 0) { |
+ inptr0 = input_buf[0][input_row]; |
+@@ -145,24 +341,18 @@ |
+ input_row++; |
+ outptr = *output_buf++; |
+ for (col = 0; col < num_cols; col++) { |
+- y = GETJSAMPLE(inptr0[col]); |
+- cb = GETJSAMPLE(inptr1[col]); |
+- cr = GETJSAMPLE(inptr2[col]); |
+- /* Range-limiting is essential due to noise introduced by DCT losses. */ |
+- outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + Crrtab[cr]]; |
+- outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + |
+- ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], |
+- SCALEBITS))]; |
+- outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + Cbbtab[cb]]; |
+- outptr += rgb_pixelsize[cinfo->out_color_space]; |
++ r = GETJSAMPLE(inptr0[col]); |
++ g = GETJSAMPLE(inptr1[col]); |
++ b = GETJSAMPLE(inptr2[col]); |
++ /* Y */ |
++ outptr[col] = (JSAMPLE) |
++ ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF]) |
++ >> SCALEBITS); |
+ } |
+ } |
+ } |
+ |
+ |
+-/**************** Cases other than YCbCr -> RGB **************/ |
+- |
+- |
+ /* |
+ * Color conversion for no colorspace change: just copy the data, |
+ * converting from separate-planes to interleaved representation. |
+@@ -211,9 +401,7 @@ |
+ |
+ |
+ /* |
+- * Convert grayscale to RGB: just duplicate the graylevel three times. |
+- * This is provided to support applications that don't want to cope |
+- * with grayscale as a separate case. |
++ * Convert grayscale to RGB |
+ */ |
+ |
+ METHODDEF(void) |
+@@ -221,20 +409,85 @@ |
+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
+ JSAMPARRAY output_buf, int num_rows) |
+ { |
+- register JSAMPROW inptr, outptr; |
+- register JDIMENSION col; |
+- JDIMENSION num_cols = cinfo->output_width; |
++ switch (cinfo->out_color_space) { |
++ case JCS_EXT_RGB: |
++ gray_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ gray_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGR: |
++ gray_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ gray_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ gray_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ gray_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ default: |
++ gray_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ } |
++} |
+ |
+- while (--num_rows >= 0) { |
+- inptr = input_buf[0][input_row++]; |
+- outptr = *output_buf++; |
+- for (col = 0; col < num_cols; col++) { |
+- /* We can dispense with GETJSAMPLE() here */ |
+- outptr[rgb_red[cinfo->out_color_space]] = |
+- outptr[rgb_green[cinfo->out_color_space]] = |
+- outptr[rgb_blue[cinfo->out_color_space]] = inptr[col]; |
+- outptr += rgb_pixelsize[cinfo->out_color_space]; |
+- } |
++ |
++/* |
++ * Convert plain RGB to extended RGB |
++ */ |
++ |
++METHODDEF(void) |
++rgb_rgb_convert (j_decompress_ptr cinfo, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows) |
++{ |
++ switch (cinfo->out_color_space) { |
++ case JCS_EXT_RGB: |
++ rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGR: |
++ rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
++ default: |
++ rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf, |
++ num_rows); |
++ break; |
+ } |
+ } |
+ |
+@@ -356,6 +609,9 @@ |
+ /* For color->grayscale conversion, only the Y (0) component is needed */ |
+ for (ci = 1; ci < cinfo->num_components; ci++) |
+ cinfo->comp_info[ci].component_needed = FALSE; |
++ } else if (cinfo->jpeg_color_space == JCS_RGB) { |
++ cconvert->pub.color_convert = rgb_gray_convert; |
++ build_rgb_y_table(cinfo); |
+ } else |
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); |
+ break; |
+@@ -367,6 +623,10 @@ |
+ case JCS_EXT_BGRX: |
+ case JCS_EXT_XBGR: |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_RGBA: |
++ case JCS_EXT_BGRA: |
++ case JCS_EXT_ABGR: |
++ case JCS_EXT_ARGB: |
+ cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space]; |
+ if (cinfo->jpeg_color_space == JCS_YCbCr) { |
+ if (jsimd_can_ycc_rgb()) |
+@@ -377,9 +637,14 @@ |
+ } |
+ } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { |
+ cconvert->pub.color_convert = gray_rgb_convert; |
+- } else if (cinfo->jpeg_color_space == cinfo->out_color_space && |
+- rgb_pixelsize[cinfo->out_color_space] == 3) { |
+- cconvert->pub.color_convert = null_convert; |
++ } else if (cinfo->jpeg_color_space == JCS_RGB) { |
++ if (rgb_red[cinfo->out_color_space] == 0 && |
++ rgb_green[cinfo->out_color_space] == 1 && |
++ rgb_blue[cinfo->out_color_space] == 2 && |
++ rgb_pixelsize[cinfo->out_color_space] == 3) |
++ cconvert->pub.color_convert = null_convert; |
++ else |
++ cconvert->pub.color_convert = rgb_rgb_convert; |
+ } else |
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL); |
+ break; |
+Index: jdct.h |
+=================================================================== |
+--- jdct.h (revision 829) |
++++ jdct.h (working copy) |
+@@ -95,9 +95,21 @@ |
+ #define jpeg_idct_islow jRDislow |
+ #define jpeg_idct_ifast jRDifast |
+ #define jpeg_idct_float jRDfloat |
++#define jpeg_idct_7x7 jRD7x7 |
++#define jpeg_idct_6x6 jRD6x6 |
++#define jpeg_idct_5x5 jRD5x5 |
+ #define jpeg_idct_4x4 jRD4x4 |
++#define jpeg_idct_3x3 jRD3x3 |
+ #define jpeg_idct_2x2 jRD2x2 |
+ #define jpeg_idct_1x1 jRD1x1 |
++#define jpeg_idct_9x9 jRD9x9 |
++#define jpeg_idct_10x10 jRD10x10 |
++#define jpeg_idct_11x11 jRD11x11 |
++#define jpeg_idct_12x12 jRD12x12 |
++#define jpeg_idct_13x13 jRD13x13 |
++#define jpeg_idct_14x14 jRD14x14 |
++#define jpeg_idct_15x15 jRD15x15 |
++#define jpeg_idct_16x16 jRD16x16 |
+ #endif /* NEED_SHORT_EXTERNAL_NAMES */ |
+ |
+ /* Extern declarations for the forward and inverse DCT routines. */ |
+@@ -115,9 +127,21 @@ |
+ EXTERN(void) jpeg_idct_float |
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_7x7 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_6x6 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_5x5 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
+ EXTERN(void) jpeg_idct_4x4 |
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_3x3 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
+ EXTERN(void) jpeg_idct_2x2 |
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
+@@ -124,6 +148,30 @@ |
+ EXTERN(void) jpeg_idct_1x1 |
+ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
+ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_9x9 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_10x10 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_11x11 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_12x12 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_13x13 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_14x14 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_15x15 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
++EXTERN(void) jpeg_idct_16x16 |
++ JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); |
+ |
+ |
+ /* |
+Index: jddctmgr.c |
+=================================================================== |
+--- jddctmgr.c (revision 829) |
++++ jddctmgr.c (working copy) |
+@@ -1,9 +1,12 @@ |
+ /* |
+ * jddctmgr.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1996, Thomas G. Lane. |
++ * Modified 2002-2010 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * This file is part of the Independent JPEG Group's software. |
++ * Copyright (C) 2010, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains the inverse-DCT management logic. |
+@@ -21,6 +24,7 @@ |
+ #include "jpeglib.h" |
+ #include "jdct.h" /* Private declarations for DCT subsystem */ |
+ #include "jsimddct.h" |
++#include "jpegcomp.h" |
+ |
+ |
+ /* |
+@@ -100,7 +104,7 @@ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+ /* Select the proper IDCT routine for this component's scaling */ |
+- switch (compptr->DCT_scaled_size) { |
++ switch (compptr->_DCT_scaled_size) { |
+ #ifdef IDCT_SCALING_SUPPORTED |
+ case 1: |
+ method_ptr = jpeg_idct_1x1; |
+@@ -113,6 +117,10 @@ |
+ method_ptr = jpeg_idct_2x2; |
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */ |
+ break; |
++ case 3: |
++ method_ptr = jpeg_idct_3x3; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
+ case 4: |
+ if (jsimd_can_idct_4x4()) |
+ method_ptr = jsimd_idct_4x4; |
+@@ -120,6 +128,18 @@ |
+ method_ptr = jpeg_idct_4x4; |
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */ |
+ break; |
++ case 5: |
++ method_ptr = jpeg_idct_5x5; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 6: |
++ method_ptr = jpeg_idct_6x6; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 7: |
++ method_ptr = jpeg_idct_7x7; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
+ #endif |
+ case DCTSIZE: |
+ switch (cinfo->dct_method) { |
+@@ -155,8 +175,40 @@ |
+ break; |
+ } |
+ break; |
++ case 9: |
++ method_ptr = jpeg_idct_9x9; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 10: |
++ method_ptr = jpeg_idct_10x10; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 11: |
++ method_ptr = jpeg_idct_11x11; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 12: |
++ method_ptr = jpeg_idct_12x12; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 13: |
++ method_ptr = jpeg_idct_13x13; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 14: |
++ method_ptr = jpeg_idct_14x14; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 15: |
++ method_ptr = jpeg_idct_15x15; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
++ case 16: |
++ method_ptr = jpeg_idct_16x16; |
++ method = JDCT_ISLOW; /* jidctint uses islow-style table */ |
++ break; |
+ default: |
+- ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size); |
++ ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size); |
+ break; |
+ } |
+ idct->pub.inverse_DCT[ci] = method_ptr; |
+Index: jdhuff.c |
+=================================================================== |
+--- jdhuff.c (revision 829) |
++++ jdhuff.c (working copy) |
+@@ -1,8 +1,10 @@ |
+ /* |
+ * jdhuff.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2009-2011, 2015, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains Huffman entropy decoding routines. |
+@@ -18,6 +20,7 @@ |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
+ #include "jdhuff.h" /* Declarations shared with jdphuff.c */ |
++#include "jpegcomp.h" |
+ |
+ |
+ /* |
+@@ -122,7 +125,7 @@ |
+ if (compptr->component_needed) { |
+ entropy->dc_needed[blkn] = TRUE; |
+ /* we don't need the ACs if producing a 1/8th-size image */ |
+- entropy->ac_needed[blkn] = (compptr->DCT_scaled_size > 1); |
++ entropy->ac_needed[blkn] = (compptr->_DCT_scaled_size > 1); |
+ } else { |
+ entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE; |
+ } |
+@@ -225,6 +228,7 @@ |
+ dtbl->maxcode[l] = -1; /* -1 if no codes of this length */ |
+ } |
+ } |
++ dtbl->valoffset[17] = 0; |
+ dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */ |
+ |
+ /* Compute lookahead tables to speed up decoding. |
+@@ -234,7 +238,8 @@ |
+ * with that code. |
+ */ |
+ |
+- MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits)); |
++ for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++) |
++ dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD; |
+ |
+ p = 0; |
+ for (l = 1; l <= HUFF_LOOKAHEAD; l++) { |
+@@ -243,8 +248,7 @@ |
+ /* Generate left-justified code followed by all possible bit sequences */ |
+ lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l); |
+ for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) { |
+- dtbl->look_nbits[lookbits] = l; |
+- dtbl->look_sym[lookbits] = htbl->huffval[p]; |
++ dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p]; |
+ lookbits++; |
+ } |
+ } |
+@@ -389,6 +393,50 @@ |
+ } |
+ |
+ |
++/* Macro version of the above, which performs much better but does not |
++ handle markers. We have to hand off any blocks with markers to the |
++ slower routines. */ |
++ |
++#define GET_BYTE \ |
++{ \ |
++ register int c0, c1; \ |
++ c0 = GETJOCTET(*buffer++); \ |
++ c1 = GETJOCTET(*buffer); \ |
++ /* Pre-execute most common case */ \ |
++ get_buffer = (get_buffer << 8) | c0; \ |
++ bits_left += 8; \ |
++ if (c0 == 0xFF) { \ |
++ /* Pre-execute case of FF/00, which represents an FF data byte */ \ |
++ buffer++; \ |
++ if (c1 != 0) { \ |
++ /* Oops, it's actually a marker indicating end of compressed data. */ \ |
++ cinfo->unread_marker = c1; \ |
++ /* Back out pre-execution and fill the buffer with zero bits */ \ |
++ buffer -= 2; \ |
++ get_buffer &= ~0xFF; \ |
++ } \ |
++ } \ |
++} |
++ |
++#if __WORDSIZE == 64 || defined(_WIN64) |
++ |
++/* Pre-fetch 48 bits (6 bytes), because the holding register is 64-bit */ |
++#define FILL_BIT_BUFFER_FAST \ |
++ if (bits_left < 16) { \ |
++ GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \ |
++ } |
++ |
++#else |
++ |
++/* Pre-fetch 16 bits (2 bytes), because the holding register is 32-bit */ |
++#define FILL_BIT_BUFFER_FAST \ |
++ if (bits_left < 16) { \ |
++ GET_BYTE GET_BYTE \ |
++ } |
++ |
++#endif |
++ |
++ |
+ /* |
+ * Out-of-line code for Huffman code decoding. |
+ * See jdhuff.h for info about usage. |
+@@ -438,9 +486,10 @@ |
+ * On some machines, a shift and add will be faster than a table lookup. |
+ */ |
+ |
++#define AVOID_TABLES |
+ #ifdef AVOID_TABLES |
+ |
+-#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) |
++#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((-1)<<(s)) + 1))) |
+ |
+ #else |
+ |
+@@ -498,6 +547,191 @@ |
+ } |
+ |
+ |
++LOCAL(boolean) |
++decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) |
++{ |
++ huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
++ BITREAD_STATE_VARS; |
++ int blkn; |
++ savable_state state; |
++ /* Outer loop handles each block in the MCU */ |
++ |
++ /* Load up working state */ |
++ BITREAD_LOAD_STATE(cinfo,entropy->bitstate); |
++ ASSIGN_STATE(state, entropy->saved); |
++ |
++ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
++ JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL; |
++ d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn]; |
++ d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; |
++ register int s, k, r; |
++ |
++ /* Decode a single block's worth of coefficients */ |
++ |
++ /* Section F.2.2.1: decode the DC coefficient difference */ |
++ HUFF_DECODE(s, br_state, dctbl, return FALSE, label1); |
++ if (s) { |
++ CHECK_BIT_BUFFER(br_state, s, return FALSE); |
++ r = GET_BITS(s); |
++ s = HUFF_EXTEND(r, s); |
++ } |
++ |
++ if (entropy->dc_needed[blkn]) { |
++ /* Convert DC difference to actual value, update last_dc_val */ |
++ int ci = cinfo->MCU_membership[blkn]; |
++ s += state.last_dc_val[ci]; |
++ state.last_dc_val[ci] = s; |
++ if (block) { |
++ /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ |
++ (*block)[0] = (JCOEF) s; |
++ } |
++ } |
++ |
++ if (entropy->ac_needed[blkn] && block) { |
++ |
++ /* Section F.2.2.2: decode the AC coefficients */ |
++ /* Since zeroes are skipped, output area must be cleared beforehand */ |
++ for (k = 1; k < DCTSIZE2; k++) { |
++ HUFF_DECODE(s, br_state, actbl, return FALSE, label2); |
++ |
++ r = s >> 4; |
++ s &= 15; |
++ |
++ if (s) { |
++ k += r; |
++ CHECK_BIT_BUFFER(br_state, s, return FALSE); |
++ r = GET_BITS(s); |
++ s = HUFF_EXTEND(r, s); |
++ /* Output coefficient in natural (dezigzagged) order. |
++ * Note: the extra entries in jpeg_natural_order[] will save us |
++ * if k >= DCTSIZE2, which could happen if the data is corrupted. |
++ */ |
++ (*block)[jpeg_natural_order[k]] = (JCOEF) s; |
++ } else { |
++ if (r != 15) |
++ break; |
++ k += 15; |
++ } |
++ } |
++ |
++ } else { |
++ |
++ /* Section F.2.2.2: decode the AC coefficients */ |
++ /* In this path we just discard the values */ |
++ for (k = 1; k < DCTSIZE2; k++) { |
++ HUFF_DECODE(s, br_state, actbl, return FALSE, label3); |
++ |
++ r = s >> 4; |
++ s &= 15; |
++ |
++ if (s) { |
++ k += r; |
++ CHECK_BIT_BUFFER(br_state, s, return FALSE); |
++ DROP_BITS(s); |
++ } else { |
++ if (r != 15) |
++ break; |
++ k += 15; |
++ } |
++ } |
++ } |
++ } |
++ |
++ /* Completed MCU, so update state */ |
++ BITREAD_SAVE_STATE(cinfo,entropy->bitstate); |
++ ASSIGN_STATE(entropy->saved, state); |
++ return TRUE; |
++} |
++ |
++ |
++LOCAL(boolean) |
++decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) |
++{ |
++ huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
++ BITREAD_STATE_VARS; |
++ JOCTET *buffer; |
++ int blkn; |
++ savable_state state; |
++ /* Outer loop handles each block in the MCU */ |
++ |
++ /* Load up working state */ |
++ BITREAD_LOAD_STATE(cinfo,entropy->bitstate); |
++ buffer = (JOCTET *) br_state.next_input_byte; |
++ ASSIGN_STATE(state, entropy->saved); |
++ |
++ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
++ JBLOCKROW block = MCU_data[blkn]; |
++ d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn]; |
++ d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; |
++ register int s, k, r, l; |
++ |
++ HUFF_DECODE_FAST(s, l, dctbl, slow_decode_mcu); |
++ if (s) { |
++ FILL_BIT_BUFFER_FAST |
++ r = GET_BITS(s); |
++ s = HUFF_EXTEND(r, s); |
++ } |
++ |
++ if (entropy->dc_needed[blkn]) { |
++ int ci = cinfo->MCU_membership[blkn]; |
++ s += state.last_dc_val[ci]; |
++ state.last_dc_val[ci] = s; |
++ if (block) |
++ (*block)[0] = (JCOEF) s; |
++ } |
++ |
++ if (entropy->ac_needed[blkn] && block) { |
++ |
++ for (k = 1; k < DCTSIZE2; k++) { |
++ HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu); |
++ r = s >> 4; |
++ s &= 15; |
++ |
++ if (s) { |
++ k += r; |
++ FILL_BIT_BUFFER_FAST |
++ r = GET_BITS(s); |
++ s = HUFF_EXTEND(r, s); |
++ (*block)[jpeg_natural_order[k]] = (JCOEF) s; |
++ } else { |
++ if (r != 15) break; |
++ k += 15; |
++ } |
++ } |
++ |
++ } else { |
++ |
++ for (k = 1; k < DCTSIZE2; k++) { |
++ HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu); |
++ r = s >> 4; |
++ s &= 15; |
++ |
++ if (s) { |
++ k += r; |
++ FILL_BIT_BUFFER_FAST |
++ DROP_BITS(s); |
++ } else { |
++ if (r != 15) break; |
++ k += 15; |
++ } |
++ } |
++ } |
++ } |
++ |
++ if (cinfo->unread_marker != 0) { |
++slow_decode_mcu: |
++ cinfo->unread_marker = 0; |
++ return FALSE; |
++ } |
++ |
++ br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte); |
++ br_state.next_input_byte = buffer; |
++ BITREAD_SAVE_STATE(cinfo,entropy->bitstate); |
++ ASSIGN_STATE(entropy->saved, state); |
++ return TRUE; |
++} |
++ |
++ |
+ /* |
+ * Decode and return one MCU's worth of Huffman-compressed coefficients. |
+ * The coefficients are reordered from zigzag order into natural array order, |
+@@ -513,13 +747,13 @@ |
+ * this module, since we'll just re-assign them on the next call.) |
+ */ |
+ |
++#define BUFSIZE (DCTSIZE2 * 2u) |
++ |
+ METHODDEF(boolean) |
+ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) |
+ { |
+ huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
+- int blkn; |
+- BITREAD_STATE_VARS; |
+- savable_state state; |
++ int usefast = 1; |
+ |
+ /* Process restart marker if needed; may have to suspend */ |
+ if (cinfo->restart_interval) { |
+@@ -526,98 +760,26 @@ |
+ if (entropy->restarts_to_go == 0) |
+ if (! process_restart(cinfo)) |
+ return FALSE; |
++ usefast = 0; |
+ } |
+ |
++ if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU |
++ || cinfo->unread_marker != 0) |
++ usefast = 0; |
++ |
+ /* If we've run out of data, just leave the MCU set to zeroes. |
+ * This way, we return uniform gray for the remainder of the segment. |
+ */ |
+ if (! entropy->pub.insufficient_data) { |
+ |
+- /* Load up working state */ |
+- BITREAD_LOAD_STATE(cinfo,entropy->bitstate); |
+- ASSIGN_STATE(state, entropy->saved); |
+- |
+- /* Outer loop handles each block in the MCU */ |
+- |
+- for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
+- JBLOCKROW block = MCU_data[blkn]; |
+- d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn]; |
+- d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; |
+- register int s, k, r; |
+- |
+- /* Decode a single block's worth of coefficients */ |
+- |
+- /* Section F.2.2.1: decode the DC coefficient difference */ |
+- HUFF_DECODE(s, br_state, dctbl, return FALSE, label1); |
+- if (s) { |
+- CHECK_BIT_BUFFER(br_state, s, return FALSE); |
+- r = GET_BITS(s); |
+- s = HUFF_EXTEND(r, s); |
+- } |
+- |
+- if (entropy->dc_needed[blkn]) { |
+- /* Convert DC difference to actual value, update last_dc_val */ |
+- int ci = cinfo->MCU_membership[blkn]; |
+- s += state.last_dc_val[ci]; |
+- state.last_dc_val[ci] = s; |
+- /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ |
+- (*block)[0] = (JCOEF) s; |
+- } |
+- |
+- if (entropy->ac_needed[blkn]) { |
+- |
+- /* Section F.2.2.2: decode the AC coefficients */ |
+- /* Since zeroes are skipped, output area must be cleared beforehand */ |
+- for (k = 1; k < DCTSIZE2; k++) { |
+- HUFF_DECODE(s, br_state, actbl, return FALSE, label2); |
+- |
+- r = s >> 4; |
+- s &= 15; |
+- |
+- if (s) { |
+- k += r; |
+- CHECK_BIT_BUFFER(br_state, s, return FALSE); |
+- r = GET_BITS(s); |
+- s = HUFF_EXTEND(r, s); |
+- /* Output coefficient in natural (dezigzagged) order. |
+- * Note: the extra entries in jpeg_natural_order[] will save us |
+- * if k >= DCTSIZE2, which could happen if the data is corrupted. |
+- */ |
+- (*block)[jpeg_natural_order[k]] = (JCOEF) s; |
+- } else { |
+- if (r != 15) |
+- break; |
+- k += 15; |
+- } |
+- } |
+- |
+- } else { |
+- |
+- /* Section F.2.2.2: decode the AC coefficients */ |
+- /* In this path we just discard the values */ |
+- for (k = 1; k < DCTSIZE2; k++) { |
+- HUFF_DECODE(s, br_state, actbl, return FALSE, label3); |
+- |
+- r = s >> 4; |
+- s &= 15; |
+- |
+- if (s) { |
+- k += r; |
+- CHECK_BIT_BUFFER(br_state, s, return FALSE); |
+- DROP_BITS(s); |
+- } else { |
+- if (r != 15) |
+- break; |
+- k += 15; |
+- } |
+- } |
+- |
+- } |
++ if (usefast) { |
++ if (!decode_mcu_fast(cinfo, MCU_data)) goto use_slow; |
+ } |
++ else { |
++ use_slow: |
++ if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE; |
++ } |
+ |
+- /* Completed MCU, so update state */ |
+- BITREAD_SAVE_STATE(cinfo,entropy->bitstate); |
+- ASSIGN_STATE(entropy->saved, state); |
+ } |
+ |
+ /* Account for restart interval (no-op if not using restarts) */ |
+Index: jdhuff.h |
+=================================================================== |
+--- jdhuff.h (revision 829) |
++++ jdhuff.h (working copy) |
+@@ -1,8 +1,10 @@ |
+ /* |
+ * jdhuff.h |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modifications: |
++ * Copyright (C) 2010-2011, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains declarations for Huffman entropy decoding routines |
+@@ -27,7 +29,7 @@ |
+ /* Basic tables: (element [0] of each array is unused) */ |
+ INT32 maxcode[18]; /* largest code of length k (-1 if none) */ |
+ /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */ |
+- INT32 valoffset[17]; /* huffval[] offset for codes of length k */ |
++ INT32 valoffset[18]; /* huffval[] offset for codes of length k */ |
+ /* valoffset[k] = huffval[] index of 1st symbol of code length k, less |
+ * the smallest code of length k; so given a code of length k, the |
+ * corresponding symbol is huffval[code + valoffset[k]] |
+@@ -36,13 +38,17 @@ |
+ /* Link to public Huffman table (needed only in jpeg_huff_decode) */ |
+ JHUFF_TBL *pub; |
+ |
+- /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of |
++ /* Lookahead table: indexed by the next HUFF_LOOKAHEAD bits of |
+ * the input data stream. If the next Huffman code is no more |
+ * than HUFF_LOOKAHEAD bits long, we can obtain its length and |
+- * the corresponding symbol directly from these tables. |
++ * the corresponding symbol directly from this table. |
++ * |
++ * The low HUFF_LOOKAHEAD bits of each table entry contain the |
++ * symbol. The bits above them contain the number of bits in the |
++ * corresponding Huffman code, or HUFF_LOOKAHEAD + 1 if too |
++ * long. |
+ */ |
+- int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */ |
+- UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */ |
++ int lookup[1<<HUFF_LOOKAHEAD]; |
+ } d_derived_tbl; |
+ |
+ /* Expand a Huffman table definition into the derived format */ |
+@@ -69,9 +75,18 @@ |
+ * necessary. |
+ */ |
+ |
++#if __WORDSIZE == 64 || defined(_WIN64) |
++ |
++typedef size_t bit_buf_type; /* type of bit-extraction buffer */ |
++#define BIT_BUF_SIZE 64 /* size of buffer in bits */ |
++ |
++#else |
++ |
+ typedef INT32 bit_buf_type; /* type of bit-extraction buffer */ |
+-#define BIT_BUF_SIZE 32 /* size of buffer in bits */ |
++#define BIT_BUF_SIZE 32 /* size of buffer in bits */ |
+ |
++#endif |
++ |
+ /* If long is > 32 bits on your machine, and shifting/masking longs is |
+ * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE |
+ * appropriately should be a win. Unfortunately we can't define the size |
+@@ -183,11 +198,10 @@ |
+ } \ |
+ } \ |
+ look = PEEK_BITS(HUFF_LOOKAHEAD); \ |
+- if ((nb = htbl->look_nbits[look]) != 0) { \ |
++ if ((nb = (htbl->lookup[look] >> HUFF_LOOKAHEAD)) <= HUFF_LOOKAHEAD) { \ |
+ DROP_BITS(nb); \ |
+- result = htbl->look_sym[look]; \ |
++ result = htbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); \ |
+ } else { \ |
+- nb = HUFF_LOOKAHEAD+1; \ |
+ slowlabel: \ |
+ if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \ |
+ { failaction; } \ |
+@@ -195,6 +209,28 @@ |
+ } \ |
+ } |
+ |
++#define HUFF_DECODE_FAST(s,nb,htbl,slowlabel) \ |
++ FILL_BIT_BUFFER_FAST; \ |
++ s = PEEK_BITS(HUFF_LOOKAHEAD); \ |
++ s = htbl->lookup[s]; \ |
++ nb = s >> HUFF_LOOKAHEAD; \ |
++ /* Pre-execute the common case of nb <= HUFF_LOOKAHEAD */ \ |
++ DROP_BITS(nb); \ |
++ s = s & ((1 << HUFF_LOOKAHEAD) - 1); \ |
++ if (nb > HUFF_LOOKAHEAD) { \ |
++ /* Equivalent of jpeg_huff_decode() */ \ |
++ /* Don't use GET_BITS() here because we don't want to modify bits_left */ \ |
++ s = (get_buffer >> bits_left) & ((1 << (nb)) - 1); \ |
++ while (s > htbl->maxcode[nb]) { \ |
++ s <<= 1; \ |
++ s |= GET_BITS(1); \ |
++ nb++; \ |
++ } \ |
++ if (nb > 16) \ |
++ goto slowlabel; \ |
++ s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) ]; \ |
++ } |
++ |
+ /* Out-of-line case for Huffman code fetching */ |
+ EXTERN(int) jpeg_huff_decode |
+ JPP((bitread_working_state * state, register bit_buf_type get_buffer, |
+Index: jdinput.c |
+=================================================================== |
+--- jdinput.c (revision 829) |
++++ jdinput.c (working copy) |
+@@ -1,8 +1,10 @@ |
+ /* |
+ * jdinput.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2010, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains input control logic for the JPEG decompressor. |
+@@ -14,6 +16,7 @@ |
+ #define JPEG_INTERNALS |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
++#include "jpegcomp.h" |
+ |
+ |
+ /* Private state */ |
+@@ -70,16 +73,30 @@ |
+ compptr->v_samp_factor); |
+ } |
+ |
++#if JPEG_LIB_VERSION >=80 |
++ cinfo->block_size = DCTSIZE; |
++ cinfo->natural_order = jpeg_natural_order; |
++ cinfo->lim_Se = DCTSIZE2-1; |
++#endif |
++ |
+ /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE. |
+ * In the full decompressor, this will be overridden by jdmaster.c; |
+ * but in the transcoder, jdmaster.c is not used, so we must do it here. |
+ */ |
++#if JPEG_LIB_VERSION >= 70 |
++ cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE; |
++#else |
+ cinfo->min_DCT_scaled_size = DCTSIZE; |
++#endif |
+ |
+ /* Compute dimensions of components */ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
++#if JPEG_LIB_VERSION >= 70 |
++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE; |
++#else |
+ compptr->DCT_scaled_size = DCTSIZE; |
++#endif |
+ /* Size in DCT blocks */ |
+ compptr->width_in_blocks = (JDIMENSION) |
+ jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, |
+@@ -138,7 +155,7 @@ |
+ compptr->MCU_width = 1; |
+ compptr->MCU_height = 1; |
+ compptr->MCU_blocks = 1; |
+- compptr->MCU_sample_width = compptr->DCT_scaled_size; |
++ compptr->MCU_sample_width = compptr->_DCT_scaled_size; |
+ compptr->last_col_width = 1; |
+ /* For noninterleaved scans, it is convenient to define last_row_height |
+ * as the number of block rows present in the last iMCU row. |
+@@ -174,7 +191,7 @@ |
+ compptr->MCU_width = compptr->h_samp_factor; |
+ compptr->MCU_height = compptr->v_samp_factor; |
+ compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height; |
+- compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_scaled_size; |
++ compptr->MCU_sample_width = compptr->MCU_width * compptr->_DCT_scaled_size; |
+ /* Figure number of non-dummy blocks in last MCU column & row */ |
+ tmp = (int) (compptr->width_in_blocks % compptr->MCU_width); |
+ if (tmp == 0) tmp = compptr->MCU_width; |
+Index: jdmainct.c |
+=================================================================== |
+--- jdmainct.c (revision 829) |
++++ jdmainct.c (working copy) |
+@@ -1,8 +1,10 @@ |
+ /* |
+ * jdmainct.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1996, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2010, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains the main buffer controller for decompression. |
+@@ -13,9 +15,7 @@ |
+ * supplies the equivalent of the main buffer in that case. |
+ */ |
+ |
+-#define JPEG_INTERNALS |
+-#include "jinclude.h" |
+-#include "jpeglib.h" |
++#include "jdmainct.h" |
+ |
+ |
+ /* |
+@@ -109,36 +109,6 @@ |
+ */ |
+ |
+ |
+-/* Private buffer controller object */ |
+- |
+-typedef struct { |
+- struct jpeg_d_main_controller pub; /* public fields */ |
+- |
+- /* Pointer to allocated workspace (M or M+2 row groups). */ |
+- JSAMPARRAY buffer[MAX_COMPONENTS]; |
+- |
+- boolean buffer_full; /* Have we gotten an iMCU row from decoder? */ |
+- JDIMENSION rowgroup_ctr; /* counts row groups output to postprocessor */ |
+- |
+- /* Remaining fields are only used in the context case. */ |
+- |
+- /* These are the master pointers to the funny-order pointer lists. */ |
+- JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */ |
+- |
+- int whichptr; /* indicates which pointer set is now in use */ |
+- int context_state; /* process_data state machine status */ |
+- JDIMENSION rowgroups_avail; /* row groups available to postprocessor */ |
+- JDIMENSION iMCU_row_ctr; /* counts iMCU rows to detect image top/bot */ |
+-} my_main_controller; |
+- |
+-typedef my_main_controller * my_main_ptr; |
+- |
+-/* context_state values: */ |
+-#define CTX_PREPARE_FOR_IMCU 0 /* need to prepare for MCU row */ |
+-#define CTX_PROCESS_IMCU 1 /* feeding iMCU to postprocessor */ |
+-#define CTX_POSTPONED_ROW 2 /* feeding postponed row group */ |
+- |
+- |
+ /* Forward declarations */ |
+ METHODDEF(void) process_data_simple_main |
+ JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf, |
+@@ -159,9 +129,9 @@ |
+ * This is done only once, not once per pass. |
+ */ |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ int ci, rgroup; |
+- int M = cinfo->min_DCT_scaled_size; |
++ int M = cinfo->_min_DCT_scaled_size; |
+ jpeg_component_info *compptr; |
+ JSAMPARRAY xbuf; |
+ |
+@@ -168,15 +138,15 @@ |
+ /* Get top-level space for component array pointers. |
+ * We alloc both arrays with one call to save a few cycles. |
+ */ |
+- main->xbuffer[0] = (JSAMPIMAGE) |
++ main_ptr->xbuffer[0] = (JSAMPIMAGE) |
+ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
+ cinfo->num_components * 2 * SIZEOF(JSAMPARRAY)); |
+- main->xbuffer[1] = main->xbuffer[0] + cinfo->num_components; |
++ main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components; |
+ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / |
+- cinfo->min_DCT_scaled_size; /* height of a row group of component */ |
++ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / |
++ cinfo->_min_DCT_scaled_size; /* height of a row group of component */ |
+ /* Get space for pointer lists --- M+4 row groups in each list. |
+ * We alloc both pointer lists with one call to save a few cycles. |
+ */ |
+@@ -184,9 +154,9 @@ |
+ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
+ 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW)); |
+ xbuf += rgroup; /* want one row group at negative offsets */ |
+- main->xbuffer[0][ci] = xbuf; |
++ main_ptr->xbuffer[0][ci] = xbuf; |
+ xbuf += rgroup * (M + 4); |
+- main->xbuffer[1][ci] = xbuf; |
++ main_ptr->xbuffer[1][ci] = xbuf; |
+ } |
+ } |
+ |
+@@ -194,26 +164,26 @@ |
+ LOCAL(void) |
+ make_funny_pointers (j_decompress_ptr cinfo) |
+ /* Create the funny pointer lists discussed in the comments above. |
+- * The actual workspace is already allocated (in main->buffer), |
++ * The actual workspace is already allocated (in main_ptr->buffer), |
+ * and the space for the pointer lists is allocated too. |
+ * This routine just fills in the curiously ordered lists. |
+ * This will be repeated at the beginning of each pass. |
+ */ |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ int ci, i, rgroup; |
+- int M = cinfo->min_DCT_scaled_size; |
++ int M = cinfo->_min_DCT_scaled_size; |
+ jpeg_component_info *compptr; |
+ JSAMPARRAY buf, xbuf0, xbuf1; |
+ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / |
+- cinfo->min_DCT_scaled_size; /* height of a row group of component */ |
+- xbuf0 = main->xbuffer[0][ci]; |
+- xbuf1 = main->xbuffer[1][ci]; |
++ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / |
++ cinfo->_min_DCT_scaled_size; /* height of a row group of component */ |
++ xbuf0 = main_ptr->xbuffer[0][ci]; |
++ xbuf1 = main_ptr->xbuffer[1][ci]; |
+ /* First copy the workspace pointers as-is */ |
+- buf = main->buffer[ci]; |
++ buf = main_ptr->buffer[ci]; |
+ for (i = 0; i < rgroup * (M + 2); i++) { |
+ xbuf0[i] = xbuf1[i] = buf[i]; |
+ } |
+@@ -235,34 +205,6 @@ |
+ |
+ |
+ LOCAL(void) |
+-set_wraparound_pointers (j_decompress_ptr cinfo) |
+-/* Set up the "wraparound" pointers at top and bottom of the pointer lists. |
+- * This changes the pointer list state from top-of-image to the normal state. |
+- */ |
+-{ |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
+- int ci, i, rgroup; |
+- int M = cinfo->min_DCT_scaled_size; |
+- jpeg_component_info *compptr; |
+- JSAMPARRAY xbuf0, xbuf1; |
+- |
+- for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+- ci++, compptr++) { |
+- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / |
+- cinfo->min_DCT_scaled_size; /* height of a row group of component */ |
+- xbuf0 = main->xbuffer[0][ci]; |
+- xbuf1 = main->xbuffer[1][ci]; |
+- for (i = 0; i < rgroup; i++) { |
+- xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i]; |
+- xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i]; |
+- xbuf0[rgroup*(M+2) + i] = xbuf0[i]; |
+- xbuf1[rgroup*(M+2) + i] = xbuf1[i]; |
+- } |
+- } |
+-} |
+- |
+- |
+-LOCAL(void) |
+ set_bottom_pointers (j_decompress_ptr cinfo) |
+ /* Change the pointer lists to duplicate the last sample row at the bottom |
+ * of the image. whichptr indicates which xbuffer holds the final iMCU row. |
+@@ -269,7 +211,7 @@ |
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row. |
+ */ |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ int ci, i, rgroup, iMCUheight, rows_left; |
+ jpeg_component_info *compptr; |
+ JSAMPARRAY xbuf; |
+@@ -277,8 +219,8 @@ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+ /* Count sample rows in one iMCU row and in one row group */ |
+- iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size; |
+- rgroup = iMCUheight / cinfo->min_DCT_scaled_size; |
++ iMCUheight = compptr->v_samp_factor * compptr->_DCT_scaled_size; |
++ rgroup = iMCUheight / cinfo->_min_DCT_scaled_size; |
+ /* Count nondummy sample rows remaining for this component */ |
+ rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight); |
+ if (rows_left == 0) rows_left = iMCUheight; |
+@@ -286,12 +228,12 @@ |
+ * so we need only do it once. |
+ */ |
+ if (ci == 0) { |
+- main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1); |
++ main_ptr->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1); |
+ } |
+ /* Duplicate the last real sample row rgroup*2 times; this pads out the |
+ * last partial rowgroup and ensures at least one full rowgroup of context. |
+ */ |
+- xbuf = main->xbuffer[main->whichptr][ci]; |
++ xbuf = main_ptr->xbuffer[main_ptr->whichptr][ci]; |
+ for (i = 0; i < rgroup * 2; i++) { |
+ xbuf[rows_left + i] = xbuf[rows_left-1]; |
+ } |
+@@ -306,27 +248,27 @@ |
+ METHODDEF(void) |
+ start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode) |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ |
+ switch (pass_mode) { |
+ case JBUF_PASS_THRU: |
+ if (cinfo->upsample->need_context_rows) { |
+- main->pub.process_data = process_data_context_main; |
++ main_ptr->pub.process_data = process_data_context_main; |
+ make_funny_pointers(cinfo); /* Create the xbuffer[] lists */ |
+- main->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ |
+- main->context_state = CTX_PREPARE_FOR_IMCU; |
+- main->iMCU_row_ctr = 0; |
++ main_ptr->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ |
++ main_ptr->context_state = CTX_PREPARE_FOR_IMCU; |
++ main_ptr->iMCU_row_ctr = 0; |
+ } else { |
+ /* Simple case with no context needed */ |
+- main->pub.process_data = process_data_simple_main; |
++ main_ptr->pub.process_data = process_data_simple_main; |
+ } |
+- main->buffer_full = FALSE; /* Mark buffer empty */ |
+- main->rowgroup_ctr = 0; |
++ main_ptr->buffer_full = FALSE; /* Mark buffer empty */ |
++ main_ptr->rowgroup_ctr = 0; |
+ break; |
+ #ifdef QUANT_2PASS_SUPPORTED |
+ case JBUF_CRANK_DEST: |
+ /* For last pass of 2-pass quantization, just crank the postprocessor */ |
+- main->pub.process_data = process_data_crank_post; |
++ main_ptr->pub.process_data = process_data_crank_post; |
+ break; |
+ #endif |
+ default: |
+@@ -346,18 +288,18 @@ |
+ JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, |
+ JDIMENSION out_rows_avail) |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ JDIMENSION rowgroups_avail; |
+ |
+ /* Read input data if we haven't filled the main buffer yet */ |
+- if (! main->buffer_full) { |
+- if (! (*cinfo->coef->decompress_data) (cinfo, main->buffer)) |
++ if (! main_ptr->buffer_full) { |
++ if (! (*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer)) |
+ return; /* suspension forced, can do nothing more */ |
+- main->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ |
++ main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ |
+ } |
+ |
+ /* There are always min_DCT_scaled_size row groups in an iMCU row. */ |
+- rowgroups_avail = (JDIMENSION) cinfo->min_DCT_scaled_size; |
++ rowgroups_avail = (JDIMENSION) cinfo->_min_DCT_scaled_size; |
+ /* Note: at the bottom of the image, we may pass extra garbage row groups |
+ * to the postprocessor. The postprocessor has to check for bottom |
+ * of image anyway (at row resolution), so no point in us doing it too. |
+@@ -364,14 +306,14 @@ |
+ */ |
+ |
+ /* Feed the postprocessor */ |
+- (*cinfo->post->post_process_data) (cinfo, main->buffer, |
+- &main->rowgroup_ctr, rowgroups_avail, |
++ (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer, |
++ &main_ptr->rowgroup_ctr, rowgroups_avail, |
+ output_buf, out_row_ctr, out_rows_avail); |
+ |
+ /* Has postprocessor consumed all the data yet? If so, mark buffer empty */ |
+- if (main->rowgroup_ctr >= rowgroups_avail) { |
+- main->buffer_full = FALSE; |
+- main->rowgroup_ctr = 0; |
++ if (main_ptr->rowgroup_ctr >= rowgroups_avail) { |
++ main_ptr->buffer_full = FALSE; |
++ main_ptr->rowgroup_ctr = 0; |
+ } |
+ } |
+ |
+@@ -386,15 +328,15 @@ |
+ JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, |
+ JDIMENSION out_rows_avail) |
+ { |
+- my_main_ptr main = (my_main_ptr) cinfo->main; |
++ my_main_ptr main_ptr = (my_main_ptr) cinfo->main; |
+ |
+ /* Read input data if we haven't filled the main buffer yet */ |
+- if (! main->buffer_full) { |
++ if (! main_ptr->buffer_full) { |
+ if (! (*cinfo->coef->decompress_data) (cinfo, |
+- main->xbuffer[main->whichptr])) |
++ main_ptr->xbuffer[main_ptr->whichptr])) |
+ return; /* suspension forced, can do nothing more */ |
+- main->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ |
+- main->iMCU_row_ctr++; /* count rows received */ |
++ main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ |
++ main_ptr->iMCU_row_ctr++; /* count rows received */ |
+ } |
+ |
+ /* Postprocessor typically will not swallow all the input data it is handed |
+@@ -402,47 +344,47 @@ |
+ * to exit and restart. This switch lets us keep track of how far we got. |
+ * Note that each case falls through to the next on successful completion. |
+ */ |
+- switch (main->context_state) { |
++ switch (main_ptr->context_state) { |
+ case CTX_POSTPONED_ROW: |
+ /* Call postprocessor using previously set pointers for postponed row */ |
+- (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr], |
+- &main->rowgroup_ctr, main->rowgroups_avail, |
++ (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr], |
++ &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, |
+ output_buf, out_row_ctr, out_rows_avail); |
+- if (main->rowgroup_ctr < main->rowgroups_avail) |
++ if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail) |
+ return; /* Need to suspend */ |
+- main->context_state = CTX_PREPARE_FOR_IMCU; |
++ main_ptr->context_state = CTX_PREPARE_FOR_IMCU; |
+ if (*out_row_ctr >= out_rows_avail) |
+ return; /* Postprocessor exactly filled output buf */ |
+ /*FALLTHROUGH*/ |
+ case CTX_PREPARE_FOR_IMCU: |
+ /* Prepare to process first M-1 row groups of this iMCU row */ |
+- main->rowgroup_ctr = 0; |
+- main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1); |
++ main_ptr->rowgroup_ctr = 0; |
++ main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size - 1); |
+ /* Check for bottom of image: if so, tweak pointers to "duplicate" |
+ * the last sample row, and adjust rowgroups_avail to ignore padding rows. |
+ */ |
+- if (main->iMCU_row_ctr == cinfo->total_iMCU_rows) |
++ if (main_ptr->iMCU_row_ctr == cinfo->total_iMCU_rows) |
+ set_bottom_pointers(cinfo); |
+- main->context_state = CTX_PROCESS_IMCU; |
++ main_ptr->context_state = CTX_PROCESS_IMCU; |
+ /*FALLTHROUGH*/ |
+ case CTX_PROCESS_IMCU: |
+ /* Call postprocessor using previously set pointers */ |
+- (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr], |
+- &main->rowgroup_ctr, main->rowgroups_avail, |
++ (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr], |
++ &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail, |
+ output_buf, out_row_ctr, out_rows_avail); |
+- if (main->rowgroup_ctr < main->rowgroups_avail) |
++ if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail) |
+ return; /* Need to suspend */ |
+ /* After the first iMCU, change wraparound pointers to normal state */ |
+- if (main->iMCU_row_ctr == 1) |
++ if (main_ptr->iMCU_row_ctr == 1) |
+ set_wraparound_pointers(cinfo); |
+ /* Prepare to load new iMCU row using other xbuffer list */ |
+- main->whichptr ^= 1; /* 0=>1 or 1=>0 */ |
+- main->buffer_full = FALSE; |
++ main_ptr->whichptr ^= 1; /* 0=>1 or 1=>0 */ |
++ main_ptr->buffer_full = FALSE; |
+ /* Still need to process last row group of this iMCU row, */ |
+ /* which is saved at index M+1 of the other xbuffer */ |
+- main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1); |
+- main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2); |
+- main->context_state = CTX_POSTPONED_ROW; |
++ main_ptr->rowgroup_ctr = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 1); |
++ main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 2); |
++ main_ptr->context_state = CTX_POSTPONED_ROW; |
+ } |
+ } |
+ |
+@@ -475,15 +417,15 @@ |
+ GLOBAL(void) |
+ jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer) |
+ { |
+- my_main_ptr main; |
++ my_main_ptr main_ptr; |
+ int ci, rgroup, ngroups; |
+ jpeg_component_info *compptr; |
+ |
+- main = (my_main_ptr) |
++ main_ptr = (my_main_ptr) |
+ (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
+ SIZEOF(my_main_controller)); |
+- cinfo->main = (struct jpeg_d_main_controller *) main; |
+- main->pub.start_pass = start_pass_main; |
++ cinfo->main = (struct jpeg_d_main_controller *) main_ptr; |
++ main_ptr->pub.start_pass = start_pass_main; |
+ |
+ if (need_full_buffer) /* shouldn't happen */ |
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); |
+@@ -492,21 +434,21 @@ |
+ * ngroups is the number of row groups we need. |
+ */ |
+ if (cinfo->upsample->need_context_rows) { |
+- if (cinfo->min_DCT_scaled_size < 2) /* unsupported, see comments above */ |
++ if (cinfo->_min_DCT_scaled_size < 2) /* unsupported, see comments above */ |
+ ERREXIT(cinfo, JERR_NOTIMPL); |
+ alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */ |
+- ngroups = cinfo->min_DCT_scaled_size + 2; |
++ ngroups = cinfo->_min_DCT_scaled_size + 2; |
+ } else { |
+- ngroups = cinfo->min_DCT_scaled_size; |
++ ngroups = cinfo->_min_DCT_scaled_size; |
+ } |
+ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+- rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / |
+- cinfo->min_DCT_scaled_size; /* height of a row group of component */ |
+- main->buffer[ci] = (*cinfo->mem->alloc_sarray) |
++ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / |
++ cinfo->_min_DCT_scaled_size; /* height of a row group of component */ |
++ main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray) |
+ ((j_common_ptr) cinfo, JPOOL_IMAGE, |
+- compptr->width_in_blocks * compptr->DCT_scaled_size, |
++ compptr->width_in_blocks * compptr->_DCT_scaled_size, |
+ (JDIMENSION) (rgroup * ngroups)); |
+ } |
+ } |
+Index: jdmarker.c |
+=================================================================== |
+--- jdmarker.c (revision 829) |
++++ jdmarker.c (working copy) |
+@@ -1,8 +1,10 @@ |
+ /* |
+ * jdmarker.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1998, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2012, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains routines to decode JPEG datastream markers. |
+@@ -302,7 +304,7 @@ |
+ /* Process a SOS marker */ |
+ { |
+ INT32 length; |
+- int i, ci, n, c, cc; |
++ int i, ci, n, c, cc, pi; |
+ jpeg_component_info * compptr; |
+ INPUT_VARS(cinfo); |
+ |
+@@ -322,13 +324,17 @@ |
+ |
+ /* Collect the component-spec parameters */ |
+ |
++ for (i = 0; i < MAX_COMPS_IN_SCAN; i++) |
++ cinfo->cur_comp_info[i] = NULL; |
++ |
+ for (i = 0; i < n; i++) { |
+ INPUT_BYTE(cinfo, cc, return FALSE); |
+ INPUT_BYTE(cinfo, c, return FALSE); |
+ |
+- for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
++ for (ci = 0, compptr = cinfo->comp_info; |
++ ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN; |
+ ci++, compptr++) { |
+- if (cc == compptr->component_id) |
++ if (cc == compptr->component_id && !cinfo->cur_comp_info[ci]) |
+ goto id_found; |
+ } |
+ |
+@@ -342,6 +348,13 @@ |
+ |
+ TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc, |
+ compptr->dc_tbl_no, compptr->ac_tbl_no); |
++ |
++ /* This CSi (cc) should differ from the previous CSi */ |
++ for (pi = 0; pi < i; pi++) { |
++ if (cinfo->cur_comp_info[pi] == compptr) { |
++ ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc); |
++ } |
++ } |
+ } |
+ |
+ /* Collect the additional scan parameters Ss, Se, Ah/Al. */ |
+@@ -459,18 +472,21 @@ |
+ for (i = 0; i < count; i++) |
+ INPUT_BYTE(cinfo, huffval[i], return FALSE); |
+ |
++ MEMZERO(&huffval[count], (256 - count) * SIZEOF(UINT8)); |
++ |
+ length -= count; |
+ |
+ if (index & 0x10) { /* AC table definition */ |
+ index -= 0x10; |
++ if (index < 0 || index >= NUM_HUFF_TBLS) |
++ ERREXIT1(cinfo, JERR_DHT_INDEX, index); |
+ htblptr = &cinfo->ac_huff_tbl_ptrs[index]; |
+ } else { /* DC table definition */ |
++ if (index < 0 || index >= NUM_HUFF_TBLS) |
++ ERREXIT1(cinfo, JERR_DHT_INDEX, index); |
+ htblptr = &cinfo->dc_huff_tbl_ptrs[index]; |
+ } |
+ |
+- if (index < 0 || index >= NUM_HUFF_TBLS) |
+- ERREXIT1(cinfo, JERR_DHT_INDEX, index); |
+- |
+ if (*htblptr == NULL) |
+ *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
+ |
+@@ -906,7 +922,7 @@ |
+ } |
+ |
+ if (cinfo->marker->discarded_bytes != 0) { |
+- WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c); |
++ TRACEMS2(cinfo, 1, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c); |
+ cinfo->marker->discarded_bytes = 0; |
+ } |
+ |
+@@ -940,7 +956,144 @@ |
+ return TRUE; |
+ } |
+ |
++#ifdef MOTION_JPEG_SUPPORTED |
+ |
++/* The default Huffman tables used by motion JPEG frames. When a motion JPEG |
++ * frame does not have DHT tables, we should use the Huffman tables suggested by |
++ * the JPEG standard. Each of these tables represents a member of the JHUFF_TBL |
++ * struct so we can just copy it to the corresponding JHUFF_TBL member. |
++ */ |
++/* DC table 0 */ |
++LOCAL(const unsigned char) mjpg_dc0_bits[] = { |
++ 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, |
++ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
++}; |
++ |
++LOCAL(const unsigned char) mjpg_dc0_huffval[] = { |
++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
++ 0x08, 0x09, 0x0A, 0x0B |
++}; |
++ |
++/* DC table 1 */ |
++LOCAL(const unsigned char) mjpg_dc1_bits[] = { |
++ 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
++ 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 |
++}; |
++ |
++LOCAL(const unsigned char) mjpg_dc1_huffval[] = { |
++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
++ 0x08, 0x09, 0x0A, 0x0B |
++}; |
++ |
++/* AC table 0 */ |
++LOCAL(const unsigned char) mjpg_ac0_bits[] = { |
++ 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, |
++ 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7D |
++}; |
++ |
++LOCAL(const unsigned char) mjpg_ac0_huffval[] = { |
++ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, |
++ 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, |
++ 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, |
++ 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, |
++ 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16, |
++ 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, |
++ 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, |
++ 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, |
++ 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, |
++ 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, |
++ 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, |
++ 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, |
++ 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, |
++ 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, |
++ 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, |
++ 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5, |
++ 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, |
++ 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2, |
++ 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, |
++ 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, |
++ 0xF9, 0xFA |
++}; |
++ |
++/* AC table 1 */ |
++LOCAL(const unsigned char) mjpg_ac1_bits[] = { |
++ 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04, |
++ 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77 |
++}; |
++ |
++LOCAL(const unsigned char) mjpg_ac1_huffval[] = { |
++ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, |
++ 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, |
++ 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, |
++ 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0, |
++ 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34, |
++ 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26, |
++ 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, |
++ 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, |
++ 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, |
++ 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, |
++ 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, |
++ 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
++ 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, |
++ 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, |
++ 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, |
++ 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, |
++ 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, |
++ 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, |
++ 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, |
++ 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, |
++ 0xF9, 0xFA |
+}; |
+ |
-+LOCAL(const unsigned char) mjpg_dc0_huffval[] = { |
-+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
-+ 0x08, 0x09, 0x0A, 0x0B |
-+}; |
++/* Loads the default Huffman tables used by motion JPEG frames. For each DC/AC |
++ * table slot 0 and 1 that is still unset, this function allocates a table and |
++ * copies in the matching default suggested in the JPEG standard. |
++ */ |
++LOCAL(void) |
++mjpg_load_huff_tables (j_decompress_ptr cinfo) |
++{ |
++ JHUFF_TBL *htblptr; /* table being built for the current slot */ |
++ |
++ if (! cinfo->dc_huff_tbl_ptrs[0]) { /* DC slot 0 is unset: install the default */ |
++ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
++ MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); /* clear the whole struct before filling it */ |
++ MEMCOPY(&htblptr->bits[1], mjpg_dc0_bits, SIZEOF(mjpg_dc0_bits)); /* destination starts at bits[1]; bits[0] stays zero */ |
++ MEMCOPY(&htblptr->huffval[0], mjpg_dc0_huffval, SIZEOF(mjpg_dc0_huffval)); /* huffval entries past the copied list stay zero */ |
++ cinfo->dc_huff_tbl_ptrs[0] = htblptr; |
++ } |
++ |
++ if (! cinfo->dc_huff_tbl_ptrs[1]) { /* DC slot 1: same steps with the dc1 defaults */ |
++ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
++ MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
++ MEMCOPY(&htblptr->bits[1], mjpg_dc1_bits, SIZEOF(mjpg_dc1_bits)); |
++ MEMCOPY(&htblptr->huffval[0], mjpg_dc1_huffval, SIZEOF(mjpg_dc1_huffval)); |
++ cinfo->dc_huff_tbl_ptrs[1] = htblptr; |
++ } |
++ |
++ if (! cinfo->ac_huff_tbl_ptrs[0]) { /* AC slot 0: same steps with the ac0 defaults */ |
++ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
++ MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
++ MEMCOPY(&htblptr->bits[1], mjpg_ac0_bits, SIZEOF(mjpg_ac0_bits)); |
++ MEMCOPY(&htblptr->huffval[0], mjpg_ac0_huffval, SIZEOF(mjpg_ac0_huffval)); |
++ cinfo->ac_huff_tbl_ptrs[0] = htblptr; |
++ } |
++ |
++ if (! cinfo->ac_huff_tbl_ptrs[1]) { /* AC slot 1: same steps with the ac1 defaults */ |
++ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
++ MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
++ MEMCOPY(&htblptr->bits[1], mjpg_ac1_bits, SIZEOF(mjpg_ac1_bits)); |
++ MEMCOPY(&htblptr->huffval[0], mjpg_ac1_huffval, SIZEOF(mjpg_ac1_huffval)); |
++ cinfo->ac_huff_tbl_ptrs[1] = htblptr; |
++ } |
++} |
++ |
++#else |
++ |
++#define mjpg_load_huff_tables(cinfo) |
++ |
++#endif /* MOTION_JPEG_SUPPORTED */ |
++ |
++ |
+ /* |
+ * Read markers until SOS or EOI. |
+ * |
+@@ -1009,6 +1162,7 @@ |
+ break; |
+ |
+ case M_SOS: |
++ mjpg_load_huff_tables(cinfo); |
+ if (! get_sos(cinfo)) |
+ return JPEG_SUSPENDED; |
+ cinfo->unread_marker = 0; /* processed the marker */ |
+Index: jdmaster.c |
+=================================================================== |
+--- jdmaster.c (revision 829) |
++++ jdmaster.c (working copy) |
+@@ -1,9 +1,11 @@ |
+ /* |
+ * jdmaster.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
+- * Copyright (C) 2009, D. R. Commander. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2002-2009 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2009-2011, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains master control logic for the JPEG decompressor. |
+@@ -15,6 +17,7 @@ |
+ #define JPEG_INTERNALS |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
++#include "jpegcomp.h" |
+ |
+ |
+ /* Private state */ |
+@@ -56,7 +59,11 @@ |
+ cinfo->out_color_space != JCS_EXT_BGR && |
+ cinfo->out_color_space != JCS_EXT_BGRX && |
+ cinfo->out_color_space != JCS_EXT_XBGR && |
+- cinfo->out_color_space != JCS_EXT_XRGB) || |
++ cinfo->out_color_space != JCS_EXT_XRGB && |
++ cinfo->out_color_space != JCS_EXT_RGBA && |
++ cinfo->out_color_space != JCS_EXT_BGRA && |
++ cinfo->out_color_space != JCS_EXT_ABGR && |
++ cinfo->out_color_space != JCS_EXT_ARGB) || |
+ cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space]) |
+ return FALSE; |
+ /* and it only handles 2h1v or 2h2v sampling ratios */ |
+@@ -68,9 +75,9 @@ |
+ cinfo->comp_info[2].v_samp_factor != 1) |
+ return FALSE; |
+ /* furthermore, it doesn't work if we've scaled the IDCTs differently */ |
+- if (cinfo->comp_info[0].DCT_scaled_size != cinfo->min_DCT_scaled_size || |
+- cinfo->comp_info[1].DCT_scaled_size != cinfo->min_DCT_scaled_size || |
+- cinfo->comp_info[2].DCT_scaled_size != cinfo->min_DCT_scaled_size) |
++ if (cinfo->comp_info[0]._DCT_scaled_size != cinfo->_min_DCT_scaled_size || |
++ cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size || |
++ cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size) |
+ return FALSE; |
+ /* ??? also need to test for upsample-time rescaling, when & if supported */ |
+ return TRUE; /* by golly, it'll work... */ |
+@@ -84,6 +91,177 @@ |
+ * Compute output image dimensions and related values. |
+ * NOTE: this is exported for possible use by application. |
+ * Hence it mustn't do anything that can't be done twice. |
++ */ |
++ |
++#if JPEG_LIB_VERSION >= 80 |
++GLOBAL(void) |
++#else |
++LOCAL(void) |
++#endif |
++jpeg_core_output_dimensions (j_decompress_ptr cinfo) |
++/* Do computations that are needed before master selection phase: choose the IDCT |
++ * scale k/DCTSIZE (k = 1..16) and the output size. Used for transcoding and full decompression. |
++ */ |
++{ |
++#ifdef IDCT_SCALING_SUPPORTED |
++ int ci; |
++ jpeg_component_info *compptr; |
++ |
++ /* Compute actual output image dimensions and DCT scaling choices. */ |
++ if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) { /* the first k with scale_num*DCTSIZE <= scale_denom*k wins; here k = 1 */ |
++ /* Provide 1/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 1; |
++ cinfo->_min_DCT_v_scaled_size = 1; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) { |
++ /* Provide 2/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 2L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 2L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 2; |
++ cinfo->_min_DCT_v_scaled_size = 2; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) { |
++ /* Provide 3/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 3L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 3L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 3; |
++ cinfo->_min_DCT_v_scaled_size = 3; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) { |
++ /* Provide 4/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 4L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 4L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 4; |
++ cinfo->_min_DCT_v_scaled_size = 4; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) { |
++ /* Provide 5/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 5L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 5L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 5; |
++ cinfo->_min_DCT_v_scaled_size = 5; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) { |
++ /* Provide 6/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 6L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 6L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 6; |
++ cinfo->_min_DCT_v_scaled_size = 6; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) { |
++ /* Provide 7/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 7L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 7L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 7; |
++ cinfo->_min_DCT_v_scaled_size = 7; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) { |
++ /* Provide 8/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 8L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 8L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 8; |
++ cinfo->_min_DCT_v_scaled_size = 8; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) { |
++ /* Provide 9/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 9L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 9L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 9; |
++ cinfo->_min_DCT_v_scaled_size = 9; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) { |
++ /* Provide 10/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 10L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 10L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 10; |
++ cinfo->_min_DCT_v_scaled_size = 10; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) { |
++ /* Provide 11/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 11L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 11L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 11; |
++ cinfo->_min_DCT_v_scaled_size = 11; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) { |
++ /* Provide 12/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 12L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 12L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 12; |
++ cinfo->_min_DCT_v_scaled_size = 12; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) { |
++ /* Provide 13/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 13L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 13L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 13; |
++ cinfo->_min_DCT_v_scaled_size = 13; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) { |
++ /* Provide 14/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 14L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 14L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 14; |
++ cinfo->_min_DCT_v_scaled_size = 14; |
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) { |
++ /* Provide 15/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 15L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 15L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 15; |
++ cinfo->_min_DCT_v_scaled_size = 15; |
++ } else { /* requested ratio exceeds 15/DCTSIZE: fall through to the largest factor */ |
++ /* Provide 16/block_size scaling */ |
++ cinfo->output_width = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_width * 16L, (long) DCTSIZE); |
++ cinfo->output_height = (JDIMENSION) |
++ jdiv_round_up((long) cinfo->image_height * 16L, (long) DCTSIZE); |
++ cinfo->_min_DCT_h_scaled_size = 16; |
++ cinfo->_min_DCT_v_scaled_size = 16; |
++ } |
++ |
++ /* Recompute dimensions of components: each one takes the image-wide minimum scaled size */ |
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
++ ci++, compptr++) { |
++ compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size; |
++ compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size; |
++ } |
++ |
++#else /* !IDCT_SCALING_SUPPORTED */ |
++ |
++ /* Hardwire it to "no scaling" */ |
++ cinfo->output_width = cinfo->image_width; |
++ cinfo->output_height = cinfo->image_height; |
++ /* jdinput.c has already initialized DCT_scaled_size, |
++ * and has computed unscaled downsampled_width and downsampled_height. |
++ */ |
++ |
++#endif /* IDCT_SCALING_SUPPORTED */ |
++} |
++ |
++ |
++/* |
++ * Compute output image dimensions and related values. |
++ * NOTE: this is exported for possible use by application. |
++ * Hence it mustn't do anything that can't be done twice. |
+ * Also note that it may be called before the master module is initialized! |
+ */ |
+ |
+@@ -100,52 +278,31 @@ |
+ if (cinfo->global_state != DSTATE_READY) |
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); |
+ |
++ /* Compute core output image dimensions and DCT scaling choices. */ |
++ jpeg_core_output_dimensions(cinfo); |
++ |
+ #ifdef IDCT_SCALING_SUPPORTED |
+ |
+- /* Compute actual output image dimensions and DCT scaling choices. */ |
+- if (cinfo->scale_num * 8 <= cinfo->scale_denom) { |
+- /* Provide 1/8 scaling */ |
+- cinfo->output_width = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_width, 8L); |
+- cinfo->output_height = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_height, 8L); |
+- cinfo->min_DCT_scaled_size = 1; |
+- } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) { |
+- /* Provide 1/4 scaling */ |
+- cinfo->output_width = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_width, 4L); |
+- cinfo->output_height = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_height, 4L); |
+- cinfo->min_DCT_scaled_size = 2; |
+- } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) { |
+- /* Provide 1/2 scaling */ |
+- cinfo->output_width = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_width, 2L); |
+- cinfo->output_height = (JDIMENSION) |
+- jdiv_round_up((long) cinfo->image_height, 2L); |
+- cinfo->min_DCT_scaled_size = 4; |
+- } else { |
+- /* Provide 1/1 scaling */ |
+- cinfo->output_width = cinfo->image_width; |
+- cinfo->output_height = cinfo->image_height; |
+- cinfo->min_DCT_scaled_size = DCTSIZE; |
+- } |
+ /* In selecting the actual DCT scaling for each component, we try to |
+ * scale up the chroma components via IDCT scaling rather than upsampling. |
+ * This saves time if the upsampler gets to use 1:1 scaling. |
+- * Note this code assumes that the supported DCT scalings are powers of 2. |
++ * Note this code adapts subsampling ratios which are powers of 2. |
+ */ |
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; |
+ ci++, compptr++) { |
+- int ssize = cinfo->min_DCT_scaled_size; |
++ int ssize = cinfo->_min_DCT_scaled_size; |
+ while (ssize < DCTSIZE && |
+- (compptr->h_samp_factor * ssize * 2 <= |
+- cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) && |
+- (compptr->v_samp_factor * ssize * 2 <= |
+- cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) { |
++ ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) % |
++ (compptr->h_samp_factor * ssize * 2) == 0) && |
++ ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) % |
++ (compptr->v_samp_factor * ssize * 2) == 0)) { |
+ ssize = ssize * 2; |
+ } |
++#if JPEG_LIB_VERSION >= 70 |
++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize; |
++#else |
+ compptr->DCT_scaled_size = ssize; |
++#endif |
+ } |
+ |
+ /* Recompute downsampled dimensions of components; |
+@@ -156,11 +313,11 @@ |
+ /* Size in samples, after IDCT scaling */ |
+ compptr->downsampled_width = (JDIMENSION) |
+ jdiv_round_up((long) cinfo->image_width * |
+- (long) (compptr->h_samp_factor * compptr->DCT_scaled_size), |
++ (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size), |
+ (long) (cinfo->max_h_samp_factor * DCTSIZE)); |
+ compptr->downsampled_height = (JDIMENSION) |
+ jdiv_round_up((long) cinfo->image_height * |
+- (long) (compptr->v_samp_factor * compptr->DCT_scaled_size), |
++ (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size), |
+ (long) (cinfo->max_v_samp_factor * DCTSIZE)); |
+ } |
+ |
+@@ -188,6 +345,10 @@ |
+ case JCS_EXT_BGRX: |
+ case JCS_EXT_XBGR: |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_RGBA: |
++ case JCS_EXT_BGRA: |
++ case JCS_EXT_ABGR: |
++ case JCS_EXT_ARGB: |
+ cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space]; |
+ break; |
+ case JCS_YCbCr: |
+@@ -384,7 +545,11 @@ |
+ jinit_inverse_dct(cinfo); |
+ /* Entropy decoding: either Huffman or arithmetic coding. */ |
+ if (cinfo->arith_code) { |
++#ifdef D_ARITH_CODING_SUPPORTED |
++ jinit_arith_decoder(cinfo); |
++#else |
+ ERREXIT(cinfo, JERR_ARITH_NOTIMPL); |
++#endif |
+ } else { |
+ if (cinfo->progressive_mode) { |
+ #ifdef D_PROGRESSIVE_SUPPORTED |
+Index: jdmerge.c |
+=================================================================== |
+--- jdmerge.c (revision 829) |
++++ jdmerge.c (working copy) |
+@@ -1,10 +1,11 @@ |
+ /* |
+ * jdmerge.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1996, Thomas G. Lane. |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * Copyright (C) 2009, D. R. Commander. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2009, 2011, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains code for merged upsampling/color conversion. |
+@@ -38,6 +39,7 @@ |
+ #include "jinclude.h" |
+ #include "jpeglib.h" |
+ #include "jsimd.h" |
++#include "config.h" |
+ |
+ #ifdef UPSAMPLE_MERGING_SUPPORTED |
+ |
+@@ -77,6 +79,107 @@ |
+ #define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5)) |
+ |
+ |
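++/* Include inline routines for colorspace extensions: jdmrgext.c is included once with the default RGB_* macros, then once per extended pixel layout with RGB_* and the *_internal function names redefined. */ |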
++ |
++#include "jdmrgext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++ |
++#define RGB_RED EXT_RGB_RED |
++#define RGB_GREEN EXT_RGB_GREEN |
++#define RGB_BLUE EXT_RGB_BLUE |
++#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
++#define h2v1_merged_upsample_internal extrgb_h2v1_merged_upsample_internal |
++#define h2v2_merged_upsample_internal extrgb_h2v2_merged_upsample_internal |
++#include "jdmrgext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef h2v1_merged_upsample_internal |
++#undef h2v2_merged_upsample_internal |
++ |
++#define RGB_RED EXT_RGBX_RED |
++#define RGB_GREEN EXT_RGBX_GREEN |
++#define RGB_BLUE EXT_RGBX_BLUE |
++#define RGB_ALPHA 3 |
++#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
++#define h2v1_merged_upsample_internal extrgbx_h2v1_merged_upsample_internal |
++#define h2v2_merged_upsample_internal extrgbx_h2v2_merged_upsample_internal |
++#include "jdmrgext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_ALPHA |
++#undef RGB_PIXELSIZE |
++#undef h2v1_merged_upsample_internal |
++#undef h2v2_merged_upsample_internal |
++ |
++#define RGB_RED EXT_BGR_RED |
++#define RGB_GREEN EXT_BGR_GREEN |
++#define RGB_BLUE EXT_BGR_BLUE |
++#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
++#define h2v1_merged_upsample_internal extbgr_h2v1_merged_upsample_internal |
++#define h2v2_merged_upsample_internal extbgr_h2v2_merged_upsample_internal |
++#include "jdmrgext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_PIXELSIZE |
++#undef h2v1_merged_upsample_internal |
++#undef h2v2_merged_upsample_internal |
++ |
++#define RGB_RED EXT_BGRX_RED |
++#define RGB_GREEN EXT_BGRX_GREEN |
++#define RGB_BLUE EXT_BGRX_BLUE |
++#define RGB_ALPHA 3 |
++#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
++#define h2v1_merged_upsample_internal extbgrx_h2v1_merged_upsample_internal |
++#define h2v2_merged_upsample_internal extbgrx_h2v2_merged_upsample_internal |
++#include "jdmrgext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_ALPHA |
++#undef RGB_PIXELSIZE |
++#undef h2v1_merged_upsample_internal |
++#undef h2v2_merged_upsample_internal |
++ |
++#define RGB_RED EXT_XBGR_RED |
++#define RGB_GREEN EXT_XBGR_GREEN |
++#define RGB_BLUE EXT_XBGR_BLUE |
++#define RGB_ALPHA 0 |
++#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
++#define h2v1_merged_upsample_internal extxbgr_h2v1_merged_upsample_internal |
++#define h2v2_merged_upsample_internal extxbgr_h2v2_merged_upsample_internal |
++#include "jdmrgext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_ALPHA |
++#undef RGB_PIXELSIZE |
++#undef h2v1_merged_upsample_internal |
++#undef h2v2_merged_upsample_internal |
++ |
++#define RGB_RED EXT_XRGB_RED |
++#define RGB_GREEN EXT_XRGB_GREEN |
++#define RGB_BLUE EXT_XRGB_BLUE |
++#define RGB_ALPHA 0 |
++#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
++#define h2v1_merged_upsample_internal extxrgb_h2v1_merged_upsample_internal |
++#define h2v2_merged_upsample_internal extxrgb_h2v2_merged_upsample_internal |
++#include "jdmrgext.c" |
++#undef RGB_RED |
++#undef RGB_GREEN |
++#undef RGB_BLUE |
++#undef RGB_ALPHA |
++#undef RGB_PIXELSIZE |
++#undef h2v1_merged_upsample_internal |
++#undef h2v2_merged_upsample_internal |
++ |
++ |
+ /* |
+ * Initialize tables for YCC->RGB colorspace conversion. |
+ * This is taken directly from jdcolor.c; see that file for more info. |
+@@ -230,56 +333,40 @@ |
+ JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, |
+ JSAMPARRAY output_buf) |
+ { |
+- my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
+- register int y, cred, cgreen, cblue; |
+- int cb, cr; |
+- register JSAMPROW outptr; |
+- JSAMPROW inptr0, inptr1, inptr2; |
+- JDIMENSION col; |
+- /* copy these pointers into registers if possible */ |
+- register JSAMPLE * range_limit = cinfo->sample_range_limit; |
+- int * Crrtab = upsample->Cr_r_tab; |
+- int * Cbbtab = upsample->Cb_b_tab; |
+- INT32 * Crgtab = upsample->Cr_g_tab; |
+- INT32 * Cbgtab = upsample->Cb_g_tab; |
+- SHIFT_TEMPS |
+- |
+- inptr0 = input_buf[0][in_row_group_ctr]; |
+- inptr1 = input_buf[1][in_row_group_ctr]; |
+- inptr2 = input_buf[2][in_row_group_ctr]; |
+- outptr = output_buf[0]; |
+- /* Loop for each pair of output pixels */ |
+- for (col = cinfo->output_width >> 1; col > 0; col--) { |
+- /* Do the chroma part of the calculation */ |
+- cb = GETJSAMPLE(*inptr1++); |
+- cr = GETJSAMPLE(*inptr2++); |
+- cred = Crrtab[cr]; |
+- cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
+- cblue = Cbbtab[cb]; |
+- /* Fetch 2 Y values and emit 2 pixels */ |
+- y = GETJSAMPLE(*inptr0++); |
+- outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- outptr += rgb_pixelsize[cinfo->out_color_space]; |
+- y = GETJSAMPLE(*inptr0++); |
+- outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- outptr += rgb_pixelsize[cinfo->out_color_space]; |
++ switch (cinfo->out_color_space) { |
++ case JCS_EXT_RGB: |
++ extrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ extrgbx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_BGR: |
++ extbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ extbgrx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ extxbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ extxrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ default: |
++ h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
+ } |
+- /* If image width is odd, do the last output column separately */ |
+- if (cinfo->output_width & 1) { |
+- cb = GETJSAMPLE(*inptr1); |
+- cr = GETJSAMPLE(*inptr2); |
+- cred = Crrtab[cr]; |
+- cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
+- cblue = Cbbtab[cb]; |
+- y = GETJSAMPLE(*inptr0); |
+- outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- } |
+ } |
+ |
+ |
+@@ -292,72 +379,40 @@ |
+ JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, |
+ JSAMPARRAY output_buf) |
+ { |
+- my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
+- register int y, cred, cgreen, cblue; |
+- int cb, cr; |
+- register JSAMPROW outptr0, outptr1; |
+- JSAMPROW inptr00, inptr01, inptr1, inptr2; |
+- JDIMENSION col; |
+- /* copy these pointers into registers if possible */ |
+- register JSAMPLE * range_limit = cinfo->sample_range_limit; |
+- int * Crrtab = upsample->Cr_r_tab; |
+- int * Cbbtab = upsample->Cb_b_tab; |
+- INT32 * Crgtab = upsample->Cr_g_tab; |
+- INT32 * Cbgtab = upsample->Cb_g_tab; |
+- SHIFT_TEMPS |
+- |
+- inptr00 = input_buf[0][in_row_group_ctr*2]; |
+- inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; |
+- inptr1 = input_buf[1][in_row_group_ctr]; |
+- inptr2 = input_buf[2][in_row_group_ctr]; |
+- outptr0 = output_buf[0]; |
+- outptr1 = output_buf[1]; |
+- /* Loop for each group of output pixels */ |
+- for (col = cinfo->output_width >> 1; col > 0; col--) { |
+- /* Do the chroma part of the calculation */ |
+- cb = GETJSAMPLE(*inptr1++); |
+- cr = GETJSAMPLE(*inptr2++); |
+- cred = Crrtab[cr]; |
+- cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
+- cblue = Cbbtab[cb]; |
+- /* Fetch 4 Y values and emit 4 pixels */ |
+- y = GETJSAMPLE(*inptr00++); |
+- outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- outptr0 += RGB_PIXELSIZE; |
+- y = GETJSAMPLE(*inptr00++); |
+- outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- outptr0 += RGB_PIXELSIZE; |
+- y = GETJSAMPLE(*inptr01++); |
+- outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- outptr1 += RGB_PIXELSIZE; |
+- y = GETJSAMPLE(*inptr01++); |
+- outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- outptr1 += RGB_PIXELSIZE; |
++ switch (cinfo->out_color_space) { |
++ case JCS_EXT_RGB: |
++ extrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ extrgbx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_BGR: |
++ extbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ extbgrx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ extxbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ extxrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
++ default: |
++ h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr, |
++ output_buf); |
++ break; |
+ } |
+- /* If image width is odd, do the last output column separately */ |
+- if (cinfo->output_width & 1) { |
+- cb = GETJSAMPLE(*inptr1); |
+- cr = GETJSAMPLE(*inptr2); |
+- cred = Crrtab[cr]; |
+- cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
+- cblue = Cbbtab[cb]; |
+- y = GETJSAMPLE(*inptr00); |
+- outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- y = GETJSAMPLE(*inptr01); |
+- outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred]; |
+- outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen]; |
+- outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue]; |
+- } |
+ } |
+ |
+ |
+Index: jdphuff.c |
+=================================================================== |
+--- jdphuff.c (revision 829) |
++++ jdphuff.c (working copy) |
+@@ -198,6 +198,7 @@ |
+ * On some machines, a shift and add will be faster than a table lookup. |
+ */ |
+ |
++#define AVOID_TABLES |
+ #ifdef AVOID_TABLES |
+ |
+ #define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) |
+Index: jdsample.c |
+=================================================================== |
+--- jdsample.c (revision 829) |
++++ jdsample.c (working copy) |
+@@ -1,9 +1,11 @@ |
+ /* |
+ * jdsample.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1996, Thomas G. Lane. |
++ * libjpeg-turbo Modifications: |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * This file is part of the Independent JPEG Group's software. |
++ * Copyright (C) 2010, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains upsampling routines. |
+@@ -19,50 +21,12 @@ |
+ * Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7. |
+ */ |
+ |
+-#define JPEG_INTERNALS |
+-#include "jinclude.h" |
+-#include "jpeglib.h" |
++#include "jdsample.h" |
+ #include "jsimd.h" |
++#include "jpegcomp.h" |
+ |
+ |
+-/* Pointer to routine to upsample a single component */ |
+-typedef JMETHOD(void, upsample1_ptr, |
+- (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
+- JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); |
+ |
+-/* Private subobject */ |
+- |
+-typedef struct { |
+- struct jpeg_upsampler pub; /* public fields */ |
+- |
+- /* Color conversion buffer. When using separate upsampling and color |
+- * conversion steps, this buffer holds one upsampled row group until it |
+- * has been color converted and output. |
+- * Note: we do not allocate any storage for component(s) which are full-size, |
+- * ie do not need rescaling. The corresponding entry of color_buf[] is |
+- * simply set to point to the input data array, thereby avoiding copying. |
+- */ |
+- JSAMPARRAY color_buf[MAX_COMPONENTS]; |
+- |
+- /* Per-component upsampling method pointers */ |
+- upsample1_ptr methods[MAX_COMPONENTS]; |
+- |
+- int next_row_out; /* counts rows emitted from color_buf */ |
+- JDIMENSION rows_to_go; /* counts rows remaining in image */ |
+- |
+- /* Height of an input row group for each component. */ |
+- int rowgroup_height[MAX_COMPONENTS]; |
+- |
+- /* These arrays save pixel expansion factors so that int_expand need not |
+- * recompute them each time. They are unused for other upsampling methods. |
+- */ |
+- UINT8 h_expand[MAX_COMPONENTS]; |
+- UINT8 v_expand[MAX_COMPONENTS]; |
+-} my_upsampler; |
+- |
+-typedef my_upsampler * my_upsample_ptr; |
+- |
+- |
+ /* |
+ * Initialize for an upsampling pass. |
+ */ |
+@@ -420,7 +384,7 @@ |
+ /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1, |
+ * so don't ask for it. |
+ */ |
+- do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1; |
++ do_fancy = cinfo->do_fancy_upsampling && cinfo->_min_DCT_scaled_size > 1; |
+ |
+ /* Verify we can handle the sampling factors, select per-component methods, |
+ * and create storage as needed. |
+@@ -430,10 +394,10 @@ |
+ /* Compute size of an "input group" after IDCT scaling. This many samples |
+ * are to be converted to max_h_samp_factor * max_v_samp_factor pixels. |
+ */ |
+- h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) / |
+- cinfo->min_DCT_scaled_size; |
+- v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) / |
+- cinfo->min_DCT_scaled_size; |
++ h_in_group = (compptr->h_samp_factor * compptr->_DCT_scaled_size) / |
++ cinfo->_min_DCT_scaled_size; |
++ v_in_group = (compptr->v_samp_factor * compptr->_DCT_scaled_size) / |
++ cinfo->_min_DCT_scaled_size; |
+ h_out_group = cinfo->max_h_samp_factor; |
+ v_out_group = cinfo->max_v_samp_factor; |
+ upsample->rowgroup_height[ci] = v_in_group; /* save for use later */ |
+Index: jdtrans.c |
+=================================================================== |
+--- jdtrans.c (revision 829) |
++++ jdtrans.c (working copy) |
+@@ -99,9 +99,18 @@ |
+ /* This is effectively a buffered-image operation. */ |
+ cinfo->buffered_image = TRUE; |
+ |
++#if JPEG_LIB_VERSION >= 80 |
++ /* Compute output image dimensions and related values. */ |
++ jpeg_core_output_dimensions(cinfo); |
++#endif |
++ |
+ /* Entropy decoding: either Huffman or arithmetic coding. */ |
+ if (cinfo->arith_code) { |
++#ifdef D_ARITH_CODING_SUPPORTED |
++ jinit_arith_decoder(cinfo); |
++#else |
+ ERREXIT(cinfo, JERR_ARITH_NOTIMPL); |
++#endif |
+ } else { |
+ if (cinfo->progressive_mode) { |
+ #ifdef D_PROGRESSIVE_SUPPORTED |
+Index: jerror.h |
+=================================================================== |
+--- jerror.h (revision 829) |
++++ jerror.h (working copy) |
+@@ -2,6 +2,7 @@ |
+ * jerror.h |
+ * |
+ * Copyright (C) 1994-1997, Thomas G. Lane. |
++ * Modified 1997-2009 by Guido Vollbeding. |
+ * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+@@ -39,14 +40,23 @@ |
+ JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */ |
+ |
+ /* For maintenance convenience, list is alphabetical by message code name */ |
++#if JPEG_LIB_VERSION < 70 |
+ JMESSAGE(JERR_ARITH_NOTIMPL, |
+- "Sorry, there are legal restrictions on arithmetic coding") |
++ "Sorry, arithmetic coding is not implemented") |
++#endif |
+ JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix") |
+ JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix") |
+ JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode") |
+ JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS") |
++#if JPEG_LIB_VERSION >= 70 |
++JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request") |
++#endif |
+ JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range") |
+ JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported") |
++#if JPEG_LIB_VERSION >= 70 |
++JMESSAGE(JERR_BAD_DROP_SAMPLING, |
++ "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c") |
++#endif |
+ JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition") |
+ JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace") |
+ JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace") |
+@@ -93,6 +103,9 @@ |
+ JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change") |
+ JMESSAGE(JERR_NOTIMPL, "Not implemented yet") |
+ JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time") |
++#if JPEG_LIB_VERSION >= 70 |
++JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined") |
++#endif |
+ JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported") |
+ JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined") |
+ JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image") |
+@@ -170,6 +183,9 @@ |
+ JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u") |
+ JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u") |
+ JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d") |
++#if JPEG_LIB_VERSION >= 70 |
++JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") |
++#endif |
+ JMESSAGE(JWRN_BOGUS_PROGRESSION, |
+ "Inconsistent progression sequence for component %d coefficient %d") |
+ JMESSAGE(JWRN_EXTRANEOUS_DATA, |
+@@ -182,6 +198,13 @@ |
+ "Corrupt JPEG data: found marker 0x%02x instead of RST%d") |
+ JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG") |
+ JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines") |
++#if JPEG_LIB_VERSION < 70 |
++JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request") |
++#if defined(C_ARITH_CODING_SUPPORTED) || defined(D_ARITH_CODING_SUPPORTED) |
++JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined") |
++JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") |
++#endif |
++#endif |
+ |
+ #ifdef JMAKE_ENUM_LIST |
+ |
+Index: jidctint.c |
+=================================================================== |
+--- jidctint.c (revision 829) |
++++ jidctint.c (working copy) |
+@@ -2,6 +2,7 @@ |
+ * jidctint.c |
+ * |
+ * Copyright (C) 1991-1998, Thomas G. Lane. |
++ * Modification developed 2002-2009 by Guido Vollbeding. |
+ * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+@@ -23,6 +24,27 @@ |
+ * The advantage of this method is that no data path contains more than one |
+ * multiplication; this allows a very simple and accurate implementation in |
+ * scaled fixed-point arithmetic, with a minimal number of shifts. |
++ * |
++ * We also provide IDCT routines with various output sample block sizes for |
++ * direct resolution reduction or enlargement without additional resampling: |
++ * NxN (N=1...16) pixels for one 8x8 input DCT block. |
++ * |
++ * For N<8 we simply take the corresponding low-frequency coefficients of |
++ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block |
++ * to yield the downscaled outputs. |
++ * This can be seen as direct low-pass downsampling from the DCT domain |
++ * point of view rather than the usual spatial domain point of view, |
++ * yielding significant computational savings and results at least |
++ * as good as common bilinear (averaging) spatial downsampling. |
++ * |
++ * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as |
++ * lower frequencies and higher frequencies assumed to be zero. |
++ * It turns out that the computational effort is similar to the 8x8 IDCT |
++ * regarding the output size. |
++ * Furthermore, the scaling and descaling is the same for all IDCT sizes. |
++ * |
++ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases |
++ * since there would be too many additional constants to pre-calculate. |
+ */ |
+ |
+ #define JPEG_INTERNALS |
+@@ -38,7 +60,7 @@ |
+ */ |
+ |
+ #if DCTSIZE != 8 |
+- Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ |
++ Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ |
+ #endif |
+ |
+ |
+@@ -386,4 +408,2216 @@ |
+ } |
+ } |
+ |
++#ifdef IDCT_SCALING_SUPPORTED |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a 7x7 output block. |
++ * |
++ * Optimized algorithm with 12 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/14). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; |
++ INT32 z1, z2, z3; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[7*7]; /* buffers data between passes */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ tmp13 <<= CONST_BITS; |
++ /* Add fudge factor here for final descale. */ |
++ tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ |
++ tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ |
++ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ |
++ tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ |
++ tmp0 = z1 + z3; |
++ z2 -= tmp0; |
++ tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ |
++ tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ |
++ tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ |
++ tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ |
++ tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ |
++ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ |
++ tmp0 = tmp1 - tmp2; |
++ tmp1 += tmp2; |
++ tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ |
++ tmp1 += tmp2; |
++ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ |
++ tmp0 += z2; |
++ tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); |
++ wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); |
++ wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); |
++ wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); |
++ wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 7 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 7; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ tmp13 <<= CONST_BITS; |
++ |
++ z1 = (INT32) wsptr[2]; |
++ z2 = (INT32) wsptr[4]; |
++ z3 = (INT32) wsptr[6]; |
++ |
++ tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ |
++ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ |
++ tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ |
++ tmp0 = z1 + z3; |
++ z2 -= tmp0; |
++ tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ |
++ tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ |
++ tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ |
++ tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ |
++ tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ |
++ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ |
++ tmp0 = tmp1 - tmp2; |
++ tmp1 += tmp2; |
++ tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ |
++ tmp1 += tmp2; |
++ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ |
++ tmp0 += z2; |
++ tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 7; /* advance pointer to next row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a reduced-size 6x6 output block. |
++ * |
++ * Optimized algorithm with 3 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/12). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; |
++ INT32 z1, z2, z3; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[6*6]; /* buffers data between passes */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ tmp0 <<= CONST_BITS; |
++ /* Add fudge factor here for final descale. */ |
++ tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ |
++ tmp1 = tmp0 + tmp10; |
++ tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); |
++ tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ |
++ tmp10 = tmp1 + tmp0; |
++ tmp12 = tmp1 - tmp0; |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ |
++ tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); |
++ tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); |
++ tmp1 = (z1 - z2 - z3) << PASS1_BITS; |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[6*1] = (int) (tmp11 + tmp1); |
++ wsptr[6*4] = (int) (tmp11 - tmp1); |
++ wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); |
++ wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 6 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 6; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ tmp0 <<= CONST_BITS; |
++ tmp2 = (INT32) wsptr[4]; |
++ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ |
++ tmp1 = tmp0 + tmp10; |
++ tmp11 = tmp0 - tmp10 - tmp10; |
++ tmp10 = (INT32) wsptr[2]; |
++ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ |
++ tmp10 = tmp1 + tmp0; |
++ tmp12 = tmp1 - tmp0; |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ |
++ tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); |
++ tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); |
++ tmp1 = (z1 - z2 - z3) << CONST_BITS; |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 6; /* advance pointer to next row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a reduced-size 5x5 output block. |
++ * |
++ * Optimized algorithm with 5 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/10). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp0, tmp1, tmp10, tmp11, tmp12; |
++ INT32 z1, z2, z3; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[5*5]; /* buffers data between passes */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ tmp12 <<= CONST_BITS; |
++ /* Add fudge factor here for final descale. */ |
++ tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ |
++ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ |
++ z3 = tmp12 + z2; |
++ tmp10 = z3 + z1; |
++ tmp11 = z3 - z1; |
++ tmp12 -= z2 << 2; |
++ |
++ /* Odd part */ |
++ |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ |
++ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ |
++ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ |
++ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); |
++ wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); |
++ wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 5 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 5; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ tmp12 <<= CONST_BITS; |
++ tmp0 = (INT32) wsptr[2]; |
++ tmp1 = (INT32) wsptr[4]; |
++ z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ |
++ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ |
++ z3 = tmp12 + z2; |
++ tmp10 = z3 + z1; |
++ tmp11 = z3 - z1; |
++ tmp12 -= z2 << 2; |
++ |
++ /* Odd part */ |
++ |
++ z2 = (INT32) wsptr[1]; |
++ z3 = (INT32) wsptr[3]; |
++ |
++ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ |
++ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ |
++ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 5; /* advance pointer to next row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a reduced-size 3x3 output block. |
++ * |
++ * Optimized algorithm with 2 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/6). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp0, tmp2, tmp10, tmp12; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[3*3]; /* buffers data between passes */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ tmp0 <<= CONST_BITS; |
++ /* Add fudge factor here for final descale. */ |
++ tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ |
++ tmp10 = tmp0 + tmp12; |
++ tmp2 = tmp0 - tmp12 - tmp12; |
++ |
++ /* Odd part */ |
++ |
++ tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 3 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 3; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ tmp0 <<= CONST_BITS; |
++ tmp2 = (INT32) wsptr[2]; |
++ tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ |
++ tmp10 = tmp0 + tmp12; |
++ tmp2 = tmp0 - tmp12 - tmp12; |
++ |
++ /* Odd part */ |
++ |
++ tmp12 = (INT32) wsptr[1]; |
++ tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 3; /* advance pointer to next row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a 9x9 output block. |
++ * |
++ * Optimized algorithm with 10 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/18). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; |
++ INT32 z1, z2, z3, z4; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[8*9]; /* buffers data between passes */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ tmp0 <<= CONST_BITS; |
++ /* Add fudge factor here for final descale. */ |
++ tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ |
++ tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ |
++ tmp1 = tmp0 + tmp3; |
++ tmp2 = tmp0 - tmp3 - tmp3; |
++ |
++ tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ |
++ tmp11 = tmp2 + tmp0; |
++ tmp14 = tmp2 - tmp0 - tmp0; |
++ |
++ tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ |
++ tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ |
++ tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ |
++ |
++ tmp10 = tmp1 + tmp0 - tmp3; |
++ tmp12 = tmp1 - tmp0 + tmp2; |
++ tmp13 = tmp1 - tmp2 + tmp3; |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
++ |
++ z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ |
++ |
++ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ |
++ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ |
++ tmp0 = tmp2 + tmp3 - z2; |
++ tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ |
++ tmp2 += z2 - tmp1; |
++ tmp3 += z2 + tmp1; |
++ tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); |
++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); |
++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); |
++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); |
++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); |
++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); |
++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 9 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 9; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ tmp0 <<= CONST_BITS; |
++ |
++ z1 = (INT32) wsptr[2]; |
++ z2 = (INT32) wsptr[4]; |
++ z3 = (INT32) wsptr[6]; |
++ |
++ tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ |
++ tmp1 = tmp0 + tmp3; |
++ tmp2 = tmp0 - tmp3 - tmp3; |
++ |
++ tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ |
++ tmp11 = tmp2 + tmp0; |
++ tmp14 = tmp2 - tmp0 - tmp0; |
++ |
++ tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ |
++ tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ |
++ tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ |
++ |
++ tmp10 = tmp1 + tmp0 - tmp3; |
++ tmp12 = tmp1 - tmp0 + tmp2; |
++ tmp13 = tmp1 - tmp2 + tmp3; |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ z4 = (INT32) wsptr[7]; |
++ |
++ z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ |
++ |
++ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ |
++ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ |
++ tmp0 = tmp2 + tmp3 - z2; |
++ tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ |
++ tmp2 += z2 - tmp1; |
++ tmp3 += z2 + tmp1; |
++ tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 8; /* advance pointer to next row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Dequantize coef_block with compptr->dct_table and apply an inverse DCT, |
++ * producing a 10x10 output block at output_buf[0..9] + output_col. |
++ * Pass 1 turns 8 input columns into 10 workspace rows; pass 2 emits 10 samples per row. |
++ * Optimized algorithm with 12 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/20). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14; |
++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24; |
++ INT32 z1, z2, z3, z4, z5; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[8*10]; /* pass-1 results: 10 rows of 8 values, read back by pass 2 */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ z3 <<= CONST_BITS; |
++ /* Add rounding term, ONE << (descale shift - 1), ahead of the pass-1 descale. */ |
++ z3 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ |
++ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ |
++ tmp10 = z3 + z1; |
++ tmp11 = z3 - z2; |
++ |
++ tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ |
++ CONST_BITS-PASS1_BITS); |
++ |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ |
++ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ |
++ tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ |
++ tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ |
++ |
++ tmp20 = tmp10 + tmp12; |
++ tmp24 = tmp10 - tmp12; |
++ tmp21 = tmp11 + tmp13; |
++ tmp23 = tmp11 - tmp13; |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
++ |
++ tmp11 = z2 + z4; |
++ tmp13 = z2 - z4; |
++ |
++ tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ |
++ z5 = z3 << CONST_BITS; /* NOTE(review): presumably z3 * c5 with c5 = 1 -- confirm FIX() scale */ |
++ |
++ z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ |
++ z4 = z5 + tmp12; |
++ |
++ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ |
++ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ |
++ |
++ z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ |
++ z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); |
++ |
++ tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; /* stored below without a descale shift */ |
++ |
++ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ |
++ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*2] = (int) (tmp22 + tmp12); /* no shift: tmp22 was shifted when computed */ |
++ wsptr[8*7] = (int) (tmp22 - tmp12); /* no shift: tmp22 was shifted when computed */ |
++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 10 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 10; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add rounding term, ONE << (descale shift - 1) once scaled by CONST_BITS. */ |
++ z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ z3 <<= CONST_BITS; |
++ z4 = (INT32) wsptr[4]; |
++ z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ |
++ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ |
++ tmp10 = z3 + z1; |
++ tmp11 = z3 - z2; |
++ |
++ tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ |
++ |
++ z2 = (INT32) wsptr[2]; |
++ z3 = (INT32) wsptr[6]; |
++ |
++ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ |
++ tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ |
++ tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ |
++ |
++ tmp20 = tmp10 + tmp12; |
++ tmp24 = tmp10 - tmp12; |
++ tmp21 = tmp11 + tmp13; |
++ tmp23 = tmp11 - tmp13; |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ z3 <<= CONST_BITS; |
++ z4 = (INT32) wsptr[7]; |
++ |
++ tmp11 = z2 + z4; |
++ tmp13 = z2 - z4; |
++ |
++ tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ |
++ |
++ z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ |
++ z4 = z3 + tmp12; |
++ |
++ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ |
++ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ |
++ |
++ z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ |
++ z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); |
++ |
++ tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; /* z3 was already shifted by CONST_BITS above */ |
++ |
++ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ |
++ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 8; /* advance to the next 8-entry workspace row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Dequantize coef_block with compptr->dct_table and apply an inverse DCT, |
++ * producing a 11x11 output block at output_buf[0..10] + output_col. |
++ * Pass 1 turns 8 input columns into 11 workspace rows; pass 2 emits 11 samples per row. |
++ * Optimized algorithm with 24 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/22). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14; |
++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; |
++ INT32 z1, z2, z3, z4; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[8*11]; /* pass-1 results: 11 rows of 8 values, read back by pass 2 */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ tmp10 <<= CONST_BITS; |
++ /* Add rounding term, ONE << (descale shift - 1), ahead of the pass-1 descale. */ |
++ tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ |
++ tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ |
++ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ |
++ z4 = z1 + z3; |
++ tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ |
++ z4 -= z2; |
++ tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ |
++ tmp21 = tmp20 + tmp23 + tmp25 - |
++ MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ |
++ tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ |
++ tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ |
++ tmp24 += tmp25; |
++ tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ |
++ tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ |
++ MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ |
++ tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
++ |
++ tmp11 = z1 + z2; |
++ tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ |
++ tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ |
++ tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ |
++ tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ |
++ tmp10 = tmp11 + tmp12 + tmp13 - |
++ MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ |
++ z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ |
++ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ |
++ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ |
++ z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ |
++ tmp11 += z1; |
++ tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ |
++ tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ |
++ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ |
++ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 11 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 11; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add rounding term, ONE << (descale shift - 1) once scaled by CONST_BITS. */ |
++ tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ tmp10 <<= CONST_BITS; |
++ |
++ z1 = (INT32) wsptr[2]; |
++ z2 = (INT32) wsptr[4]; |
++ z3 = (INT32) wsptr[6]; |
++ |
++ tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ |
++ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ |
++ z4 = z1 + z3; |
++ tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ |
++ z4 -= z2; |
++ tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ |
++ tmp21 = tmp20 + tmp23 + tmp25 - |
++ MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ |
++ tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ |
++ tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ |
++ tmp24 += tmp25; |
++ tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ |
++ tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ |
++ MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ |
++ tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ z4 = (INT32) wsptr[7]; |
++ |
++ tmp11 = z1 + z2; |
++ tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ |
++ tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ |
++ tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ |
++ tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ |
++ tmp10 = tmp11 + tmp12 + tmp13 - |
++ MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ |
++ z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ |
++ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ |
++ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ |
++ z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ |
++ tmp11 += z1; |
++ tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ |
++ tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ |
++ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ |
++ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 8; /* advance to the next 8-entry workspace row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Dequantize coef_block with compptr->dct_table and apply an inverse DCT, |
++ * producing a 12x12 output block at output_buf[0..11] + output_col. |
++ * Pass 1 turns 8 input columns into 12 workspace rows; pass 2 emits 12 samples per row. |
++ * Optimized algorithm with 15 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/24). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; |
++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; |
++ INT32 z1, z2, z3, z4; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[8*12]; /* pass-1 results: 12 rows of 8 values, read back by pass 2 */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ z3 <<= CONST_BITS; |
++ /* Add rounding term, ONE << (descale shift - 1), ahead of the pass-1 descale. */ |
++ z3 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ |
++ |
++ tmp10 = z3 + z4; |
++ tmp11 = z3 - z4; |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ |
++ z1 <<= CONST_BITS; /* NOTE(review): presumably a unit-weight (c6 = 1) term -- confirm FIX() scale */ |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ z2 <<= CONST_BITS; /* NOTE(review): presumably a unit-weight (c6 = 1) term -- confirm FIX() scale */ |
++ |
++ tmp12 = z1 - z2; |
++ |
++ tmp21 = z3 + tmp12; |
++ tmp24 = z3 - tmp12; |
++ |
++ tmp12 = z4 + z2; |
++ |
++ tmp20 = tmp10 + tmp12; |
++ tmp25 = tmp10 - tmp12; |
++ |
++ tmp12 = z4 - z1 - z2; |
++ |
++ tmp22 = tmp11 + tmp12; |
++ tmp23 = tmp11 - tmp12; |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
++ |
++ tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ |
++ tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ |
++ |
++ tmp10 = z1 + z3; |
++ tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ |
++ tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ |
++ tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ |
++ tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ |
++ tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ |
++ tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ |
++ tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ |
++ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ |
++ |
++ z1 -= z4; |
++ z2 -= z3; |
++ z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ |
++ tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ |
++ tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |
++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 12 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 12; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add rounding term, ONE << (descale shift - 1) once scaled by CONST_BITS. */ |
++ z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ z3 <<= CONST_BITS; |
++ |
++ z4 = (INT32) wsptr[4]; |
++ z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ |
++ |
++ tmp10 = z3 + z4; |
++ tmp11 = z3 - z4; |
++ |
++ z1 = (INT32) wsptr[2]; |
++ z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ |
++ z1 <<= CONST_BITS; |
++ z2 = (INT32) wsptr[6]; |
++ z2 <<= CONST_BITS; |
++ |
++ tmp12 = z1 - z2; |
++ |
++ tmp21 = z3 + tmp12; |
++ tmp24 = z3 - tmp12; |
++ |
++ tmp12 = z4 + z2; |
++ |
++ tmp20 = tmp10 + tmp12; |
++ tmp25 = tmp10 - tmp12; |
++ |
++ tmp12 = z4 - z1 - z2; |
++ |
++ tmp22 = tmp11 + tmp12; |
++ tmp23 = tmp11 - tmp12; |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ z4 = (INT32) wsptr[7]; |
++ |
++ tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ |
++ tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ |
++ |
++ tmp10 = z1 + z3; |
++ tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ |
++ tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ |
++ tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ |
++ tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ |
++ tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ |
++ tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ |
++ tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ |
++ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ |
++ |
++ z1 -= z4; |
++ z2 -= z3; |
++ z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ |
++ tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ |
++ tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 8; /* advance to the next 8-entry workspace row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a 13x13 output block. |
++ * |
++ * Optimized algorithm with 29 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/26). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; |
++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; |
++ INT32 z1, z2, z3, z4; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[8*13]; /* buffers data between passes */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ z1 <<= CONST_BITS; |
++ /* Add fudge factor here for final descale. */ |
++ z1 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ |
++ tmp10 = z3 + z4; |
++ tmp11 = z3 - z4; |
++ |
++ tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ |
++ tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ |
++ |
++ tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ |
++ tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ |
++ |
++ tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ |
++ tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ |
++ |
++ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ |
++ tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ |
++ |
++ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ |
++ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ |
++ |
++ tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ |
++ tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ |
++ |
++ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
++ |
++ tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ |
++ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ |
++ tmp15 = z1 + z4; |
++ tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ |
++ tmp10 = tmp11 + tmp12 + tmp13 - |
++ MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ |
++ tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ |
++ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ |
++ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ |
++ tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ |
++ tmp11 += tmp14; |
++ tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ |
++ tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ |
++ tmp12 += tmp14; |
++ tmp13 += tmp14; |
++ tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ |
++ tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ |
++ MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ |
++ z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ |
++ tmp14 += z1; |
++ tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ |
++ MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |
++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |
++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 13 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 13; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ z1 <<= CONST_BITS; |
++ |
++ z2 = (INT32) wsptr[2]; |
++ z3 = (INT32) wsptr[4]; |
++ z4 = (INT32) wsptr[6]; |
++ |
++ tmp10 = z3 + z4; |
++ tmp11 = z3 - z4; |
++ |
++ tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ |
++ tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ |
++ |
++ tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ |
++ tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ |
++ |
++ tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ |
++ tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ |
++ |
++ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ |
++ tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ |
++ |
++ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ |
++ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ |
++ |
++ tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ |
++ tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ |
++ |
++ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ z4 = (INT32) wsptr[7]; |
++ |
++ tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ |
++ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ |
++ tmp15 = z1 + z4; |
++ tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ |
++ tmp10 = tmp11 + tmp12 + tmp13 - |
++ MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ |
++ tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ |
++ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ |
++ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ |
++ tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ |
++ tmp11 += tmp14; |
++ tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ |
++ tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ |
++ tmp12 += tmp14; |
++ tmp13 += tmp14; |
++ tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ |
++ tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ |
++ MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ |
++ z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ |
++ tmp14 += z1; |
++ tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ |
++ MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ |
++ |
++ /* Final output stage */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 8; /* advance pointer to next row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a 14x14 output block. |
++ * |
++ * Optimized algorithm with 20 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/28). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, /* input coefficients; rows DCTSIZE*0..7 of 8 columns are read */ |
++ JSAMPARRAY output_buf, JDIMENSION output_col) /* rows 0..13 are written, samples output_col..output_col+13 */ |
++{ |
++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; |
++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; |
++ INT32 z1, z2, z3, z4; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; /* walks compptr->dct_table in step with inptr */ |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[8*14]; /* buffers data between passes: 14 rows of 8 values */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. Each of the 8 columns yields 14 values, stored 8 ints apart. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ z1 <<= CONST_BITS; /* NOTE(review): undefined in ISO C if z1 is negative -- confirm toolchain behavior */ |
++ /* Add fudge factor here for final descale. */ |
++ z1 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ |
++ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ |
++ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ |
++ |
++ tmp10 = z1 + z2; |
++ tmp11 = z1 + z3; |
++ tmp12 = z1 - z4; |
++ |
++ tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ |
++ CONST_BITS-PASS1_BITS); /* descaled here, unlike tmp20..tmp26 which are descaled at output */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ |
++ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ |
++ |
++ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ |
++ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ |
++ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ |
++ MULTIPLY(z2, FIX(1.378756276)); /* c2 */ |
++ |
++ tmp20 = tmp10 + tmp13; |
++ tmp26 = tmp10 - tmp13; |
++ tmp21 = tmp11 + tmp14; |
++ tmp25 = tmp11 - tmp14; |
++ tmp22 = tmp12 + tmp15; |
++ tmp24 = tmp12 - tmp15; |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
++ tmp13 = z4 << CONST_BITS; /* row 7 is not multiplied by a constant, only shifted */ |
++ |
++ tmp14 = z1 + z3; |
++ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ |
++ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ |
++ tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ |
++ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ |
++ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ |
++ z1 -= z2; /* z1 = row 1 - row 3 */ |
++ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ |
++ tmp16 += tmp15; |
++ z1 += z4; /* z1 = row 1 - row 3 + row 7; z4 is still the unshifted row-7 value here */ |
++ z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ |
++ tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ |
++ tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ |
++ z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ |
++ tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ |
++ tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ |
++ |
++ tmp13 = (z1 - z3) << PASS1_BITS; /* row 1 - row 3 - row 5 + row 7, shifted by PASS1_BITS only */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*3] = (int) (tmp23 + tmp13); /* no RIGHT_SHIFT: tmp23 was shifted when computed */ |
++ wsptr[8*10] = (int) (tmp23 - tmp13); /* no RIGHT_SHIFT, same reason */ |
++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |
++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |
++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); |
++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 14 rows from work array, store into output array. Each row reads 8 workspace values and writes 14 samples. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 14; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ z1 <<= CONST_BITS; |
++ z4 = (INT32) wsptr[4]; |
++ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ |
++ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ |
++ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ |
++ |
++ tmp10 = z1 + z2; |
++ tmp11 = z1 + z3; |
++ tmp12 = z1 - z4; |
++ |
++ tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ |
++ |
++ z1 = (INT32) wsptr[2]; |
++ z2 = (INT32) wsptr[6]; |
++ |
++ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ |
++ |
++ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ |
++ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ |
++ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ |
++ MULTIPLY(z2, FIX(1.378756276)); /* c2 */ |
++ |
++ tmp20 = tmp10 + tmp13; |
++ tmp26 = tmp10 - tmp13; |
++ tmp21 = tmp11 + tmp14; |
++ tmp25 = tmp11 - tmp14; |
++ tmp22 = tmp12 + tmp15; |
++ tmp24 = tmp12 - tmp15; |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ z4 = (INT32) wsptr[7]; |
++ z4 <<= CONST_BITS; /* wsptr[7] is not multiplied by a constant, only shifted */ |
++ |
++ tmp14 = z1 + z3; |
++ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ |
++ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ |
++ tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ |
++ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ |
++ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ |
++ z1 -= z2; /* z1 = wsptr[1] - wsptr[3] from here on */ |
++ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ |
++ tmp16 += tmp15; |
++ tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ |
++ tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ |
++ tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ |
++ tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ |
++ tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ |
++ tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ |
++ |
++ tmp13 = ((z1 - z3) << CONST_BITS) + z4; /* (wsptr[1] - wsptr[3] - wsptr[5]) shifted, plus the already shifted wsptr[7] */ |
++ |
++ /* Final output stage: shift down, mask the index with RANGE_MASK, look up in range_limit */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 8; /* advance pointer to next row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a 15x15 output block. |
++ * |
++ * Optimized algorithm with 22 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/30). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, /* input coefficients; rows DCTSIZE*0..7 of 8 columns are read */ |
++ JSAMPARRAY output_buf, JDIMENSION output_col) /* rows 0..14 are written, samples output_col..output_col+14 */ |
++{ |
++ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; |
++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; |
++ INT32 z1, z2, z3, z4; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; /* walks compptr->dct_table in step with inptr */ |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[8*15]; /* buffers data between passes: 15 rows of 8 values */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. Each of the 8 columns yields 15 values, stored 8 ints apart. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ z1 <<= CONST_BITS; /* NOTE(review): undefined in ISO C if z1 is negative -- confirm toolchain behavior */ |
++ /* Add fudge factor here for final descale. */ |
++ z1 += ONE << (CONST_BITS-PASS1_BITS-1); |
++ |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ |
++ tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ |
++ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ |
++ |
++ tmp12 = z1 - tmp10; |
++ tmp13 = z1 + tmp11; |
++ z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ |
++ |
++ z4 = z2 - z3; /* z4 = row 2 - row 4; the row-6 value is no longer needed */ |
++ z3 += z2; /* z3 = row 2 + row 4 */ |
++ tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ |
++ tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ |
++ z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ |
++ |
++ tmp20 = tmp13 + tmp10 + tmp11; |
++ tmp23 = tmp12 - tmp10 + tmp11 + z2; |
++ |
++ tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ |
++ tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ |
++ |
++ tmp25 = tmp13 - tmp10 - tmp11; |
++ tmp26 = tmp12 + tmp10 - tmp11 - z2; |
++ |
++ tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ |
++ tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ |
++ |
++ tmp21 = tmp12 + tmp10 + tmp11; |
++ tmp24 = tmp13 - tmp10 + tmp11; |
++ tmp11 += tmp11; /* doubled: the (c6-c12)/2 product becomes the (c6-c12) product */ |
++ tmp22 = z1 + tmp11; /* c10 = c6-c12 */ |
++ tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); /* row 5 only feeds z3; z4 is reloaded below */ |
++ z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
++ |
++ tmp13 = z2 - z4; |
++ tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ |
++ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ |
++ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ |
++ |
++ tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ |
++ tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ |
++ z2 = z1 - z4; /* z2 no longer holds row 3 */ |
++ tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ |
++ |
++ tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ |
++ tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ |
++ tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ |
++ z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ |
++ tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ |
++ tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); |
++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); |
++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); |
++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); |
++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); |
++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); /* middle row: even part only */ |
++ } |
++ |
++ /* Pass 2: process 15 rows from work array, store into output array. Each row reads 8 workspace values and writes 15 samples. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 15; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ z1 <<= CONST_BITS; |
++ |
++ z2 = (INT32) wsptr[2]; |
++ z3 = (INT32) wsptr[4]; |
++ z4 = (INT32) wsptr[6]; |
++ |
++ tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ |
++ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ |
++ |
++ tmp12 = z1 - tmp10; |
++ tmp13 = z1 + tmp11; |
++ z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ |
++ |
++ z4 = z2 - z3; /* z4 = wsptr[2] - wsptr[4]; the wsptr[6] value is no longer needed */ |
++ z3 += z2; /* z3 = wsptr[2] + wsptr[4] */ |
++ tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ |
++ tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ |
++ z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ |
++ |
++ tmp20 = tmp13 + tmp10 + tmp11; |
++ tmp23 = tmp12 - tmp10 + tmp11 + z2; |
++ |
++ tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ |
++ tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ |
++ |
++ tmp25 = tmp13 - tmp10 - tmp11; |
++ tmp26 = tmp12 + tmp10 - tmp11 - z2; |
++ |
++ tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ |
++ tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ |
++ |
++ tmp21 = tmp12 + tmp10 + tmp11; |
++ tmp24 = tmp13 - tmp10 + tmp11; |
++ tmp11 += tmp11; /* doubled: the (c6-c12)/2 product becomes the (c6-c12) product */ |
++ tmp22 = z1 + tmp11; /* c10 = c6-c12 */ |
++ tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ |
++ |
++ /* Odd part */ |
++ |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z4 = (INT32) wsptr[5]; /* wsptr[5] only feeds z3; z4 is reloaded below */ |
++ z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ |
++ z4 = (INT32) wsptr[7]; |
++ |
++ tmp13 = z2 - z4; |
++ tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ |
++ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ |
++ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ |
++ |
++ tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ |
++ tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ |
++ z2 = z1 - z4; /* z2 no longer holds wsptr[3] */ |
++ tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ |
++ |
++ tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ |
++ tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ |
++ tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ |
++ z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ |
++ tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ |
++ tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ |
++ |
++ /* Final output stage: shift down, mask the index with RANGE_MASK, look up in range_limit */ |
++ |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; /* middle sample: even part only */ |
++ |
++ wsptr += 8; /* advance pointer to next row */ |
++ } |
++} |
++ |
++ |
++/* |
++ * Perform dequantization and inverse DCT on one block of coefficients, |
++ * producing a 16x16 output block. |
++ * |
++ * Optimized algorithm with 28 multiplications in the 1-D kernel. |
++ * cK represents sqrt(2) * cos(K*pi/32). |
++ */ |
++ |
++GLOBAL(void) |
++jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, JDIMENSION output_col) |
++{ |
++ INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; |
++ INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; |
++ INT32 z1, z2, z3, z4; |
++ JCOEFPTR inptr; |
++ ISLOW_MULT_TYPE * quantptr; |
++ int * wsptr; |
++ JSAMPROW outptr; |
++ JSAMPLE *range_limit = IDCT_range_limit(cinfo); |
++ int ctr; |
++ int workspace[8*16]; /* buffers data between passes */ |
++ SHIFT_TEMPS |
++ |
++ /* Pass 1: process columns from input, store into work array. */ |
++ |
++ inptr = coef_block; |
++ quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { |
++ /* Even part */ |
++ |
++ tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); |
++ tmp0 <<= CONST_BITS; |
++ /* Add fudge factor here for final descale. */ |
++ tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); |
++ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ |
++ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ |
++ |
++ tmp10 = tmp0 + tmp1; |
++ tmp11 = tmp0 - tmp1; |
++ tmp12 = tmp0 + tmp2; |
++ tmp13 = tmp0 - tmp2; |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); |
++ z3 = z1 - z2; |
++ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ |
++ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ |
++ |
++ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ |
++ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ |
++ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ |
++ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ |
++ |
++ tmp20 = tmp10 + tmp0; |
++ tmp27 = tmp10 - tmp0; |
++ tmp21 = tmp12 + tmp1; |
++ tmp26 = tmp12 - tmp1; |
++ tmp22 = tmp13 + tmp2; |
++ tmp25 = tmp13 - tmp2; |
++ tmp23 = tmp11 + tmp3; |
++ tmp24 = tmp11 - tmp3; |
++ |
++ /* Odd part */ |
++ |
++ z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); |
++ z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); |
++ z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); |
++ z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); |
++ |
++ tmp11 = z1 + z3; |
++ |
++ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ |
++ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ |
++ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ |
++ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ |
++ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ |
++ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ |
++ tmp0 = tmp1 + tmp2 + tmp3 - |
++ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ |
++ tmp13 = tmp10 + tmp11 + tmp12 - |
++ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ |
++ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ |
++ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ |
++ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ |
++ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ |
++ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ |
++ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ |
++ z2 += z4; |
++ z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ |
++ tmp1 += z1; |
++ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ |
++ z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ |
++ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ |
++ tmp12 += z2; |
++ z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ |
++ tmp2 += z2; |
++ tmp3 += z2; |
++ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ |
++ tmp10 += z2; |
++ tmp11 += z2; |
++ |
++ /* Final output stage */ |
++ |
++ wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); |
++ wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); |
++ wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); |
++ wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); |
++ wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); |
++ wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); |
++ wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); |
++ wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); |
++ wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); |
++ wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); |
++ wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); |
++ wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); |
++ } |
++ |
++ /* Pass 2: process 16 rows from work array, store into output array. */ |
++ |
++ wsptr = workspace; |
++ for (ctr = 0; ctr < 16; ctr++) { |
++ outptr = output_buf[ctr] + output_col; |
++ |
++ /* Even part */ |
++ |
++ /* Add fudge factor here for final descale. */ |
++ tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); |
++ tmp0 <<= CONST_BITS; |
++ |
++ z1 = (INT32) wsptr[4]; |
++ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ |
++ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ |
+ |
-+/* DC table 1 */ |
-+LOCAL(const unsigned char) mjpg_dc1_bits[] = { |
-+ 0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, |
-+ 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 |
-+}; |
++ tmp10 = tmp0 + tmp1; |
++ tmp11 = tmp0 - tmp1; |
++ tmp12 = tmp0 + tmp2; |
++ tmp13 = tmp0 - tmp2; |
+ |
-+LOCAL(const unsigned char) mjpg_dc1_huffval[] = { |
-+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
-+ 0x08, 0x09, 0x0A, 0x0B |
-+}; |
-+ |
-+/* AC table 0 */ |
-+LOCAL(const unsigned char) mjpg_ac0_bits[] = { |
-+ 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, |
-+ 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7D |
-+}; |
++ z1 = (INT32) wsptr[2]; |
++ z2 = (INT32) wsptr[6]; |
++ z3 = z1 - z2; |
++ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ |
++ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ |
+ |
-+LOCAL(const unsigned char) mjpg_ac0_huffval[] = { |
-+ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, |
-+ 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, |
-+ 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, |
-+ 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, |
-+ 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16, |
-+ 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, |
-+ 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, |
-+ 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, |
-+ 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, |
-+ 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, |
-+ 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, |
-+ 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, |
-+ 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, |
-+ 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, |
-+ 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, |
-+ 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5, |
-+ 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, |
-+ 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2, |
-+ 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, |
-+ 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, |
-+ 0xF9, 0xFA |
-+}; |
++ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ |
++ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ |
++ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ |
++ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ |
+ |
-+/* AC table 1 */ |
-+LOCAL(const unsigned char) mjpg_ac1_bits[] = { |
-+ 0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04, |
-+ 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77 |
-+}; |
++ tmp20 = tmp10 + tmp0; |
++ tmp27 = tmp10 - tmp0; |
++ tmp21 = tmp12 + tmp1; |
++ tmp26 = tmp12 - tmp1; |
++ tmp22 = tmp13 + tmp2; |
++ tmp25 = tmp13 - tmp2; |
++ tmp23 = tmp11 + tmp3; |
++ tmp24 = tmp11 - tmp3; |
+ |
-+LOCAL(const unsigned char) mjpg_ac1_huffval[] = { |
-+ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, |
-+ 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, |
-+ 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, |
-+ 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0, |
-+ 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34, |
-+ 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26, |
-+ 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, |
-+ 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, |
-+ 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, |
-+ 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, |
-+ 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, |
-+ 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
-+ 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, |
-+ 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, |
-+ 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, |
-+ 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, |
-+ 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, |
-+ 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, |
-+ 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, |
-+ 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, |
-+ 0xF9, 0xFA |
-+}; |
++ /* Odd part */ |
+ |
-+/* Loads the default Huffman tables used by motion JPEG frames. This function |
-+ * just copies the huffman tables suggested in the JPEG standard when we have |
-+ * not load them. |
-+ */ |
-+LOCAL(void) |
-+mjpg_load_huff_tables (j_decompress_ptr cinfo) |
-+{ |
-+ JHUFF_TBL *htblptr; |
++ z1 = (INT32) wsptr[1]; |
++ z2 = (INT32) wsptr[3]; |
++ z3 = (INT32) wsptr[5]; |
++ z4 = (INT32) wsptr[7]; |
+ |
-+ if (! cinfo->dc_huff_tbl_ptrs[0]) { |
-+ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
-+ MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
-+ MEMCOPY(&htblptr->bits[1], mjpg_dc0_bits, SIZEOF(mjpg_dc0_bits)); |
-+ MEMCOPY(&htblptr->huffval[0], mjpg_dc0_huffval, SIZEOF(mjpg_dc0_huffval)); |
-+ cinfo->dc_huff_tbl_ptrs[0] = htblptr; |
-+ } |
++ tmp11 = z1 + z3; |
+ |
-+ if (! cinfo->dc_huff_tbl_ptrs[1]) { |
-+ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
-+ MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
-+ MEMCOPY(&htblptr->bits[1], mjpg_dc1_bits, SIZEOF(mjpg_dc1_bits)); |
-+ MEMCOPY(&htblptr->huffval[0], mjpg_dc1_huffval, SIZEOF(mjpg_dc1_huffval)); |
-+ cinfo->dc_huff_tbl_ptrs[1] = htblptr; |
-+ } |
++ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ |
++ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ |
++ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ |
++ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ |
++ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ |
++ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ |
++ tmp0 = tmp1 + tmp2 + tmp3 - |
++ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ |
++ tmp13 = tmp10 + tmp11 + tmp12 - |
++ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ |
++ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ |
++ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ |
++ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ |
++ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ |
++ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ |
++ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ |
++ z2 += z4; |
++ z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ |
++ tmp1 += z1; |
++ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ |
++ z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ |
++ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ |
++ tmp12 += z2; |
++ z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ |
++ tmp2 += z2; |
++ tmp3 += z2; |
++ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ |
++ tmp10 += z2; |
++ tmp11 += z2; |
+ |
-+ if (! cinfo->ac_huff_tbl_ptrs[0]) { |
-+ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
-+ MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
-+ MEMCOPY(&htblptr->bits[1], mjpg_ac0_bits, SIZEOF(mjpg_ac0_bits)); |
-+ MEMCOPY(&htblptr->huffval[0], mjpg_ac0_huffval, SIZEOF(mjpg_ac0_huffval)); |
-+ cinfo->ac_huff_tbl_ptrs[0] = htblptr; |
-+ } |
++ /* Final output stage */ |
+ |
-+ if (! cinfo->ac_huff_tbl_ptrs[1]) { |
-+ htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); |
-+ MEMZERO(htblptr, SIZEOF(JHUFF_TBL)); |
-+ MEMCOPY(&htblptr->bits[1], mjpg_ac1_bits, SIZEOF(mjpg_ac1_bits)); |
-+ MEMCOPY(&htblptr->huffval[0], mjpg_ac1_huffval, SIZEOF(mjpg_ac1_huffval)); |
-+ cinfo->ac_huff_tbl_ptrs[1] = htblptr; |
++ outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, |
++ CONST_BITS+PASS1_BITS+3) |
++ & RANGE_MASK]; |
++ |
++ wsptr += 8; /* advance pointer to next row */ |
+ } |
+} |
+ |
-+#else |
-+ |
-+#define mjpg_load_huff_tables(cinfo) |
-+ |
-+#endif /* MOTION_JPEG_SUPPORTED */ |
++#endif /* IDCT_SCALING_SUPPORTED */ |
+ #endif /* DCT_ISLOW_SUPPORTED */ |
+Index: jmemmgr.c |
+=================================================================== |
+--- jmemmgr.c (revision 829) |
++++ jmemmgr.c (working copy) |
+@@ -37,6 +37,15 @@ |
+ #endif |
+ |
+ |
++LOCAL(size_t) |
++round_up_pow2 (size_t a, size_t b) |
++/* a rounded up to the next multiple of b, i.e. ceil(a/b)*b */ |
++/* Assumes a >= 0, b > 0, and b is a power of 2 */ |
++{ |
++ return ((a + b - 1) & (~(b - 1))); |
++} |
+ |
+ |
/* |
- * Read markers until SOS or EOI. |
- * |
-@@ -1013,6 +1150,7 @@ |
- break; |
+ * Some important notes: |
+ * The allocation routines provided here must never return NULL. |
+@@ -122,7 +131,7 @@ |
+ jvirt_barray_ptr virt_barray_list; |
+ |
+ /* This counts total space obtained from jpeg_get_small/large */ |
+- long total_space_allocated; |
++ size_t total_space_allocated; |
+ |
+ /* alloc_sarray and alloc_barray set this value for use by virtual |
+ * array routines. |
+@@ -265,7 +274,7 @@ |
+ * and so that algorithms can straddle outside the proper area up |
+ * to the next alignment. |
+ */ |
+- sizeofobject = jround_up(sizeofobject, ALIGN_SIZE); |
++ sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE); |
+ |
+ /* Check for unsatisfiable request (do now to ensure no overflow below) */ |
+ if ((SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK) |
+@@ -317,8 +326,8 @@ |
+ /* OK, allocate the object from the current pool */ |
+ data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */ |
+ data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */ |
+- if ((unsigned long)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ |
+- data_ptr += ALIGN_SIZE - (unsigned long)data_ptr % ALIGN_SIZE; |
++ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ |
++ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE; |
+ data_ptr += hdr_ptr->bytes_used; /* point to place for object */ |
+ hdr_ptr->bytes_used += sizeofobject; |
+ hdr_ptr->bytes_left -= sizeofobject; |
+@@ -354,7 +363,7 @@ |
+ * algorithms can straddle outside the proper area up to the next |
+ * alignment. |
+ */ |
+- sizeofobject = jround_up(sizeofobject, ALIGN_SIZE); |
++ sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE); |
- case M_SOS: |
-+ mjpg_load_huff_tables(cinfo); |
- if (! get_sos(cinfo)) |
- return JPEG_SUSPENDED; |
- cinfo->unread_marker = 0; /* processed the marker */ |
+ /* Check for unsatisfiable request (do now to ensure no overflow below) */ |
+ if ((SIZEOF(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK) |
+@@ -382,8 +391,8 @@ |
+ |
+ data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */ |
+ data_ptr += SIZEOF(small_pool_hdr); /* ...by skipping the header... */ |
+- if ((unsigned long)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ |
+- data_ptr += ALIGN_SIZE - (unsigned long)data_ptr % ALIGN_SIZE; |
++ if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */ |
++ data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE; |
+ |
+ return (void FAR *) data_ptr; |
+ } |
+@@ -420,7 +429,7 @@ |
+ /* Make sure each row is properly aligned */ |
+ if ((ALIGN_SIZE % SIZEOF(JSAMPLE)) != 0) |
+ out_of_memory(cinfo, 5); /* safety check */ |
+- samplesperrow = jround_up(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE)); |
++ samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE)); |
+ |
+ /* Calculate max # of rows allowed in one allocation chunk */ |
+ ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) / |
+@@ -608,8 +617,8 @@ |
+ /* Allocate the in-memory buffers for any unrealized virtual arrays */ |
+ { |
+ my_mem_ptr mem = (my_mem_ptr) cinfo->mem; |
+- long space_per_minheight, maximum_space, avail_mem; |
+- long minheights, max_minheights; |
++ size_t space_per_minheight, maximum_space, avail_mem; |
++ size_t minheights, max_minheights; |
+ jvirt_sarray_ptr sptr; |
+ jvirt_barray_ptr bptr; |
+ |
+Index: jmemnobs.c |
+=================================================================== |
+--- jmemnobs.c (revision 829) |
++++ jmemnobs.c (working copy) |
+@@ -69,9 +69,9 @@ |
+ * Here we always say, "we got all you want bud!" |
+ */ |
+ |
+-GLOBAL(long) |
+-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed, |
+- long max_bytes_needed, long already_allocated) |
++GLOBAL(size_t) |
++jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed, |
++ size_t max_bytes_needed, size_t already_allocated) |
+ { |
+ return max_bytes_needed; |
+ } |
+Index: jmemsys.h |
+=================================================================== |
+--- jmemsys.h (revision 829) |
++++ jmemsys.h (working copy) |
+@@ -100,10 +100,10 @@ |
+ * Conversely, zero may be returned to always use the minimum amount of memory. |
+ */ |
+ |
+-EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo, |
+- long min_bytes_needed, |
+- long max_bytes_needed, |
+- long already_allocated)); |
++EXTERN(size_t) jpeg_mem_available JPP((j_common_ptr cinfo, |
++ size_t min_bytes_needed, |
++ size_t max_bytes_needed, |
++ size_t already_allocated)); |
+ |
+ |
+ /* |
Index: jmorecfg.h |
=================================================================== |
--- jmorecfg.h (revision 829) |
+++ jmorecfg.h (working copy) |
-@@ -153,14 +153,18 @@ |
+@@ -1,9 +1,10 @@ |
+ /* |
+ * jmorecfg.h |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
+- * Copyright (C) 2009, D. R. Commander. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modifications: |
++ * Copyright (C) 2009, 2011, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains additional configuration options that customize the |
+@@ -153,14 +154,18 @@ |
/* INT16 must hold at least the values -32768..32767. */ |
#ifndef XMD_H /* X11/xmd.h correctly defines INT16 */ |
@@ -184,43 +9623,536 @@ Index: jmorecfg.h |
typedef long INT32; |
#endif |
+#endif |
- |
- /* Datatype used for image dimensions. The JPEG standard only supports |
- * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore |
-@@ -210,11 +214,13 @@ |
- * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol. |
- */ |
- |
-+#ifndef FAR |
- #ifdef NEED_FAR_POINTERS |
- #define FAR far |
- #else |
- #define FAR |
- #endif |
+ |
+ /* Datatype used for image dimensions. The JPEG standard only supports |
+ * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore |
+@@ -210,11 +215,16 @@ |
+ * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol. |
+ */ |
+ |
++#ifndef FAR |
+ #ifdef NEED_FAR_POINTERS |
++#ifndef FAR |
+ #define FAR far |
++#endif |
+ #else |
++#undef FAR |
+ #define FAR |
+ #endif |
++#endif |
+ |
+ |
+ /* |
+@@ -257,8 +267,6 @@ |
+ * (You may HAVE to do that if your compiler doesn't like null source files.) |
+ */ |
+ |
+-/* Arithmetic coding is unsupported for legal reasons. Complaints to IBM. */ |
+- |
+ /* Capability options common to encoder and decoder: */ |
+ |
+ #define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ |
+@@ -267,7 +275,6 @@ |
+ |
+ /* Encoder capability options: */ |
+ |
+-#undef C_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */ |
+ #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ |
+ #define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ |
+ #define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */ |
+@@ -283,7 +290,6 @@ |
+ |
+ /* Decoder capability options: */ |
+ |
+-#undef D_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */ |
+ #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ |
+ #define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ |
+ #define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ |
+@@ -317,22 +323,60 @@ |
+ #define RGB_BLUE 2 /* Offset of Blue */ |
+ #define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ |
+ |
+-#define JPEG_NUMCS 12 |
++#define JPEG_NUMCS 16 |
+ |
++#define EXT_RGB_RED 0 |
++#define EXT_RGB_GREEN 1 |
++#define EXT_RGB_BLUE 2 |
++#define EXT_RGB_PIXELSIZE 3 |
++ |
++#define EXT_RGBX_RED 0 |
++#define EXT_RGBX_GREEN 1 |
++#define EXT_RGBX_BLUE 2 |
++#define EXT_RGBX_PIXELSIZE 4 |
++ |
++#define EXT_BGR_RED 2 |
++#define EXT_BGR_GREEN 1 |
++#define EXT_BGR_BLUE 0 |
++#define EXT_BGR_PIXELSIZE 3 |
++ |
++#define EXT_BGRX_RED 2 |
++#define EXT_BGRX_GREEN 1 |
++#define EXT_BGRX_BLUE 0 |
++#define EXT_BGRX_PIXELSIZE 4 |
++ |
++#define EXT_XBGR_RED 3 |
++#define EXT_XBGR_GREEN 2 |
++#define EXT_XBGR_BLUE 1 |
++#define EXT_XBGR_PIXELSIZE 4 |
++ |
++#define EXT_XRGB_RED 1 |
++#define EXT_XRGB_GREEN 2 |
++#define EXT_XRGB_BLUE 3 |
++#define EXT_XRGB_PIXELSIZE 4 |
++ |
+ static const int rgb_red[JPEG_NUMCS] = { |
+- -1, -1, RGB_RED, -1, -1, -1, 0, 0, 2, 2, 3, 1 |
++ -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED, |
++ EXT_BGR_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED, |
++ EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED |
+ }; |
+ |
+ static const int rgb_green[JPEG_NUMCS] = { |
+- -1, -1, RGB_GREEN, -1, -1, -1, 1, 1, 1, 1, 2, 2 |
++ -1, -1, RGB_GREEN, -1, -1, -1, EXT_RGB_GREEN, EXT_RGBX_GREEN, |
++ EXT_BGR_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN, |
++ EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN |
+ }; |
+ |
+ static const int rgb_blue[JPEG_NUMCS] = { |
+- -1, -1, RGB_BLUE, -1, -1, -1, 2, 2, 0, 0, 1, 3 |
++ -1, -1, RGB_BLUE, -1, -1, -1, EXT_RGB_BLUE, EXT_RGBX_BLUE, |
++ EXT_BGR_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE, |
++ EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE |
+ }; |
+ |
+ static const int rgb_pixelsize[JPEG_NUMCS] = { |
+- -1, -1, RGB_PIXELSIZE, -1, -1, -1, 3, 4, 3, 4, 4, 4 |
++ -1, -1, RGB_PIXELSIZE, -1, -1, -1, EXT_RGB_PIXELSIZE, EXT_RGBX_PIXELSIZE, |
++ EXT_BGR_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE, |
++ EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE |
+ }; |
+ |
+ /* Definitions for speed-related optimizations. */ |
+Index: jpegint.h |
+=================================================================== |
+--- jpegint.h (revision 829) |
++++ jpegint.h (working copy) |
+@@ -2,6 +2,7 @@ |
+ * jpegint.h |
+ * |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
++ * Modified 1997-2009 by Guido Vollbeding. |
+ * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+@@ -304,6 +305,7 @@ |
+ #define jinit_forward_dct jIFDCT |
+ #define jinit_huff_encoder jIHEncoder |
+ #define jinit_phuff_encoder jIPHEncoder |
++#define jinit_arith_encoder jIAEncoder |
+ #define jinit_marker_writer jIMWriter |
+ #define jinit_master_decompress jIDMaster |
+ #define jinit_d_main_controller jIDMainC |
+@@ -313,6 +315,7 @@ |
+ #define jinit_marker_reader jIMReader |
+ #define jinit_huff_decoder jIHDecoder |
+ #define jinit_phuff_decoder jIPHDecoder |
++#define jinit_arith_decoder jIADecoder |
+ #define jinit_inverse_dct jIIDCT |
+ #define jinit_upsampler jIUpsampler |
+ #define jinit_color_deconverter jIDColor |
+@@ -327,6 +330,7 @@ |
+ #define jzero_far jZeroFar |
+ #define jpeg_zigzag_order jZIGTable |
+ #define jpeg_natural_order jZAGTable |
++#define jpeg_aritab jAriTab |
+ #endif /* NEED_SHORT_EXTERNAL_NAMES */ |
+ |
+ |
+@@ -345,6 +349,7 @@ |
+ EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo)); |
+ EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo)); |
+ EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo)); |
++EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo)); |
+ EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo)); |
+ /* Decompression module initialization routines */ |
+ EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo)); |
+@@ -358,6 +363,7 @@ |
+ EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo)); |
+ EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo)); |
+ EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo)); |
++EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo)); |
+ EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo)); |
+ EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo)); |
+ EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo)); |
+@@ -382,6 +388,9 @@ |
+ #endif |
+ extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */ |
+ |
++/* Arithmetic coding probability estimation tables in jaricom.c */ |
++extern const INT32 jpeg_aritab[]; |
++ |
+ /* Suppress undefined-structure complaints if necessary. */ |
+ |
+ #ifdef INCOMPLETE_TYPES_BROKEN |
+Index: jpeglib.h |
+=================================================================== |
+--- jpeglib.h (revision 829) |
++++ jpeglib.h (working copy) |
+@@ -1,9 +1,12 @@ |
+ /* |
+ * jpeglib.h |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1998, Thomas G. Lane. |
+- * Copyright (C) 2009, D. R. Commander. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2002-2009 by Guido Vollbeding. |
++ * Modifications: |
++ * Copyright (C) 2009-2011, 2013, D. R. Commander. |
++ * Copyright (C) 2015, Google, Inc. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file defines the application interface for the JPEG library. |
+@@ -14,6 +17,10 @@ |
+ #ifndef JPEGLIB_H |
+ #define JPEGLIB_H |
+ |
++/* Begin chromium edits */ |
++#include "jpeglibmangler.h" |
++/* End chromium edits */ |
++ |
+ /* |
+ * First we include the configuration files that record how this |
+ * installation of the JPEG library is set up. jconfig.h can be |
+@@ -27,13 +34,13 @@ |
+ #include "jmorecfg.h" /* seldom changed options */ |
+ |
+ |
+-/* Version ID for the JPEG library. |
+- * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60". |
+- */ |
++#ifdef __cplusplus |
++#ifndef DONT_USE_EXTERN_C |
++extern "C" { |
++#endif |
++#endif |
+ |
+-#define JPEG_LIB_VERSION 62 /* Version 6b */ |
+ |
+- |
+ /* Various constants determining the sizes of things. |
+ * All of these are specified by the JPEG standard, so don't change them |
+ * if you want to be compatible. |
+@@ -145,12 +152,17 @@ |
+ * Values of 1,2,4,8 are likely to be supported. Note that different |
+ * components may receive different IDCT scalings. |
+ */ |
++#if JPEG_LIB_VERSION >= 70 |
++ int DCT_h_scaled_size; |
++ int DCT_v_scaled_size; |
++#else |
+ int DCT_scaled_size; |
++#endif |
+ /* The downsampled dimensions are the component's actual, unpadded number |
+ * of samples at the main buffer (preprocessing/compression interface), thus |
+ * downsampled_width = ceil(image_width * Hi/Hmax) |
+ * and similarly for height. For decompression, IDCT scaling is included, so |
+- * downsampled_width = ceil(image_width * Hi/Hmax * DCT_scaled_size/DCTSIZE) |
++ * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE) |
+ */ |
+ JDIMENSION downsampled_width; /* actual width in samples */ |
+ JDIMENSION downsampled_height; /* actual height in samples */ |
+@@ -165,7 +177,7 @@ |
+ int MCU_width; /* number of blocks per MCU, horizontally */ |
+ int MCU_height; /* number of blocks per MCU, vertically */ |
+ int MCU_blocks; /* MCU_width * MCU_height */ |
+- int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_scaled_size */ |
++ int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */ |
+ int last_col_width; /* # of non-dummy blocks across in last MCU */ |
+ int last_row_height; /* # of non-dummy blocks down in last MCU */ |
+ |
+@@ -205,12 +217,13 @@ |
+ /* Known color spaces. */ |
+ |
+ #define JCS_EXTENSIONS 1 |
++#define JCS_ALPHA_EXTENSIONS 1 |
+ |
+ typedef enum { |
+ JCS_UNKNOWN, /* error/unspecified */ |
+ JCS_GRAYSCALE, /* monochrome */ |
+ JCS_RGB, /* red/green/blue as specified by the RGB_RED, RGB_GREEN, |
+- RGB_BLUE, and RGB_PIXELSIZE macros */ |
++ RGB_BLUE, and RGB_PIXELSIZE macros */ |
+ JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */ |
+ JCS_CMYK, /* C/M/Y/K */ |
+ JCS_YCCK, /* Y/Cb/Cr/K */ |
+@@ -220,6 +233,17 @@ |
+ JCS_EXT_BGRX, /* blue/green/red/x */ |
+ JCS_EXT_XBGR, /* x/blue/green/red */ |
+ JCS_EXT_XRGB, /* x/red/green/blue */ |
++ /* When out_color_space is set to JCS_EXT_RGBX, JCS_EXT_BGRX, |
++ JCS_EXT_XBGR, or JCS_EXT_XRGB during decompression, the X byte is |
++ undefined, and in order to ensure the best performance, |
++ libjpeg-turbo can set that byte to whatever value it wishes. Use |
++ the following colorspace constants to ensure that the X byte is set |
++ to 0xFF, so that it can be interpreted as an opaque alpha |
++ channel. */ |
++ JCS_EXT_RGBA, /* red/green/blue/alpha */ |
++ JCS_EXT_BGRA, /* blue/green/red/alpha */ |
++ JCS_EXT_ABGR, /* alpha/blue/green/red */ |
++ JCS_EXT_ARGB /* alpha/red/green/blue */ |
+ } J_COLOR_SPACE; |
+ |
+ /* DCT/IDCT algorithm options. */ |
+@@ -301,6 +325,19 @@ |
+ * helper routines to simplify changing parameters. |
+ */ |
+ |
++#if JPEG_LIB_VERSION >= 70 |
++ unsigned int scale_num, scale_denom; /* fraction by which to scale image */ |
++ |
++ JDIMENSION jpeg_width; /* scaled JPEG image width */ |
++ JDIMENSION jpeg_height; /* scaled JPEG image height */ |
++ /* Dimensions of actual JPEG image that will be written to file, |
++ * derived from input dimensions by scaling factors above. |
++ * These fields are computed by jpeg_start_compress(). |
++ * You can also use jpeg_calc_jpeg_dimensions() to determine these values |
++ * in advance of calling jpeg_start_compress(). |
++ */ |
++#endif |
++ |
+ int data_precision; /* bits of precision in image data */ |
+ |
+ int num_components; /* # of color components in JPEG image */ |
+@@ -308,14 +345,19 @@ |
+ |
+ jpeg_component_info * comp_info; |
+ /* comp_info[i] describes component that appears i'th in SOF */ |
+- |
++ |
+ JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS]; |
+- /* ptrs to coefficient quantization tables, or NULL if not defined */ |
+- |
++#if JPEG_LIB_VERSION >= 70 |
++ int q_scale_factor[NUM_QUANT_TBLS]; |
++#endif |
++ /* ptrs to coefficient quantization tables, or NULL if not defined, |
++ * and corresponding scale factors (percentage, initialized 100). |
++ */ |
++ |
+ JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; |
+ JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; |
+ /* ptrs to Huffman coding tables, or NULL if not defined */ |
+- |
++ |
+ UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */ |
+ UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */ |
+ UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */ |
+@@ -331,6 +373,9 @@ |
+ boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ |
+ boolean optimize_coding; /* TRUE=optimize entropy encoding parms */ |
+ boolean CCIR601_sampling; /* TRUE=first samples are cosited */ |
++#if JPEG_LIB_VERSION >= 70 |
++ boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */ |
++#endif |
+ int smoothing_factor; /* 1..100, or 0 for no input smoothing */ |
+ J_DCT_METHOD dct_method; /* DCT algorithm selector */ |
+ |
+@@ -374,6 +419,11 @@ |
+ int max_h_samp_factor; /* largest h_samp_factor */ |
+ int max_v_samp_factor; /* largest v_samp_factor */ |
+ |
++#if JPEG_LIB_VERSION >= 70 |
++ int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ |
++ int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ |
++#endif |
++ |
+ JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */ |
+ /* The coefficient controller receives data in units of MCU rows as defined |
+ * for fully interleaved scans (whether the JPEG file is interleaved or not). |
+@@ -399,6 +449,12 @@ |
+ |
+ int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ |
+ |
++#if JPEG_LIB_VERSION >= 80 |
++ int block_size; /* the basic DCT block size: 1..16 */ |
++ const int * natural_order; /* natural-order position array */ |
++ int lim_Se; /* min( Se, DCTSIZE2-1 ) */ |
++#endif |
++ |
+ /* |
+ * Links to compression subobjects (methods and private variables of modules) |
+ */ |
+@@ -545,6 +601,9 @@ |
+ jpeg_component_info * comp_info; |
+ /* comp_info[i] describes component that appears i'th in SOF */ |
+ |
++#if JPEG_LIB_VERSION >= 80 |
++ boolean is_baseline; /* TRUE if Baseline SOF0 encountered */ |
++#endif |
+ boolean progressive_mode; /* TRUE if SOFn specifies progressive mode */ |
+ boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ |
+ |
+@@ -585,7 +644,12 @@ |
+ int max_h_samp_factor; /* largest h_samp_factor */ |
+ int max_v_samp_factor; /* largest v_samp_factor */ |
+ |
++#if JPEG_LIB_VERSION >= 70 |
++ int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ |
++ int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ |
++#else |
+ int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */ |
++#endif |
+ |
+ JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */ |
+ /* The coefficient controller's input and output progress is measured in |
+@@ -593,7 +657,7 @@ |
+ * in fully interleaved JPEG scans, but are used whether the scan is |
+ * interleaved or not. We define an iMCU row as v_samp_factor DCT block |
+ * rows of each component. Therefore, the IDCT output contains |
+- * v_samp_factor*DCT_scaled_size sample rows of a component per iMCU row. |
++ * v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row. |
+ */ |
+ |
+ JSAMPLE * sample_range_limit; /* table for fast range-limiting */ |
+@@ -617,6 +681,14 @@ |
+ |
+ int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ |
+ |
++#if JPEG_LIB_VERSION >= 80 |
++ /* These fields are derived from Se of first SOS marker. |
++ */ |
++ int block_size; /* the basic DCT block size: 1..16 */ |
++ const int * natural_order; /* natural-order position array for entropy decode */ |
++ int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */ |
++#endif |
++ |
+ /* This field is shared between entropy decoder and marker parser. |
+ * It is either zero or the code of a JPEG marker that has been |
+ * read from the data source, but has not yet been processed. |
+@@ -846,11 +918,18 @@ |
+ #define jpeg_destroy_decompress jDestDecompress |
+ #define jpeg_stdio_dest jStdDest |
+ #define jpeg_stdio_src jStdSrc |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++#define jpeg_mem_dest jMemDest |
++#define jpeg_mem_src jMemSrc |
++#endif |
+ #define jpeg_set_defaults jSetDefaults |
+ #define jpeg_set_colorspace jSetColorspace |
+ #define jpeg_default_colorspace jDefColorspace |
+ #define jpeg_set_quality jSetQuality |
+ #define jpeg_set_linear_quality jSetLQuality |
++#if JPEG_LIB_VERSION >= 70 |
++#define jpeg_default_qtables jDefQTables |
++#endif |
+ #define jpeg_add_quant_table jAddQuantTable |
+ #define jpeg_quality_scaling jQualityScaling |
+ #define jpeg_simple_progression jSimProgress |
+@@ -860,6 +939,9 @@ |
+ #define jpeg_start_compress jStrtCompress |
+ #define jpeg_write_scanlines jWrtScanlines |
+ #define jpeg_finish_compress jFinCompress |
++#if JPEG_LIB_VERSION >= 70 |
++#define jpeg_calc_jpeg_dimensions jCjpegDimensions |
++#endif |
+ #define jpeg_write_raw_data jWrtRawData |
+ #define jpeg_write_marker jWrtMarker |
+ #define jpeg_write_m_header jWrtMHeader |
+@@ -876,6 +958,9 @@ |
+ #define jpeg_input_complete jInComplete |
+ #define jpeg_new_colormap jNewCMap |
+ #define jpeg_consume_input jConsumeInput |
++#if JPEG_LIB_VERSION >= 80 |
++#define jpeg_core_output_dimensions jCoreDimensions |
++#endif |
+ #define jpeg_calc_output_dimensions jCalcDimensions |
+ #define jpeg_save_markers jSaveMarkers |
+ #define jpeg_set_marker_processor jSetMarker |
+@@ -920,6 +1005,16 @@ |
+ EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile)); |
+ EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile)); |
+ |
++#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED) |
++/* Data source and destination managers: memory buffers. */ |
++EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo, |
++ unsigned char ** outbuffer, |
++ unsigned long * outsize)); |
++EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo, |
++ unsigned char * inbuffer, |
++ unsigned long insize)); |
++#endif |
++ |
+ /* Default parameter setup for compression */ |
+ EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo)); |
+ /* Compression parameter setup aids */ |
+@@ -931,6 +1026,10 @@ |
+ EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo, |
+ int scale_factor, |
+ boolean force_baseline)); |
++#if JPEG_LIB_VERSION >= 70 |
++EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo, |
++ boolean force_baseline)); |
++#endif |
+ EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl, |
+ const unsigned int *basic_table, |
+ int scale_factor, |
+@@ -950,12 +1049,17 @@ |
+ JDIMENSION num_lines)); |
+ EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo)); |
+ |
++#if JPEG_LIB_VERSION >= 70 |
++/* Precalculate JPEG dimensions for current compression parameters. */ |
++EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo)); |
++#endif |
++ |
+ /* Replaces jpeg_write_scanlines when writing raw downsampled data. */ |
+ EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo, |
+ JSAMPIMAGE data, |
+ JDIMENSION num_lines)); |
+ |
+-/* Write a special marker. See libjpeg.doc concerning safe usage. */ |
++/* Write a special marker. See libjpeg.txt concerning safe usage. */ |
+ EXTERN(void) jpeg_write_marker |
+ JPP((j_compress_ptr cinfo, int marker, |
+ const JOCTET * dataptr, unsigned int datalen)); |
+@@ -986,6 +1090,8 @@ |
+ EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo, |
+ JSAMPARRAY scanlines, |
+ JDIMENSION max_lines)); |
++EXTERN(JDIMENSION) jpeg_skip_scanlines (j_decompress_ptr cinfo, |
++ JDIMENSION num_lines); |
+ EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo)); |
+ |
+ /* Replaces jpeg_read_scanlines when reading raw downsampled data. */ |
+@@ -1009,6 +1115,9 @@ |
+ #define JPEG_SCAN_COMPLETED 4 /* Completed last iMCU row of a scan */ |
+ |
+ /* Precalculate output dimensions for current decompression parameters. */ |
++#if JPEG_LIB_VERSION >= 80 |
++EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo)); |
+#endif |
+ EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo)); |
+ /* Control saving of COM and APPn markers into marker_list. */ |
+@@ -1103,4 +1212,10 @@ |
+ #include "jerror.h" /* fetch error codes too */ |
+ #endif |
- /* |
-Index: jpeglib.h |
-=================================================================== |
---- jpeglib.h (revision 829) |
-+++ jpeglib.h (working copy) |
-@@ -15,6 +15,10 @@ |
- #ifndef JPEGLIB_H |
- #define JPEGLIB_H |
- |
-+/* Begin chromium edits */ |
-+#include "jpeglibmangler.h" |
-+/* End chromium edits */ |
++#ifdef __cplusplus |
++#ifndef DONT_USE_EXTERN_C |
++} |
++#endif |
++#endif |
+ |
- /* |
- * First we include the configuration files that record how this |
- * installation of the JPEG library is set up. jconfig.h can be |
+ #endif /* JPEGLIB_H */ |
Index: jpeglibmangler.h |
=================================================================== |
--- jpeglibmangler.h (revision 0) |
-+++ jpeglibmangler.h (revision 0) |
-@@ -0,0 +1,113 @@ |
++++ jpeglibmangler.h (working copy) |
+@@ -0,0 +1,114 @@ |
+// Copyright (c) 2009 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
@@ -305,6 +10237,7 @@ Index: jpeglibmangler.h |
+#define jpeg_read_header chromium_jpeg_read_header |
+#define jpeg_start_decompress chromium_jpeg_start_decompress |
+#define jpeg_read_scanlines chromium_jpeg_read_scanlines |
++#define jpeg_skip_scanlines chromium_jpeg_skip_scanlines |
+#define jpeg_finish_decompress chromium_jpeg_finish_decompress |
+#define jpeg_read_raw_data chromium_jpeg_read_raw_data |
+#define jpeg_has_multiple_scans chromium_jpeg_has_multiple_scans |
@@ -334,376 +10267,1367 @@ Index: jpeglibmangler.h |
+#define jpeg_mem_term chromium_jpeg_mem_term |
+ |
+#endif // THIRD_PARTY_LIBJPEG_TURBO_JPEGLIBMANGLER_H_ |
-Index: simd/jcgrass2-64.asm |
-=================================================================== |
---- simd/jcgrass2-64.asm (revision 829) |
-+++ simd/jcgrass2-64.asm (working copy) |
-@@ -30,7 +30,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_rgb_gray_convert_sse2) |
-+ global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE |
- |
- EXTN(jconst_rgb_gray_convert_sse2): |
- |
-Index: simd/jiss2fst.asm |
+Index: jpegut.c |
=================================================================== |
---- simd/jiss2fst.asm (revision 829) |
-+++ simd/jiss2fst.asm (working copy) |
-@@ -59,7 +59,7 @@ |
- %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
- |
- alignz 16 |
-- global EXTN(jconst_idct_ifast_sse2) |
-+ global EXTN(jconst_idct_ifast_sse2) PRIVATE |
+--- jpegut.c (revision 829) |
++++ jpegut.c (working copy) |
+@@ -19,11 +19,14 @@ |
+ #include "./rrtimer.h" |
+ #include "./turbojpeg.h" |
- EXTN(jconst_idct_ifast_sse2): |
+-#define _catch(f) {if((f)==-1) {printf("TJPEG: %s\n", tjGetErrorStr()); goto finally;}} |
++#define _catch(f) {if((f)==-1) {printf("TJPEG: %s\n", tjGetErrorStr()); bailout();}} |
-@@ -92,7 +92,7 @@ |
- %define WK_NUM 2 |
+ const char *_subnamel[NUMSUBOPT]={"4:4:4", "4:2:2", "4:2:0", "GRAY"}; |
+ const char *_subnames[NUMSUBOPT]={"444", "422", "420", "GRAY"}; |
- align 16 |
-- global EXTN(jsimd_idct_ifast_sse2) |
-+ global EXTN(jsimd_idct_ifast_sse2) PRIVATE |
++int exitstatus=0; |
++#define bailout() {exitstatus=-1; goto finally;} |
++ |
+ int pixels[9][3]= |
+ { |
+ {0, 255, 0}, |
+@@ -70,7 +73,7 @@ |
+ } |
+ } |
- EXTN(jsimd_idct_ifast_sse2): |
- push ebp |
-Index: simd/jcclrss2-64.asm |
+-int dumpbuf(unsigned char *buf, int w, int h, int ps, int flags) |
++void dumpbuf(unsigned char *buf, int w, int h, int ps, int flags) |
+ { |
+ int roffset=(flags&TJ_BGR)?2:0, goffset=1, boffset=(flags&TJ_BGR)?0:2, i, |
+ j; |
+@@ -177,12 +180,12 @@ |
+ if((outfile=fopen(filename, "wb"))==NULL) |
+ { |
+ printf("ERROR: Could not open %s for writing.\n", filename); |
+- goto finally; |
++ bailout(); |
+ } |
+ if(fwrite(jpegbuf, jpgbufsize, 1, outfile)!=1) |
+ { |
+ printf("ERROR: Could not write to %s.\n", filename); |
+- goto finally; |
++ bailout(); |
+ } |
+ |
+ finally: |
+@@ -210,7 +213,7 @@ |
+ |
+ if((bmpbuf=(unsigned char *)malloc(w*h*ps+1))==NULL) |
+ { |
+- printf("ERROR: Could not allocate buffer\n"); goto finally; |
++ printf("ERROR: Could not allocate buffer\n"); bailout(); |
+ } |
+ initbuf(bmpbuf, w, h, ps, flags); |
+ memset(jpegbuf, 0, TJBUFSIZE(w, h)); |
+@@ -249,12 +252,12 @@ |
+ _catch(tjDecompressHeader(hnd, jpegbuf, jpegsize, &_w, &_h)); |
+ if(_w!=w || _h!=h) |
+ { |
+- printf("Incorrect JPEG header\n"); goto finally; |
++ printf("Incorrect JPEG header\n"); bailout(); |
+ } |
+ |
+ if((bmpbuf=(unsigned char *)malloc(w*h*ps+1))==NULL) |
+ { |
+- printf("ERROR: Could not allocate buffer\n"); goto finally; |
++ printf("ERROR: Could not allocate buffer\n"); bailout(); |
+ } |
+ memset(bmpbuf, 0, w*ps*h); |
+ |
+@@ -278,13 +281,13 @@ |
+ |
+ if((jpegbuf=(unsigned char *)malloc(TJBUFSIZE(w, h))) == NULL) |
+ { |
+- puts("ERROR: Could not allocate buffer."); goto finally; |
++ puts("ERROR: Could not allocate buffer."); bailout(); |
+ } |
+ |
+ if((hnd=tjInitCompress())==NULL) |
+- {printf("Error in tjInitCompress():\n%s\n", tjGetErrorStr()); goto finally;} |
++ {printf("Error in tjInitCompress():\n%s\n", tjGetErrorStr()); bailout();} |
+ if((dhnd=tjInitDecompress())==NULL) |
+- {printf("Error in tjInitDecompress():\n%s\n", tjGetErrorStr()); goto finally;} |
++ {printf("Error in tjInitDecompress():\n%s\n", tjGetErrorStr()); bailout();} |
+ |
+ gentestjpeg(hnd, jpegbuf, &size, w, h, ps, basefilename, subsamp, 100, 0); |
+ gentestbmp(dhnd, jpegbuf, size, w, h, ps, basefilename, subsamp, 100, 0); |
+@@ -327,7 +330,7 @@ |
+ int i, j, i2; unsigned char *bmpbuf=NULL, *jpgbuf=NULL; |
+ tjhandle hnd=NULL; unsigned long size; |
+ if((hnd=tjInitCompress())==NULL) |
+- {printf("Error in tjInitCompress():\n%s\n", tjGetErrorStr()); goto finally;} |
++ {printf("Error in tjInitCompress():\n%s\n", tjGetErrorStr()); bailout();} |
+ printf("Buffer size regression test\n"); |
+ for(j=1; j<48; j++) |
+ { |
+@@ -337,7 +340,7 @@ |
+ if((bmpbuf=(unsigned char *)malloc(i*j*4))==NULL |
+ || (jpgbuf=(unsigned char *)malloc(TJBUFSIZE(i, j)))==NULL) |
+ { |
+- printf("Memory allocation failure\n"); goto finally; |
++ printf("Memory allocation failure\n"); bailout(); |
+ } |
+ memset(bmpbuf, 0, i*j*4); |
+ for(i2=0; i2<i*j; i2++) |
+@@ -353,7 +356,7 @@ |
+ if((bmpbuf=(unsigned char *)malloc(j*i*4))==NULL |
+ || (jpgbuf=(unsigned char *)malloc(TJBUFSIZE(j, i)))==NULL) |
+ { |
+- printf("Memory allocation failure\n"); goto finally; |
++ printf("Memory allocation failure\n"); bailout(); |
+ } |
+ for(i2=0; i2<j*i*4; i2++) |
+ { |
+@@ -380,5 +383,5 @@ |
+ dotest(35, 41, 4, TJ_GRAYSCALE, "test"); |
+ dotest1(); |
+ |
+- return 0; |
++ return exitstatus; |
+ } |
+Index: jpgtest.cxx |
=================================================================== |
---- simd/jcclrss2-64.asm (revision 829) |
-+++ simd/jcclrss2-64.asm (working copy) |
-@@ -37,7 +37,7 @@ |
- |
- align 16 |
- |
-- global EXTN(jsimd_rgb_ycc_convert_sse2) |
-+ global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE |
- |
- EXTN(jsimd_rgb_ycc_convert_sse2): |
- push rbp |
-Index: simd/jiss2red-64.asm |
+--- jpgtest.cxx (revision 829) |
++++ jpgtest.cxx (working copy) |
+@@ -322,22 +322,22 @@ |
+ if(!stricmp(argv[i], "-tile")) dotile=1; |
+ if(!stricmp(argv[i], "-forcesse3")) |
+ { |
+- printf("Using SSE3 code in Intel compressor\n"); |
++ printf("Using SSE3 code\n"); |
+ forcesse3=1; |
+ } |
+ if(!stricmp(argv[i], "-forcesse2")) |
+ { |
+- printf("Using SSE2 code in Intel compressor\n"); |
++ printf("Using SSE2 code\n"); |
+ forcesse2=1; |
+ } |
+ if(!stricmp(argv[i], "-forcesse")) |
+ { |
+- printf("Using SSE code in Intel compressor\n"); |
++ printf("Using SSE code\n"); |
+ forcesse=1; |
+ } |
+ if(!stricmp(argv[i], "-forcemmx")) |
+ { |
+- printf("Using MMX code in Intel compressor\n"); |
++ printf("Using MMX code\n"); |
+ forcemmx=1; |
+ } |
+ if(!stricmp(argv[i], "-fastupsample")) |
+Index: jquant1.c |
=================================================================== |
---- simd/jiss2red-64.asm (revision 829) |
-+++ simd/jiss2red-64.asm (working copy) |
-@@ -73,7 +73,7 @@ |
- SECTION SEG_CONST |
+--- jquant1.c (revision 829) |
++++ jquant1.c (working copy) |
+@@ -1,9 +1,10 @@ |
+ /* |
+ * jquant1.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1996, Thomas G. Lane. |
++ * libjpeg-turbo Modifications: |
+ * Copyright (C) 2009, D. R. Commander |
+- * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains 1-pass color quantization (color mapping) routines. |
+Index: jquant2.c |
+=================================================================== |
+--- jquant2.c (revision 829) |
++++ jquant2.c (working copy) |
+@@ -1,9 +1,10 @@ |
+ /* |
+ * jquant2.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1996, Thomas G. Lane. |
++ * libjpeg-turbo Modifications: |
+ * Copyright (C) 2009, D. R. Commander. |
+- * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains 2-pass color quantization (color mapping) routines. |
+Index: jsimd.h |
+=================================================================== |
+--- jsimd.h (revision 829) |
++++ jsimd.h (working copy) |
+@@ -2,9 +2,11 @@ |
+ * jsimd.h |
+ * |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++ * Copyright 2011 D. R. Commander |
+ * |
+ * Based on the x86 SIMD extension for IJG JPEG library, |
+ * Copyright (C) 1999-2006, MIYASAKA Masaru. |
++ * For conditions of distribution and use, see copyright notice in jsimdext.inc |
+ * |
+ */ |
- alignz 16 |
-- global EXTN(jconst_idct_red_sse2) |
-+ global EXTN(jconst_idct_red_sse2) PRIVATE |
+@@ -12,8 +14,10 @@ |
+ |
+ #ifdef NEED_SHORT_EXTERNAL_NAMES |
+ #define jsimd_can_rgb_ycc jSCanRgbYcc |
++#define jsimd_can_rgb_gray jSCanRgbGry |
+ #define jsimd_can_ycc_rgb jSCanYccRgb |
+ #define jsimd_rgb_ycc_convert jSRgbYccConv |
++#define jsimd_rgb_gray_convert jSRgbGryConv |
+ #define jsimd_ycc_rgb_convert jSYccRgbConv |
+ #define jsimd_can_h2v2_downsample jSCanH2V2Down |
+ #define jsimd_can_h2v1_downsample jSCanH2V1Down |
+@@ -34,6 +38,7 @@ |
+ #endif /* NEED_SHORT_EXTERNAL_NAMES */ |
+ |
+ EXTERN(int) jsimd_can_rgb_ycc JPP((void)); |
++EXTERN(int) jsimd_can_rgb_gray JPP((void)); |
+ EXTERN(int) jsimd_can_ycc_rgb JPP((void)); |
+ |
+ EXTERN(void) jsimd_rgb_ycc_convert |
+@@ -40,6 +45,10 @@ |
+ JPP((j_compress_ptr cinfo, |
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
+ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_rgb_gray_convert |
++ JPP((j_compress_ptr cinfo, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
+ EXTERN(void) jsimd_ycc_rgb_convert |
+ JPP((j_decompress_ptr cinfo, |
+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
+Index: jsimd_none.c |
+=================================================================== |
+--- jsimd_none.c (revision 829) |
++++ jsimd_none.c (working copy) |
+@@ -2,10 +2,11 @@ |
+ * jsimd_none.c |
+ * |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * Copyright 2009 D. R. Commander |
++ * Copyright 2009-2011 D. R. Commander |
+ * |
+ * Based on the x86 SIMD extension for IJG JPEG library, |
+ * Copyright (C) 1999-2006, MIYASAKA Masaru. |
++ * For conditions of distribution and use, see copyright notice in jsimdext.inc |
+ * |
+ * This file contains stubs for when there is no SIMD support available. |
+ */ |
+@@ -24,6 +25,12 @@ |
+ } |
- EXTN(jconst_idct_red_sse2): |
+ GLOBAL(int) |
++jsimd_can_rgb_gray (void) |
++{ |
++ return 0; |
++} |
++ |
++GLOBAL(int) |
+ jsimd_can_ycc_rgb (void) |
+ { |
+ return 0; |
+@@ -37,6 +44,13 @@ |
+ } |
-@@ -114,7 +114,7 @@ |
- %define WK_NUM 2 |
+ GLOBAL(void) |
++jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows) |
++{ |
++} |
++ |
++GLOBAL(void) |
+ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
+ JSAMPARRAY output_buf, int num_rows) |
+Index: jsimddct.h |
+=================================================================== |
+--- jsimddct.h (revision 829) |
++++ jsimddct.h (working copy) |
+@@ -5,6 +5,7 @@ |
+ * |
+ * Based on the x86 SIMD extension for IJG JPEG library, |
+ * Copyright (C) 1999-2006, MIYASAKA Masaru. |
++ * For conditions of distribution and use, see copyright notice in jsimdext.inc |
+ * |
+ */ |
- align 16 |
-- global EXTN(jsimd_idct_4x4_sse2) |
-+ global EXTN(jsimd_idct_4x4_sse2) PRIVATE |
+Index: jversion.h |
+=================================================================== |
+--- jversion.h (revision 829) |
++++ jversion.h (working copy) |
+@@ -1,8 +1,10 @@ |
+ /* |
+ * jversion.h |
+ * |
+- * Copyright (C) 1991-1998, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * This file was part of the Independent JPEG Group's software: |
++ * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. |
++ * Modifications: |
++ * Copyright (C) 2010, 2012-2014, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains software version identification. |
+@@ -9,6 +11,22 @@ |
+ */ |
- EXTN(jsimd_idct_4x4_sse2): |
- push rbp |
-@@ -413,7 +413,7 @@ |
- ; r13 = JDIMENSION output_col |
- align 16 |
-- global EXTN(jsimd_idct_2x2_sse2) |
-+ global EXTN(jsimd_idct_2x2_sse2) PRIVATE |
++#if JPEG_LIB_VERSION >= 80 |
++ |
++#define JVERSION "8d 15-Jan-2012" |
++ |
++#elif JPEG_LIB_VERSION >= 70 |
++ |
++#define JVERSION "7 27-Jun-2009" |
++ |
++#else |
++ |
+ #define JVERSION "6b 27-Mar-1998" |
- EXTN(jsimd_idct_2x2_sse2): |
- push rbp |
-Index: simd/ji3dnflt.asm |
+-#define JCOPYRIGHT "Copyright (C) 1998, Thomas G. Lane" |
++#endif |
++ |
++#define JCOPYRIGHT "Copyright (C) 1991-2012 Thomas G. Lane, Guido Vollbeding\n" \ |
++ "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ |
++ "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \ |
++ "Copyright (C) 2009-2014 D. R. Commander\n" \ |
++ "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)" |
+Index: rdbmp.c |
=================================================================== |
---- simd/ji3dnflt.asm (revision 829) |
-+++ simd/ji3dnflt.asm (working copy) |
-@@ -27,7 +27,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_idct_float_3dnow) |
-+ global EXTN(jconst_idct_float_3dnow) PRIVATE |
+--- rdbmp.c (revision 829) |
++++ rdbmp.c (working copy) |
+@@ -1,8 +1,11 @@ |
+ /* |
+ * rdbmp.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1994-1996, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * Modified 2009-2010 by Guido Vollbeding. |
++ * libjpeg-turbo Modifications: |
++ * Modified 2011 by Siarhei Siamashka. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains routines to read input images in Microsoft "BMP" |
+@@ -177,10 +180,41 @@ |
+ } |
- EXTN(jconst_idct_float_3dnow): |
-@@ -63,7 +63,7 @@ |
- ; FAST_FLOAT workspace[DCTSIZE2] |
++METHODDEF(JDIMENSION) |
++get_32bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo) |
++/* This version is for reading 32-bit pixels */ |
++{ |
++ bmp_source_ptr source = (bmp_source_ptr) sinfo; |
++ JSAMPARRAY image_ptr; |
++ register JSAMPROW inptr, outptr; |
++ register JDIMENSION col; |
++ |
++ /* Fetch next row from virtual array */ |
++ source->source_row--; |
++ image_ptr = (*cinfo->mem->access_virt_sarray) |
++ ((j_common_ptr) cinfo, source->whole_image, |
++ source->source_row, (JDIMENSION) 1, FALSE); |
++ /* Transfer data. Note source values are in BGR order |
++ * (even though Microsoft's own documents say the opposite). |
++ */ |
++ inptr = image_ptr[0]; |
++ outptr = source->pub.buffer[0]; |
++ for (col = cinfo->image_width; col > 0; col--) { |
++ outptr[2] = *inptr++; /* can omit GETJSAMPLE() safely */ |
++ outptr[1] = *inptr++; |
++ outptr[0] = *inptr++; |
++ inptr++; /* skip the 4th byte (Alpha channel) */ |
++ outptr += 3; |
++ } |
++ |
++ return 1; |
++} |
++ |
++ |
+ /* |
+ * This method loads the image into whole_image during the first call on |
+ * get_pixel_rows. The get_pixel_rows pointer is then adjusted to call |
+- * get_8bit_row or get_24bit_row on subsequent calls. |
++ * get_8bit_row, get_24bit_row, or get_32bit_row on subsequent calls. |
+ */ |
- align 16 |
-- global EXTN(jsimd_idct_float_3dnow) |
-+ global EXTN(jsimd_idct_float_3dnow) PRIVATE |
+ METHODDEF(JDIMENSION) |
+@@ -188,10 +222,9 @@ |
+ { |
+ bmp_source_ptr source = (bmp_source_ptr) sinfo; |
+ register FILE *infile = source->pub.input_file; |
+- register int c; |
+ register JSAMPROW out_ptr; |
+ JSAMPARRAY image_ptr; |
+- JDIMENSION row, col; |
++ JDIMENSION row; |
+ cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress; |
+ |
+ /* Read the data into a virtual array in input-file row order. */ |
+@@ -205,11 +238,11 @@ |
+ ((j_common_ptr) cinfo, source->whole_image, |
+ row, (JDIMENSION) 1, TRUE); |
+ out_ptr = image_ptr[0]; |
+- for (col = source->row_width; col > 0; col--) { |
+- /* inline copy of read_byte() for speed */ |
+- if ((c = getc(infile)) == EOF) |
+- ERREXIT(cinfo, JERR_INPUT_EOF); |
+- *out_ptr++ = (JSAMPLE) c; |
++ if (fread(out_ptr, 1, source->row_width, infile) != source->row_width) { |
++ if (feof(infile)) |
++ ERREXIT(cinfo, JERR_INPUT_EOF); |
++ else |
++ ERREXIT(cinfo, JERR_FILE_READ); |
+ } |
+ } |
+ if (progress != NULL) |
+@@ -223,6 +256,9 @@ |
+ case 24: |
+ source->pub.get_pixel_rows = get_24bit_row; |
+ break; |
++ case 32: |
++ source->pub.get_pixel_rows = get_32bit_row; |
++ break; |
+ default: |
+ ERREXIT(cinfo, JERR_BMP_BADDEPTH); |
+ } |
+@@ -251,8 +287,8 @@ |
+ (((INT32) UCH(array[offset+3])) << 24)) |
+ INT32 bfOffBits; |
+ INT32 headerSize; |
+- INT32 biWidth = 0; /* initialize to avoid compiler warning */ |
+- INT32 biHeight = 0; |
++ INT32 biWidth; |
++ INT32 biHeight; |
+ unsigned int biPlanes; |
+ INT32 biCompression; |
+ INT32 biXPelsPerMeter,biYPelsPerMeter; |
+@@ -300,8 +336,6 @@ |
+ ERREXIT(cinfo, JERR_BMP_BADDEPTH); |
+ break; |
+ } |
+- if (biPlanes != 1) |
+- ERREXIT(cinfo, JERR_BMP_BADPLANES); |
+ break; |
+ case 40: |
+ case 64: |
+@@ -325,12 +359,13 @@ |
+ case 24: /* RGB image */ |
+ TRACEMS2(cinfo, 1, JTRC_BMP, (int) biWidth, (int) biHeight); |
+ break; |
++ case 32: /* RGB image + Alpha channel */ |
++ TRACEMS2(cinfo, 1, JTRC_BMP, (int) biWidth, (int) biHeight); |
++ break; |
+ default: |
+ ERREXIT(cinfo, JERR_BMP_BADDEPTH); |
+ break; |
+ } |
+- if (biPlanes != 1) |
+- ERREXIT(cinfo, JERR_BMP_BADPLANES); |
+ if (biCompression != 0) |
+ ERREXIT(cinfo, JERR_BMP_COMPRESSED); |
+ |
+@@ -343,9 +378,14 @@ |
+ break; |
+ default: |
+ ERREXIT(cinfo, JERR_BMP_BADHEADER); |
+- break; |
++ return; |
+ } |
- EXTN(jsimd_idct_float_3dnow): |
- push ebp |
-Index: simd/jsimdcpu.asm |
++ if (biWidth <= 0 || biHeight <= 0) |
++ ERREXIT(cinfo, JERR_BMP_EMPTY); |
++ if (biPlanes != 1) |
++ ERREXIT(cinfo, JERR_BMP_BADPLANES); |
++ |
+ /* Compute distance to bitmap data --- will adjust for colormap below */ |
+ bPad = bfOffBits - (headerSize + 14); |
+ |
+@@ -375,6 +415,8 @@ |
+ /* Compute row width in file, including padding to 4-byte boundary */ |
+ if (source->bits_per_pixel == 24) |
+ row_width = (JDIMENSION) (biWidth * 3); |
++ else if (source->bits_per_pixel == 32) |
++ row_width = (JDIMENSION) (biWidth * 4); |
+ else |
+ row_width = (JDIMENSION) biWidth; |
+ while ((row_width & 3) != 0) row_width++; |
+Index: rdppm.c |
=================================================================== |
---- simd/jsimdcpu.asm (revision 829) |
-+++ simd/jsimdcpu.asm (working copy) |
-@@ -29,7 +29,7 @@ |
- ; |
- |
- align 16 |
-- global EXTN(jpeg_simd_cpu_support) |
-+ global EXTN(jpeg_simd_cpu_support) PRIVATE |
- |
- EXTN(jpeg_simd_cpu_support): |
- push ebx |
-Index: simd/jdmerss2-64.asm |
+--- rdppm.c (revision 829) |
++++ rdppm.c (working copy) |
+@@ -2,6 +2,7 @@ |
+ * rdppm.c |
+ * |
+ * Copyright (C) 1991-1997, Thomas G. Lane. |
++ * Modified 2009 by Bill Allombert, Guido Vollbeding. |
+ * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+@@ -250,8 +251,8 @@ |
+ bufferptr = source->iobuffer; |
+ for (col = cinfo->image_width; col > 0; col--) { |
+ register int temp; |
+- temp = UCH(*bufferptr++); |
+- temp |= UCH(*bufferptr++) << 8; |
++ temp = UCH(*bufferptr++) << 8; |
++ temp |= UCH(*bufferptr++); |
+ *ptr++ = rescale[temp]; |
+ } |
+ return 1; |
+@@ -274,14 +275,14 @@ |
+ bufferptr = source->iobuffer; |
+ for (col = cinfo->image_width; col > 0; col--) { |
+ register int temp; |
+- temp = UCH(*bufferptr++); |
+- temp |= UCH(*bufferptr++) << 8; |
++ temp = UCH(*bufferptr++) << 8; |
++ temp |= UCH(*bufferptr++); |
+ *ptr++ = rescale[temp]; |
+- temp = UCH(*bufferptr++); |
+- temp |= UCH(*bufferptr++) << 8; |
++ temp = UCH(*bufferptr++) << 8; |
++ temp |= UCH(*bufferptr++); |
+ *ptr++ = rescale[temp]; |
+- temp = UCH(*bufferptr++); |
+- temp |= UCH(*bufferptr++) << 8; |
++ temp = UCH(*bufferptr++) << 8; |
++ temp |= UCH(*bufferptr++); |
+ *ptr++ = rescale[temp]; |
+ } |
+ return 1; |
+Index: rdswitch.c |
=================================================================== |
---- simd/jdmerss2-64.asm (revision 829) |
-+++ simd/jdmerss2-64.asm (working copy) |
-@@ -35,7 +35,7 @@ |
- SECTION SEG_CONST |
+--- rdswitch.c (revision 829) |
++++ rdswitch.c (working copy) |
+@@ -1,8 +1,10 @@ |
+ /* |
+ * rdswitch.c |
+ * |
++ * This file was part of the Independent JPEG Group's software: |
+ * Copyright (C) 1991-1996, Thomas G. Lane. |
+- * This file is part of the Independent JPEG Group's software. |
++ * libjpeg-turbo Modifications: |
++ * Copyright (C) 2010, D. R. Commander. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+ * This file contains routines to process some of cjpeg's more complicated |
+@@ -9,6 +11,7 @@ |
+ * command-line switches. Switches processed here are: |
+ * -qtables file Read quantization tables from text file |
+ * -scans file Read scan script from text file |
++ * -quality N[,N,...] Set quality ratings |
+ * -qslots N[,N,...] Set component quantization table selectors |
+ * -sample HxV[,HxV,...] Set component sampling factors |
+ */ |
+@@ -69,9 +72,12 @@ |
+ } |
- alignz 16 |
-- global EXTN(jconst_merged_upsample_sse2) |
-+ global EXTN(jconst_merged_upsample_sse2) PRIVATE |
- EXTN(jconst_merged_upsample_sse2): |
++#if JPEG_LIB_VERSION < 70 |
++static int q_scale_factor[NUM_QUANT_TBLS] = {100, 100, 100, 100}; |
++#endif |
++ |
+ GLOBAL(boolean) |
+-read_quant_tables (j_compress_ptr cinfo, char * filename, |
+- int scale_factor, boolean force_baseline) |
++read_quant_tables (j_compress_ptr cinfo, char * filename, boolean force_baseline) |
+ /* Read a set of quantization tables from the specified file. |
+ * The file is plain ASCII text: decimal numbers with whitespace between. |
+ * Comments preceded by '#' may be included in the file. |
+@@ -108,7 +114,13 @@ |
+ } |
+ table[i] = (unsigned int) val; |
+ } |
+- jpeg_add_quant_table(cinfo, tblno, table, scale_factor, force_baseline); |
++#if JPEG_LIB_VERSION >= 70 |
++ jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno], |
++ force_baseline); |
++#else |
++ jpeg_add_quant_table(cinfo, tblno, table, q_scale_factor[tblno], |
++ force_baseline); |
++#endif |
+ tblno++; |
+ } |
-Index: simd/jdsammmx.asm |
-=================================================================== |
---- simd/jdsammmx.asm (revision 829) |
-+++ simd/jdsammmx.asm (working copy) |
-@@ -22,7 +22,7 @@ |
- SECTION SEG_CONST |
+@@ -262,7 +274,85 @@ |
+ #endif /* C_MULTISCAN_FILES_SUPPORTED */ |
- alignz 16 |
-- global EXTN(jconst_fancy_upsample_mmx) |
-+ global EXTN(jconst_fancy_upsample_mmx) PRIVATE |
- EXTN(jconst_fancy_upsample_mmx): |
++#if JPEG_LIB_VERSION < 70 |
++/* These are the sample quantization tables given in JPEG spec section K.1. |
++ * The spec says that the values given produce "good" quality, and |
++ * when divided by 2, "very good" quality. |
++ */ |
++static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = { |
++ 16, 11, 10, 16, 24, 40, 51, 61, |
++ 12, 12, 14, 19, 26, 58, 60, 55, |
++ 14, 13, 16, 24, 40, 57, 69, 56, |
++ 14, 17, 22, 29, 51, 87, 80, 62, |
++ 18, 22, 37, 56, 68, 109, 103, 77, |
++ 24, 35, 55, 64, 81, 104, 113, 92, |
++ 49, 64, 78, 87, 103, 121, 120, 101, |
++ 72, 92, 95, 98, 112, 100, 103, 99 |
++}; |
++static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { |
++ 17, 18, 24, 47, 99, 99, 99, 99, |
++ 18, 21, 26, 66, 99, 99, 99, 99, |
++ 24, 26, 56, 99, 99, 99, 99, 99, |
++ 47, 66, 99, 99, 99, 99, 99, 99, |
++ 99, 99, 99, 99, 99, 99, 99, 99, |
++ 99, 99, 99, 99, 99, 99, 99, 99, |
++ 99, 99, 99, 99, 99, 99, 99, 99, |
++ 99, 99, 99, 99, 99, 99, 99, 99 |
++}; |
++ |
++ |
++LOCAL(void) |
++jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) |
++{ |
++ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, |
++ q_scale_factor[0], force_baseline); |
++ jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, |
++ q_scale_factor[1], force_baseline); |
++} |
++#endif |
++ |
++ |
+ GLOBAL(boolean) |
++set_quality_ratings (j_compress_ptr cinfo, char *arg, boolean force_baseline) |
++/* Process a quality-ratings parameter string, of the form |
++ * N[,N,...] |
++ * If there are more q-table slots than parameters, the last value is replicated. |
++ */ |
++{ |
++ int val = 75; /* default value */ |
++ int tblno; |
++ char ch; |
++ |
++ for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) { |
++ if (*arg) { |
++ ch = ','; /* if not set by sscanf, will be ',' */ |
++ if (sscanf(arg, "%d%c", &val, &ch) < 1) |
++ return FALSE; |
++ if (ch != ',') /* syntax check */ |
++ return FALSE; |
++ /* Convert user 0-100 rating to percentage scaling */ |
++#if JPEG_LIB_VERSION >= 70 |
++ cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val); |
++#else |
++ q_scale_factor[tblno] = jpeg_quality_scaling(val); |
++#endif |
++ while (*arg && *arg++ != ',') /* advance to next segment of arg string */ |
++ ; |
++ } else { |
++ /* reached end of parameter, set remaining factors to last value */ |
++#if JPEG_LIB_VERSION >= 70 |
++ cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val); |
++#else |
++ q_scale_factor[tblno] = jpeg_quality_scaling(val); |
++#endif |
++ } |
++ } |
++ jpeg_default_qtables(cinfo, force_baseline); |
++ return TRUE; |
++} |
++ |
++ |
++GLOBAL(boolean) |
+ set_quant_slots (j_compress_ptr cinfo, char *arg) |
+ /* Process a quantization-table-selectors parameter string, of the form |
+ * N[,N,...] |
+Index: rrutil.h |
+=================================================================== |
+--- rrutil.h (revision 829) |
++++ rrutil.h (working copy) |
+@@ -1,5 +1,6 @@ |
+ /* Copyright (C)2004 Landmark Graphics Corporation |
+ * Copyright (C)2005 Sun Microsystems, Inc. |
++ * Copyright (C)2010 D. R. Commander |
+ * |
+ * This library is free software and may be redistributed and/or modified under |
+ * the terms of the wxWindows Library License, Version 3.1 or (at your option) |
+@@ -47,9 +48,9 @@ |
+ static __inline int numprocs(void) |
+ { |
+ #ifdef _WIN32 |
+- DWORD ProcAff, SysAff, i; int count=0; |
++ DWORD_PTR ProcAff, SysAff, i; int count=0; |
+ if(!GetProcessAffinityMask(GetCurrentProcess(), &ProcAff, &SysAff)) return(1); |
+- for(i=0; i<32; i++) if(ProcAff&(1<<i)) count++; |
++ for(i=0; i<sizeof(long*)*8; i++) if(ProcAff&(1LL<<i)) count++; |
+ return(count); |
+ #elif defined (__APPLE__) |
+ return(1); |
+Index: simd/jcclrmmx.asm |
+=================================================================== |
+--- simd/jcclrmmx.asm (revision 829) |
++++ simd/jcclrmmx.asm (working copy) |
+@@ -19,8 +19,6 @@ |
+ %include "jcolsamp.inc" |
-@@ -58,7 +58,7 @@ |
- %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 32 |
+ ; |
+ ; Convert some rows of samples to the output colorspace. |
+ ; |
+@@ -42,7 +40,7 @@ |
+ %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
align 16 |
-- global EXTN(jsimd_h2v1_fancy_upsample_mmx) |
-+ global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE |
+- global EXTN(jsimd_rgb_ycc_convert_mmx) |
++ global EXTN(jsimd_rgb_ycc_convert_mmx) PRIVATE |
- EXTN(jsimd_h2v1_fancy_upsample_mmx): |
+ EXTN(jsimd_rgb_ycc_convert_mmx): |
push ebp |
-@@ -216,7 +216,7 @@ |
- %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
+@@ -474,3 +472,6 @@ |
+ pop ebp |
+ ret |
- align 16 |
-- global EXTN(jsimd_h2v2_fancy_upsample_mmx) |
-+ global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcclrss2-64.asm |
+=================================================================== |
+--- simd/jcclrss2-64.asm (revision 829) |
++++ simd/jcclrss2-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jcclrss2.asm - colorspace conversion (64-bit SSE2) |
++; jcclrss2-64.asm - colorspace conversion (64-bit SSE2) |
+ ; |
+ ; x86 SIMD extension for IJG JPEG library |
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
+@@ -17,8 +17,6 @@ |
+ %include "jcolsamp.inc" |
- EXTN(jsimd_h2v2_fancy_upsample_mmx): |
- push ebp |
-@@ -542,7 +542,7 @@ |
- %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 64 |
+ ; |
+ ; Convert some rows of samples to the output colorspace. |
+ ; |
+@@ -39,7 +37,7 @@ |
align 16 |
-- global EXTN(jsimd_h2v1_upsample_mmx) |
-+ global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE |
- EXTN(jsimd_h2v1_upsample_mmx): |
- push ebp |
-@@ -643,7 +643,7 @@ |
- %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
+- global EXTN(jsimd_rgb_ycc_convert_sse2) |
++ global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE |
+ |
+ EXTN(jsimd_rgb_ycc_convert_sse2): |
+ push rbp |
+@@ -49,8 +47,8 @@ |
+ mov [rsp],rax |
+ mov rbp,rsp ; rbp = aligned rbp |
+ lea rsp, [wk(0)] |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ mov rcx, r10 |
+ test rcx,rcx |
+@@ -70,7 +68,7 @@ |
+ pop rcx |
+ |
+ mov rsi, r11 |
+- mov rax, r14 |
++ mov eax, r14d |
+ test rax,rax |
+ jle near .return |
+ .rowloop: |
+@@ -475,10 +473,13 @@ |
+ jg near .rowloop |
+ |
+ .return: |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ mov rsp,rbp ; rsp <- aligned rbp |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcclrss2.asm |
+=================================================================== |
+--- simd/jcclrss2.asm (revision 829) |
++++ simd/jcclrss2.asm (working copy) |
+@@ -16,8 +16,6 @@ |
+ %include "jcolsamp.inc" |
+ |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 32 |
+ ; |
+ ; Convert some rows of samples to the output colorspace. |
+ ; |
+@@ -40,7 +38,7 @@ |
align 16 |
-- global EXTN(jsimd_h2v2_upsample_mmx) |
-+ global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE |
- EXTN(jsimd_h2v2_upsample_mmx): |
+- global EXTN(jsimd_rgb_ycc_convert_sse2) |
++ global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE |
+ |
+ EXTN(jsimd_rgb_ycc_convert_sse2): |
push ebp |
-Index: simd/jdmrgmmx.asm |
+@@ -500,3 +498,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jccolmmx.asm |
=================================================================== |
---- simd/jdmrgmmx.asm (revision 829) |
-+++ simd/jdmrgmmx.asm (working copy) |
-@@ -40,7 +40,7 @@ |
- %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
+--- simd/jccolmmx.asm (revision 829) |
++++ simd/jccolmmx.asm (working copy) |
+@@ -37,7 +37,7 @@ |
+ SECTION SEG_CONST |
- align 16 |
-- global EXTN(jsimd_h2v1_merged_upsample_mmx) |
-+ global EXTN(jsimd_h2v1_merged_upsample_mmx) PRIVATE |
+ alignz 16 |
+- global EXTN(jconst_rgb_ycc_convert_mmx) |
++ global EXTN(jconst_rgb_ycc_convert_mmx) PRIVATE |
- EXTN(jsimd_h2v1_merged_upsample_mmx): |
- push ebp |
-@@ -409,7 +409,7 @@ |
- %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf |
+ EXTN(jconst_rgb_ycc_convert_mmx): |
- align 16 |
-- global EXTN(jsimd_h2v2_merged_upsample_mmx) |
-+ global EXTN(jsimd_h2v2_merged_upsample_mmx) PRIVATE |
+@@ -51,6 +51,9 @@ |
+ alignz 16 |
- EXTN(jsimd_h2v2_merged_upsample_mmx): |
- push ebp |
-Index: simd/jdsamss2.asm |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 32 |
++ |
+ %include "jcclrmmx.asm" |
+ |
+ %undef RGB_RED |
+@@ -57,10 +60,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx |
+ %include "jcclrmmx.asm" |
+ |
+@@ -68,10 +71,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx |
+ %include "jcclrmmx.asm" |
+ |
+@@ -79,10 +82,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx |
+ %include "jcclrmmx.asm" |
+ |
+@@ -90,10 +93,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx |
+ %include "jcclrmmx.asm" |
+ |
+@@ -101,10 +104,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx |
+ %include "jcclrmmx.asm" |
+ |
+@@ -112,9 +115,9 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx |
+ %include "jcclrmmx.asm" |
+Index: simd/jccolss2-64.asm |
=================================================================== |
---- simd/jdsamss2.asm (revision 829) |
-+++ simd/jdsamss2.asm (working copy) |
-@@ -22,7 +22,7 @@ |
+--- simd/jccolss2-64.asm (revision 829) |
++++ simd/jccolss2-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jccolss2.asm - colorspace conversion (64-bit SSE2) |
++; jccolss2-64.asm - colorspace conversion (64-bit SSE2) |
+ ; |
+ ; x86 SIMD extension for IJG JPEG library |
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
+@@ -34,7 +34,7 @@ |
SECTION SEG_CONST |
alignz 16 |
-- global EXTN(jconst_fancy_upsample_sse2) |
-+ global EXTN(jconst_fancy_upsample_sse2) PRIVATE |
+- global EXTN(jconst_rgb_ycc_convert_sse2) |
++ global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE |
- EXTN(jconst_fancy_upsample_sse2): |
+ EXTN(jconst_rgb_ycc_convert_sse2): |
-@@ -58,7 +58,7 @@ |
- %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
+@@ -48,6 +48,9 @@ |
+ alignz 16 |
- align 16 |
-- global EXTN(jsimd_h2v1_fancy_upsample_sse2) |
-+ global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 64 |
++ |
+ %include "jcclrss2-64.asm" |
+ |
+ %undef RGB_RED |
+@@ -54,10 +57,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 |
+ %include "jcclrss2-64.asm" |
+ |
+@@ -65,10 +68,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 |
+ %include "jcclrss2-64.asm" |
+ |
+@@ -76,10 +79,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 |
+ %include "jcclrss2-64.asm" |
+ |
+@@ -87,10 +90,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 |
+ %include "jcclrss2-64.asm" |
+ |
+@@ -98,10 +101,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 |
+ %include "jcclrss2-64.asm" |
+ |
+@@ -109,9 +112,9 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 |
+ %include "jcclrss2-64.asm" |
+Index: simd/jccolss2.asm |
+=================================================================== |
+--- simd/jccolss2.asm (revision 829) |
++++ simd/jccolss2.asm (working copy) |
+@@ -34,7 +34,7 @@ |
+ SECTION SEG_CONST |
- EXTN(jsimd_h2v1_fancy_upsample_sse2): |
- push ebp |
-@@ -214,7 +214,7 @@ |
- %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
+ alignz 16 |
+- global EXTN(jconst_rgb_ycc_convert_sse2) |
++ global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE |
- align 16 |
-- global EXTN(jsimd_h2v2_fancy_upsample_sse2) |
-+ global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE |
+ EXTN(jconst_rgb_ycc_convert_sse2): |
- EXTN(jsimd_h2v2_fancy_upsample_sse2): |
- push ebp |
-@@ -538,7 +538,7 @@ |
- %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
+@@ -48,6 +48,9 @@ |
+ alignz 16 |
+ |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 32 |
++ |
+ %include "jcclrss2.asm" |
+ |
+ %undef RGB_RED |
+@@ -54,10 +57,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2 |
+ %include "jcclrss2.asm" |
+ |
+@@ -65,10 +68,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2 |
+ %include "jcclrss2.asm" |
+ |
+@@ -76,10 +79,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2 |
+ %include "jcclrss2.asm" |
+ |
+@@ -87,10 +90,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2 |
+ %include "jcclrss2.asm" |
+ |
+@@ -98,10 +101,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2 |
+ %include "jcclrss2.asm" |
+ |
+@@ -109,9 +112,9 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2 |
+ %include "jcclrss2.asm" |
+Index: simd/jcqnt3dn.asm |
+=================================================================== |
+--- simd/jcqnt3dn.asm (revision 829) |
++++ simd/jcqnt3dn.asm (working copy) |
+@@ -35,7 +35,7 @@ |
+ %define workspace ebp+16 ; FAST_FLOAT * workspace |
align 16 |
-- global EXTN(jsimd_h2v1_upsample_sse2) |
-+ global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE |
+- global EXTN(jsimd_convsamp_float_3dnow) |
++ global EXTN(jsimd_convsamp_float_3dnow) PRIVATE |
- EXTN(jsimd_h2v1_upsample_sse2): |
+ EXTN(jsimd_convsamp_float_3dnow): |
push ebp |
-@@ -637,7 +637,7 @@ |
- %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
+@@ -138,7 +138,7 @@ |
+ %define workspace ebp+16 ; FAST_FLOAT * workspace |
align 16 |
-- global EXTN(jsimd_h2v2_upsample_sse2) |
-+ global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE |
+- global EXTN(jsimd_quantize_float_3dnow) |
++ global EXTN(jsimd_quantize_float_3dnow) PRIVATE |
- EXTN(jsimd_h2v2_upsample_sse2): |
+ EXTN(jsimd_quantize_float_3dnow): |
push ebp |
-Index: simd/jiss2flt-64.asm |
-=================================================================== |
---- simd/jiss2flt-64.asm (revision 829) |
-+++ simd/jiss2flt-64.asm (working copy) |
-@@ -38,7 +38,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_idct_float_sse2) |
-+ global EXTN(jconst_idct_float_sse2) PRIVATE |
+@@ -228,3 +228,6 @@ |
+ pop ebp |
+ ret |
- EXTN(jconst_idct_float_sse2): |
- |
-@@ -74,7 +74,7 @@ |
- ; FAST_FLOAT workspace[DCTSIZE2] |
- |
- align 16 |
-- global EXTN(jsimd_idct_float_sse2) |
-+ global EXTN(jsimd_idct_float_sse2) PRIVATE |
- |
- EXTN(jsimd_idct_float_sse2): |
- push rbp |
-Index: simd/jfss2int-64.asm |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcqntmmx.asm |
=================================================================== |
---- simd/jfss2int-64.asm (revision 829) |
-+++ simd/jfss2int-64.asm (working copy) |
-@@ -67,7 +67,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_fdct_islow_sse2) |
-+ global EXTN(jconst_fdct_islow_sse2) PRIVATE |
+--- simd/jcqntmmx.asm (revision 829) |
++++ simd/jcqntmmx.asm (working copy) |
+@@ -35,7 +35,7 @@ |
+ %define workspace ebp+16 ; DCTELEM * workspace |
- EXTN(jconst_fdct_islow_sse2): |
+ align 16 |
+- global EXTN(jsimd_convsamp_mmx) |
++ global EXTN(jsimd_convsamp_mmx) PRIVATE |
-@@ -101,7 +101,7 @@ |
- %define WK_NUM 6 |
+ EXTN(jsimd_convsamp_mmx): |
+ push ebp |
+@@ -140,7 +140,7 @@ |
+ %define workspace ebp+16 ; DCTELEM * workspace |
align 16 |
-- global EXTN(jsimd_fdct_islow_sse2) |
-+ global EXTN(jsimd_fdct_islow_sse2) PRIVATE |
+- global EXTN(jsimd_quantize_mmx) |
++ global EXTN(jsimd_quantize_mmx) PRIVATE |
- EXTN(jsimd_fdct_islow_sse2): |
- push rbp |
-Index: simd/jcqnts2f.asm |
+ EXTN(jsimd_quantize_mmx): |
+ push ebp |
+@@ -269,3 +269,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcqnts2f-64.asm |
=================================================================== |
---- simd/jcqnts2f.asm (revision 829) |
-+++ simd/jcqnts2f.asm (working copy) |
-@@ -35,7 +35,7 @@ |
- %define workspace ebp+16 ; FAST_FLOAT * workspace |
+--- simd/jcqnts2f-64.asm (revision 829) |
++++ simd/jcqnts2f-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jcqnts2f.asm - sample data conversion and quantization (64-bit SSE & SSE2) |
++; jcqnts2f-64.asm - sample data conversion and quantization (64-bit SSE & SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -36,13 +36,14 @@ |
+ ; r12 = FAST_FLOAT * workspace |
align 16 |
- global EXTN(jsimd_convsamp_float_sse2) |
+ global EXTN(jsimd_convsamp_float_sse2) PRIVATE |
EXTN(jsimd_convsamp_float_sse2): |
- push ebp |
-@@ -115,7 +115,7 @@ |
- %define workspace ebp+16 ; FAST_FLOAT * workspace |
+ push rbp |
++ mov rax,rsp |
+ mov rbp,rsp |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ pcmpeqw xmm7,xmm7 |
+ psllw xmm7,7 |
+@@ -89,8 +90,8 @@ |
+ dec rcx |
+ jnz short .convloop |
+ |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ pop rbp |
+ ret |
+ |
+@@ -109,10 +110,11 @@ |
+ ; r12 = FAST_FLOAT * workspace |
align 16 |
- global EXTN(jsimd_quantize_float_sse2) |
+ global EXTN(jsimd_quantize_float_sse2) PRIVATE |
EXTN(jsimd_quantize_float_sse2): |
- push ebp |
-Index: simd/jdmrgss2.asm |
+ push rbp |
++ mov rax,rsp |
+ mov rbp,rsp |
+ collect_args |
+ |
+@@ -150,3 +152,7 @@ |
+ uncollect_args |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcqnts2f.asm |
=================================================================== |
---- simd/jdmrgss2.asm (revision 829) |
-+++ simd/jdmrgss2.asm (working copy) |
-@@ -40,7 +40,7 @@ |
- %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
- |
- align 16 |
-- global EXTN(jsimd_h2v1_merged_upsample_sse2) |
-+ global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE |
- |
- EXTN(jsimd_h2v1_merged_upsample_sse2): |
- push ebp |
-@@ -560,7 +560,7 @@ |
- %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf |
+--- simd/jcqnts2f.asm (revision 829) |
++++ simd/jcqnts2f.asm (working copy) |
+@@ -35,7 +35,7 @@ |
+ %define workspace ebp+16 ; FAST_FLOAT * workspace |
align 16 |
-- global EXTN(jsimd_h2v2_merged_upsample_sse2) |
-+ global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE |
+- global EXTN(jsimd_convsamp_float_sse2) |
++ global EXTN(jsimd_convsamp_float_sse2) PRIVATE |
- EXTN(jsimd_h2v2_merged_upsample_sse2): |
+ EXTN(jsimd_convsamp_float_sse2): |
push ebp |
-Index: simd/jfmmxint.asm |
-=================================================================== |
---- simd/jfmmxint.asm (revision 829) |
-+++ simd/jfmmxint.asm (working copy) |
-@@ -66,7 +66,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_fdct_islow_mmx) |
-+ global EXTN(jconst_fdct_islow_mmx) PRIVATE |
- |
- EXTN(jconst_fdct_islow_mmx): |
- |
-@@ -101,7 +101,7 @@ |
- %define WK_NUM 2 |
+@@ -115,7 +115,7 @@ |
+ %define workspace ebp+16 ; FAST_FLOAT * workspace |
align 16 |
-- global EXTN(jsimd_fdct_islow_mmx) |
-+ global EXTN(jsimd_fdct_islow_mmx) PRIVATE |
+- global EXTN(jsimd_quantize_float_sse2) |
++ global EXTN(jsimd_quantize_float_sse2) PRIVATE |
- EXTN(jsimd_fdct_islow_mmx): |
+ EXTN(jsimd_quantize_float_sse2): |
push ebp |
-Index: simd/jcgryss2-64.asm |
+@@ -166,3 +166,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcqnts2i-64.asm |
=================================================================== |
---- simd/jcgryss2-64.asm (revision 829) |
-+++ simd/jcgryss2-64.asm (working copy) |
-@@ -37,7 +37,7 @@ |
+--- simd/jcqnts2i-64.asm (revision 829) |
++++ simd/jcqnts2i-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jcqnts2i.asm - sample data conversion and quantization (64-bit SSE2) |
++; jcqnts2i-64.asm - sample data conversion and quantization (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -36,13 +36,14 @@ |
+ ; r12 = DCTELEM * workspace |
align 16 |
+- global EXTN(jsimd_convsamp_sse2) |
++ global EXTN(jsimd_convsamp_sse2) PRIVATE |
-- global EXTN(jsimd_rgb_gray_convert_sse2) |
-+ global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE |
+ EXTN(jsimd_convsamp_sse2): |
+ push rbp |
++ mov rax,rsp |
+ mov rbp,rsp |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ pxor xmm6,xmm6 ; xmm6=(all 0's) |
+ pcmpeqw xmm7,xmm7 |
+@@ -84,8 +85,8 @@ |
+ dec rcx |
+ jnz short .convloop |
+ |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ pop rbp |
+ ret |
+ |
+@@ -111,10 +112,11 @@ |
+ ; r12 = DCTELEM * workspace |
+ |
+ align 16 |
+- global EXTN(jsimd_quantize_sse2) |
++ global EXTN(jsimd_quantize_sse2) PRIVATE |
- EXTN(jsimd_rgb_gray_convert_sse2): |
+ EXTN(jsimd_quantize_sse2): |
push rbp |
++ mov rax,rsp |
+ mov rbp,rsp |
+ collect_args |
+ |
+@@ -179,3 +181,7 @@ |
+ uncollect_args |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
Index: simd/jcqnts2i.asm |
=================================================================== |
--- simd/jcqnts2i.asm (revision 829) |
@@ -726,112 +11650,92 @@ Index: simd/jcqnts2i.asm |
EXTN(jsimd_quantize_sse2): |
push ebp |
-Index: simd/jiss2fst-64.asm |
-=================================================================== |
---- simd/jiss2fst-64.asm (revision 829) |
-+++ simd/jiss2fst-64.asm (working copy) |
-@@ -60,7 +60,7 @@ |
- %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
- |
- alignz 16 |
-- global EXTN(jconst_idct_ifast_sse2) |
-+ global EXTN(jconst_idct_ifast_sse2) PRIVATE |
- |
- EXTN(jconst_idct_ifast_sse2): |
- |
-@@ -93,7 +93,7 @@ |
- %define WK_NUM 2 |
+@@ -195,3 +195,6 @@ |
+ pop ebp |
+ ret |
- align 16 |
-- global EXTN(jsimd_idct_ifast_sse2) |
-+ global EXTN(jsimd_idct_ifast_sse2) PRIVATE |
- |
- EXTN(jsimd_idct_ifast_sse2): |
- push rbp |
-Index: simd/jiss2flt.asm |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcqntsse.asm |
=================================================================== |
---- simd/jiss2flt.asm (revision 829) |
-+++ simd/jiss2flt.asm (working copy) |
-@@ -37,7 +37,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_idct_float_sse2) |
-+ global EXTN(jconst_idct_float_sse2) PRIVATE |
- |
- EXTN(jconst_idct_float_sse2): |
- |
-@@ -73,7 +73,7 @@ |
- ; FAST_FLOAT workspace[DCTSIZE2] |
+--- simd/jcqntsse.asm (revision 829) |
++++ simd/jcqntsse.asm (working copy) |
+@@ -35,7 +35,7 @@ |
+ %define workspace ebp+16 ; FAST_FLOAT * workspace |
align 16 |
-- global EXTN(jsimd_idct_float_sse2) |
-+ global EXTN(jsimd_idct_float_sse2) PRIVATE |
+- global EXTN(jsimd_convsamp_float_sse) |
++ global EXTN(jsimd_convsamp_float_sse) PRIVATE |
- EXTN(jsimd_idct_float_sse2): |
+ EXTN(jsimd_convsamp_float_sse): |
push ebp |
-Index: simd/jiss2int.asm |
-=================================================================== |
---- simd/jiss2int.asm (revision 829) |
-+++ simd/jiss2int.asm (working copy) |
-@@ -66,7 +66,7 @@ |
- SECTION SEG_CONST |
+@@ -138,7 +138,7 @@ |
+ %define workspace ebp+16 ; FAST_FLOAT * workspace |
- alignz 16 |
-- global EXTN(jconst_idct_islow_sse2) |
-+ global EXTN(jconst_idct_islow_sse2) PRIVATE |
+ align 16 |
+- global EXTN(jsimd_quantize_float_sse) |
++ global EXTN(jsimd_quantize_float_sse) PRIVATE |
- EXTN(jconst_idct_islow_sse2): |
+ EXTN(jsimd_quantize_float_sse): |
+ push ebp |
+@@ -206,3 +206,6 @@ |
+ pop ebp |
+ ret |
-@@ -105,7 +105,7 @@ |
- %define WK_NUM 12 |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcsammmx.asm |
+=================================================================== |
+--- simd/jcsammmx.asm (revision 829) |
++++ simd/jcsammmx.asm (working copy) |
+@@ -40,7 +40,7 @@ |
+ %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
align 16 |
-- global EXTN(jsimd_idct_islow_sse2) |
-+ global EXTN(jsimd_idct_islow_sse2) PRIVATE |
+- global EXTN(jsimd_h2v1_downsample_mmx) |
++ global EXTN(jsimd_h2v1_downsample_mmx) PRIVATE |
- EXTN(jsimd_idct_islow_sse2): |
+ EXTN(jsimd_h2v1_downsample_mmx): |
push ebp |
-Index: simd/jfsseflt-64.asm |
-=================================================================== |
---- simd/jfsseflt-64.asm (revision 829) |
-+++ simd/jfsseflt-64.asm (working copy) |
-@@ -38,7 +38,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_fdct_float_sse) |
-+ global EXTN(jconst_fdct_float_sse) PRIVATE |
+@@ -95,7 +95,7 @@ |
- EXTN(jconst_fdct_float_sse): |
+ mov eax, JDIMENSION [v_samp(ebp)] ; rowctr |
+ test eax,eax |
+- jle short .return |
++ jle near .return |
-@@ -65,7 +65,7 @@ |
- %define WK_NUM 2 |
+ mov edx, 0x00010000 ; bias pattern |
+ movd mm7,edx |
+@@ -182,7 +182,7 @@ |
+ %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
align 16 |
-- global EXTN(jsimd_fdct_float_sse) |
-+ global EXTN(jsimd_fdct_float_sse) PRIVATE |
- |
- EXTN(jsimd_fdct_float_sse): |
- push rbp |
-Index: simd/jccolss2-64.asm |
-=================================================================== |
---- simd/jccolss2-64.asm (revision 829) |
-+++ simd/jccolss2-64.asm (working copy) |
-@@ -34,7 +34,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_rgb_ycc_convert_sse2) |
-+ global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE |
+- global EXTN(jsimd_h2v2_downsample_mmx) |
++ global EXTN(jsimd_h2v2_downsample_mmx) PRIVATE |
- EXTN(jconst_rgb_ycc_convert_sse2): |
+ EXTN(jsimd_h2v2_downsample_mmx): |
+ push ebp |
+@@ -319,3 +319,6 @@ |
+ pop ebp |
+ ret |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
Index: simd/jcsamss2-64.asm |
=================================================================== |
--- simd/jcsamss2-64.asm (revision 829) |
+++ simd/jcsamss2-64.asm (working copy) |
-@@ -41,7 +41,7 @@ |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jcsamss2.asm - downsampling (64-bit SSE2) |
++; jcsamss2-64.asm - downsampling (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -41,10 +41,11 @@ |
; r15 = JSAMPARRAY output_data |
align 16 |
@@ -840,7 +11744,11 @@ Index: simd/jcsamss2-64.asm |
EXTN(jsimd_h2v1_downsample_sse2): |
push rbp |
-@@ -185,7 +185,7 @@ |
++ mov rax,rsp |
+ mov rbp,rsp |
+ collect_args |
+ |
+@@ -184,10 +185,11 @@ |
; r15 = JSAMPARRAY output_data |
align 16 |
@@ -849,11 +11757,102 @@ Index: simd/jcsamss2-64.asm |
EXTN(jsimd_h2v2_downsample_sse2): |
push rbp |
++ mov rax,rsp |
+ mov rbp,rsp |
+ collect_args |
+ |
+@@ -322,3 +324,7 @@ |
+ uncollect_args |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jcsamss2.asm |
+=================================================================== |
+--- simd/jcsamss2.asm (revision 829) |
++++ simd/jcsamss2.asm (working copy) |
+@@ -40,7 +40,7 @@ |
+ %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
+ |
+ align 16 |
+- global EXTN(jsimd_h2v1_downsample_sse2) |
++ global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE |
+ |
+ EXTN(jsimd_h2v1_downsample_sse2): |
+ push ebp |
+@@ -195,7 +195,7 @@ |
+ %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
+ |
+ align 16 |
+- global EXTN(jsimd_h2v2_downsample_sse2) |
++ global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE |
+ |
+ EXTN(jsimd_h2v2_downsample_sse2): |
+ push ebp |
+@@ -346,3 +346,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jdclrmmx.asm |
+=================================================================== |
+--- simd/jdclrmmx.asm (revision 829) |
++++ simd/jdclrmmx.asm (working copy) |
+@@ -19,8 +19,6 @@ |
+ %include "jcolsamp.inc" |
+ |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 32 |
+ ; |
+ ; Convert some rows of samples to the output colorspace. |
+ ; |
+@@ -42,7 +40,7 @@ |
+ %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
+ |
+ align 16 |
+- global EXTN(jsimd_ycc_rgb_convert_mmx) |
++ global EXTN(jsimd_ycc_rgb_convert_mmx) PRIVATE |
+ |
+ EXTN(jsimd_ycc_rgb_convert_mmx): |
+ push ebp |
+@@ -402,3 +400,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
Index: simd/jdclrss2-64.asm |
=================================================================== |
--- simd/jdclrss2-64.asm (revision 829) |
+++ simd/jdclrss2-64.asm (working copy) |
-@@ -39,7 +39,7 @@ |
+@@ -1,8 +1,8 @@ |
+ ; |
+-; jdclrss2.asm - colorspace conversion (64-bit SSE2) |
++; jdclrss2-64.asm - colorspace conversion (64-bit SSE2) |
+ ; |
+-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+-; Copyright 2009 D. R. Commander |
++; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++; Copyright 2009, 2012 D. R. Commander |
+ ; |
+ ; Based on |
+ ; x86 SIMD extension for IJG JPEG library |
+@@ -20,8 +20,6 @@ |
+ %include "jcolsamp.inc" |
+ |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 64 |
+ ; |
+ ; Convert some rows of samples to the output colorspace. |
+ ; |
+@@ -41,7 +39,7 @@ |
%define WK_NUM 2 |
align 16 |
@@ -862,76 +11861,776 @@ Index: simd/jdclrss2-64.asm |
EXTN(jsimd_ycc_rgb_convert_sse2): |
push rbp |
-Index: simd/jdcolmmx.asm |
-=================================================================== |
---- simd/jdcolmmx.asm (revision 829) |
-+++ simd/jdcolmmx.asm (working copy) |
-@@ -35,7 +35,7 @@ |
- SECTION SEG_CONST |
- |
- alignz 16 |
-- global EXTN(jconst_ycc_rgb_convert_mmx) |
-+ global EXTN(jconst_ycc_rgb_convert_mmx) PRIVATE |
- |
- EXTN(jconst_ycc_rgb_convert_mmx): |
- |
-Index: simd/jcclrmmx.asm |
+@@ -51,8 +49,8 @@ |
+ mov [rsp],rax |
+ mov rbp,rsp ; rbp = aligned rbp |
+ lea rsp, [wk(0)] |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ mov rcx, r10 ; num_cols |
+ test rcx,rcx |
+@@ -72,7 +70,7 @@ |
+ pop rcx |
+ |
+ mov rdi, r13 |
+- mov rax, r14 |
++ mov eax, r14d |
+ test rax,rax |
+ jle near .return |
+ .rowloop: |
+@@ -253,17 +251,13 @@ |
+ movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
+ movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
+ movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF |
+- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ jmp short .out0 |
+ .out1: ; --(unaligned)----------------- |
+- pcmpeqb xmmH,xmmH ; xmmH=(all 1's) |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [rdi], xmmF |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
++ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF |
+ .out0: |
++ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ sub rcx, byte SIZEOF_XMMWORD |
+ jz near .nextrow |
+ |
+@@ -273,14 +267,12 @@ |
+ jmp near .columnloop |
+ |
+ .column_st32: |
+- pcmpeqb xmmH,xmmH ; xmmH=(all 1's) |
+ lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE |
+ cmp rcx, byte 2*SIZEOF_XMMWORD |
+ jb short .column_st16 |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
++ add rdi, byte 2*SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmF |
+ sub rcx, byte 2*SIZEOF_XMMWORD |
+ jmp short .column_st15 |
+@@ -287,50 +279,44 @@ |
+ .column_st16: |
+ cmp rcx, byte SIZEOF_XMMWORD |
+ jb short .column_st15 |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
+ add rdi, byte SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmD |
+ sub rcx, byte SIZEOF_XMMWORD |
+ .column_st15: |
+- mov rax,rcx |
+- xor rcx, byte 0x0F |
+- shl rcx, 2 |
+- movd xmmB,ecx |
+- psrlq xmmH,4 |
+- pcmpeqb xmmE,xmmE |
+- psrlq xmmH,xmmB |
+- psrlq xmmE,xmmB |
+- punpcklbw xmmE,xmmH |
+- ; ---------------- |
+- mov rcx,rdi |
+- and rcx, byte SIZEOF_XMMWORD-1 |
+- jz short .adj0 |
+- add rax,rcx |
+- cmp rax, byte SIZEOF_XMMWORD |
+- ja short .adj0 |
+- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary |
+- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,rcx |
+- movdqa xmmG,xmmA |
+- movdqa xmmC,xmmE |
+- pslldq xmmA, SIZEOF_XMMWORD/2 |
+- pslldq xmmE, SIZEOF_XMMWORD/2 |
+- movd xmmD,ecx |
+- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT |
+- jb short .adj1 |
+- movd xmmF,ecx |
+- psllq xmmA,xmmF |
+- psllq xmmE,xmmF |
+- jmp short .adj0 |
+-.adj1: neg ecx |
+- movd xmmF,ecx |
+- psrlq xmmA,xmmF |
+- psrlq xmmE,xmmF |
+- psllq xmmG,xmmD |
+- psllq xmmC,xmmD |
+- por xmmA,xmmG |
+- por xmmE,xmmC |
+-.adj0: ; ---------------- |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA |
++ ; Store the lower 8 bytes of xmmA to the output when it has enough |
++ ; space. |
++ cmp rcx, byte SIZEOF_MMWORD |
++ jb short .column_st7 |
++ movq XMM_MMWORD [rdi], xmmA |
++ add rdi, byte SIZEOF_MMWORD |
++ sub rcx, byte SIZEOF_MMWORD |
++ psrldq xmmA, SIZEOF_MMWORD |
++.column_st7: |
++ ; Store the lower 4 bytes of xmmA to the output when it has enough |
++ ; space. |
++ cmp rcx, byte SIZEOF_DWORD |
++ jb short .column_st3 |
++ movd XMM_DWORD [rdi], xmmA |
++ add rdi, byte SIZEOF_DWORD |
++ sub rcx, byte SIZEOF_DWORD |
++ psrldq xmmA, SIZEOF_DWORD |
++.column_st3: |
++ ; Store the lower 2 bytes of rax to the output when it has enough |
++ ; space. |
++ movd eax, xmmA |
++ cmp rcx, byte SIZEOF_WORD |
++ jb short .column_st1 |
++ mov WORD [rdi], ax |
++ add rdi, byte SIZEOF_WORD |
++ sub rcx, byte SIZEOF_WORD |
++ shr rax, 16 |
++.column_st1: |
++ ; Store the lower 1 byte of rax to the output when it has enough |
++ ; space. |
++ test rcx, rcx |
++ jz short .nextrow |
++ mov BYTE [rdi], al |
+ |
+ %else ; RGB_PIXELSIZE == 4 ; ----------- |
+ |
+@@ -375,19 +361,14 @@ |
+ movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
+ movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC |
+ movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH |
+- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ jmp short .out0 |
+ .out1: ; --(unaligned)----------------- |
+- pcmpeqb xmmE,xmmE ; xmmE=(all 1's) |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [rdi], xmmC |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [rdi], xmmH |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
++ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC |
++ movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH |
+ .out0: |
++ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ sub rcx, byte SIZEOF_XMMWORD |
+ jz near .nextrow |
+ |
+@@ -397,13 +378,11 @@ |
+ jmp near .columnloop |
+ |
+ .column_st32: |
+- pcmpeqb xmmE,xmmE ; xmmE=(all 1's) |
+ cmp rcx, byte SIZEOF_XMMWORD/2 |
+ jb short .column_st16 |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
++ add rdi, byte 2*SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmC |
+ movdqa xmmD,xmmH |
+ sub rcx, byte SIZEOF_XMMWORD/2 |
+@@ -410,50 +389,25 @@ |
+ .column_st16: |
+ cmp rcx, byte SIZEOF_XMMWORD/4 |
+ jb short .column_st15 |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
+ add rdi, byte SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmD |
+ sub rcx, byte SIZEOF_XMMWORD/4 |
+ .column_st15: |
+- cmp rcx, byte SIZEOF_XMMWORD/16 |
+- jb near .nextrow |
+- mov rax,rcx |
+- xor rcx, byte 0x03 |
+- inc rcx |
+- shl rcx, 4 |
+- movd xmmF,ecx |
+- psrlq xmmE,xmmF |
+- punpcklbw xmmE,xmmE |
+- ; ---------------- |
+- mov rcx,rdi |
+- and rcx, byte SIZEOF_XMMWORD-1 |
+- jz short .adj0 |
+- lea rax, [rcx+rax*4] ; RGB_PIXELSIZE |
+- cmp rax, byte SIZEOF_XMMWORD |
+- ja short .adj0 |
+- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary |
+- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx |
+- movdqa xmmB,xmmA |
+- movdqa xmmG,xmmE |
+- pslldq xmmA, SIZEOF_XMMWORD/2 |
+- pslldq xmmE, SIZEOF_XMMWORD/2 |
+- movd xmmC,ecx |
+- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT |
+- jb short .adj1 |
+- movd xmmH,ecx |
+- psllq xmmA,xmmH |
+- psllq xmmE,xmmH |
+- jmp short .adj0 |
+-.adj1: neg rcx |
+- movd xmmH,ecx |
+- psrlq xmmA,xmmH |
+- psrlq xmmE,xmmH |
+- psllq xmmB,xmmC |
+- psllq xmmG,xmmC |
+- por xmmA,xmmB |
+- por xmmE,xmmG |
+-.adj0: ; ---------------- |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA |
++ ; Store two pixels (8 bytes) of xmmA to the output when it has enough |
++ ; space. |
++ cmp rcx, byte SIZEOF_XMMWORD/8 |
++ jb short .column_st7 |
++ movq MMWORD [rdi], xmmA |
++ add rdi, byte SIZEOF_XMMWORD/8*4 |
++ sub rcx, byte SIZEOF_XMMWORD/8 |
++ psrldq xmmA, SIZEOF_XMMWORD/8*4 |
++.column_st7: |
++ ; Store one pixel (4 bytes) of xmmA to the output when it has enough |
++ ; space. |
++ test rcx, rcx |
++ jz short .nextrow |
++ movd XMM_DWORD [rdi], xmmA |
+ |
+ %endif ; RGB_PIXELSIZE ; --------------- |
+ |
+@@ -475,9 +429,13 @@ |
+ sfence ; flush the write buffer |
+ |
+ .return: |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ mov rsp,rbp ; rsp <- aligned rbp |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jdclrss2.asm |
=================================================================== |
---- simd/jcclrmmx.asm (revision 829) |
-+++ simd/jcclrmmx.asm (working copy) |
-@@ -40,7 +40,7 @@ |
+--- simd/jdclrss2.asm (revision 829) |
++++ simd/jdclrss2.asm (working copy) |
+@@ -1,7 +1,8 @@ |
+ ; |
+ ; jdclrss2.asm - colorspace conversion (SSE2) |
+ ; |
+-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++; Copyright 2012 D. R. Commander |
+ ; |
+ ; Based on |
+ ; x86 SIMD extension for IJG JPEG library |
+@@ -19,8 +20,6 @@ |
+ %include "jcolsamp.inc" |
+ |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 32 |
+ ; |
+ ; Convert some rows of samples to the output colorspace. |
+ ; |
+@@ -42,7 +41,7 @@ |
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
align 16 |
-- global EXTN(jsimd_rgb_ycc_convert_mmx) |
-+ global EXTN(jsimd_rgb_ycc_convert_mmx) PRIVATE |
+- global EXTN(jsimd_ycc_rgb_convert_sse2) |
++ global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE |
- EXTN(jsimd_rgb_ycc_convert_mmx): |
+ EXTN(jsimd_ycc_rgb_convert_sse2): |
push ebp |
-Index: simd/jfsseflt.asm |
+@@ -264,17 +263,13 @@ |
+ movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
+ movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
+ movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF |
+- add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ jmp short .out0 |
+ .out1: ; --(unaligned)----------------- |
+- pcmpeqb xmmH,xmmH ; xmmH=(all 1's) |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
++ movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF |
+ .out0: |
++ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ sub ecx, byte SIZEOF_XMMWORD |
+ jz near .nextrow |
+ |
+@@ -285,14 +280,12 @@ |
+ alignx 16,7 |
+ |
+ .column_st32: |
+- pcmpeqb xmmH,xmmH ; xmmH=(all 1's) |
+ lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE |
+ cmp ecx, byte 2*SIZEOF_XMMWORD |
+ jb short .column_st16 |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
++ add edi, byte 2*SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmF |
+ sub ecx, byte 2*SIZEOF_XMMWORD |
+ jmp short .column_st15 |
+@@ -299,50 +292,44 @@ |
+ .column_st16: |
+ cmp ecx, byte SIZEOF_XMMWORD |
+ jb short .column_st15 |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
+ add edi, byte SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmD |
+ sub ecx, byte SIZEOF_XMMWORD |
+ .column_st15: |
+- mov eax,ecx |
+- xor ecx, byte 0x0F |
+- shl ecx, 2 |
+- movd xmmB,ecx |
+- psrlq xmmH,4 |
+- pcmpeqb xmmE,xmmE |
+- psrlq xmmH,xmmB |
+- psrlq xmmE,xmmB |
+- punpcklbw xmmE,xmmH |
+- ; ---------------- |
+- mov ecx,edi |
+- and ecx, byte SIZEOF_XMMWORD-1 |
+- jz short .adj0 |
+- add eax,ecx |
+- cmp eax, byte SIZEOF_XMMWORD |
+- ja short .adj0 |
+- and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary |
+- shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx |
+- movdqa xmmG,xmmA |
+- movdqa xmmC,xmmE |
+- pslldq xmmA, SIZEOF_XMMWORD/2 |
+- pslldq xmmE, SIZEOF_XMMWORD/2 |
+- movd xmmD,ecx |
+- sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT |
+- jb short .adj1 |
+- movd xmmF,ecx |
+- psllq xmmA,xmmF |
+- psllq xmmE,xmmF |
+- jmp short .adj0 |
+-.adj1: neg ecx |
+- movd xmmF,ecx |
+- psrlq xmmA,xmmF |
+- psrlq xmmE,xmmF |
+- psllq xmmG,xmmD |
+- psllq xmmC,xmmD |
+- por xmmA,xmmG |
+- por xmmE,xmmC |
+-.adj0: ; ---------------- |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ ; Store the lower 8 bytes of xmmA to the output when it has enough |
++ ; space. |
++ cmp ecx, byte SIZEOF_MMWORD |
++ jb short .column_st7 |
++ movq XMM_MMWORD [edi], xmmA |
++ add edi, byte SIZEOF_MMWORD |
++ sub ecx, byte SIZEOF_MMWORD |
++ psrldq xmmA, SIZEOF_MMWORD |
++.column_st7: |
++ ; Store the lower 4 bytes of xmmA to the output when it has enough |
++ ; space. |
++ cmp ecx, byte SIZEOF_DWORD |
++ jb short .column_st3 |
++ movd XMM_DWORD [edi], xmmA |
++ add edi, byte SIZEOF_DWORD |
++ sub ecx, byte SIZEOF_DWORD |
++ psrldq xmmA, SIZEOF_DWORD |
++.column_st3: |
++ ; Store the lower 2 bytes of eax to the output when it has enough |
++ ; space. |
++ movd eax, xmmA |
++ cmp ecx, byte SIZEOF_WORD |
++ jb short .column_st1 |
++ mov WORD [edi], ax |
++ add edi, byte SIZEOF_WORD |
++ sub ecx, byte SIZEOF_WORD |
++ shr eax, 16 |
++.column_st1: |
++ ; Store the lower 1 byte of eax to the output when it has enough |
++ ; space. |
++ test ecx, ecx |
++ jz short .nextrow |
++ mov BYTE [edi], al |
+ |
+ %else ; RGB_PIXELSIZE == 4 ; ----------- |
+ |
+@@ -387,19 +374,14 @@ |
+ movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
+ movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC |
+ movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH |
+- add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ jmp short .out0 |
+ .out1: ; --(unaligned)----------------- |
+- pcmpeqb xmmE,xmmE ; xmmE=(all 1's) |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
++ movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC |
++ movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH |
+ .out0: |
++ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ sub ecx, byte SIZEOF_XMMWORD |
+ jz near .nextrow |
+ |
+@@ -410,13 +392,11 @@ |
+ alignx 16,7 |
+ |
+ .column_st32: |
+- pcmpeqb xmmE,xmmE ; xmmE=(all 1's) |
+ cmp ecx, byte SIZEOF_XMMWORD/2 |
+ jb short .column_st16 |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
++ add edi, byte 2*SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmC |
+ movdqa xmmD,xmmH |
+ sub ecx, byte SIZEOF_XMMWORD/2 |
+@@ -423,50 +403,25 @@ |
+ .column_st16: |
+ cmp ecx, byte SIZEOF_XMMWORD/4 |
+ jb short .column_st15 |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
+ add edi, byte SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmD |
+ sub ecx, byte SIZEOF_XMMWORD/4 |
+ .column_st15: |
+- cmp ecx, byte SIZEOF_XMMWORD/16 |
+- jb short .nextrow |
+- mov eax,ecx |
+- xor ecx, byte 0x03 |
+- inc ecx |
+- shl ecx, 4 |
+- movd xmmF,ecx |
+- psrlq xmmE,xmmF |
+- punpcklbw xmmE,xmmE |
+- ; ---------------- |
+- mov ecx,edi |
+- and ecx, byte SIZEOF_XMMWORD-1 |
+- jz short .adj0 |
+- lea eax, [ecx+eax*4] ; RGB_PIXELSIZE |
+- cmp eax, byte SIZEOF_XMMWORD |
+- ja short .adj0 |
+- and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary |
+- shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx |
+- movdqa xmmB,xmmA |
+- movdqa xmmG,xmmE |
+- pslldq xmmA, SIZEOF_XMMWORD/2 |
+- pslldq xmmE, SIZEOF_XMMWORD/2 |
+- movd xmmC,ecx |
+- sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT |
+- jb short .adj1 |
+- movd xmmH,ecx |
+- psllq xmmA,xmmH |
+- psllq xmmE,xmmH |
+- jmp short .adj0 |
+-.adj1: neg ecx |
+- movd xmmH,ecx |
+- psrlq xmmA,xmmH |
+- psrlq xmmE,xmmH |
+- psllq xmmB,xmmC |
+- psllq xmmG,xmmC |
+- por xmmA,xmmB |
+- por xmmE,xmmG |
+-.adj0: ; ---------------- |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ ; Store two pixels (8 bytes) of xmmA to the output when it has enough |
++ ; space. |
++ cmp ecx, byte SIZEOF_XMMWORD/8 |
++ jb short .column_st7 |
++ movq XMM_MMWORD [edi], xmmA |
++ add edi, byte SIZEOF_XMMWORD/8*4 |
++ sub ecx, byte SIZEOF_XMMWORD/8 |
++ psrldq xmmA, SIZEOF_XMMWORD/8*4 |
++.column_st7: |
++ ; Store one pixel (4 bytes) of xmmA to the output when it has enough |
++ ; space. |
++ test ecx, ecx |
++ jz short .nextrow |
++ movd XMM_DWORD [edi], xmmA |
+ |
+ %endif ; RGB_PIXELSIZE ; --------------- |
+ |
+@@ -500,3 +455,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jdcolmmx.asm |
=================================================================== |
---- simd/jfsseflt.asm (revision 829) |
-+++ simd/jfsseflt.asm (working copy) |
-@@ -37,7 +37,7 @@ |
+--- simd/jdcolmmx.asm (revision 829) |
++++ simd/jdcolmmx.asm (working copy) |
+@@ -35,7 +35,7 @@ |
SECTION SEG_CONST |
- alignz 16 |
-- global EXTN(jconst_fdct_float_sse) |
-+ global EXTN(jconst_fdct_float_sse) PRIVATE |
- |
- EXTN(jconst_fdct_float_sse): |
+ alignz 16 |
+- global EXTN(jconst_ycc_rgb_convert_mmx) |
++ global EXTN(jconst_ycc_rgb_convert_mmx) PRIVATE |
-@@ -65,7 +65,7 @@ |
- %define WK_NUM 2 |
+ EXTN(jconst_ycc_rgb_convert_mmx): |
- align 16 |
-- global EXTN(jsimd_fdct_float_sse) |
-+ global EXTN(jsimd_fdct_float_sse) PRIVATE |
+@@ -48,6 +48,9 @@ |
+ alignz 16 |
- EXTN(jsimd_fdct_float_sse): |
- push ebp |
-Index: simd/jdmrgss2-64.asm |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 32 |
++ |
+ %include "jdclrmmx.asm" |
+ |
+ %undef RGB_RED |
+@@ -54,10 +57,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx |
+ %include "jdclrmmx.asm" |
+ |
+@@ -65,10 +68,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx |
+ %include "jdclrmmx.asm" |
+ |
+@@ -76,10 +79,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx |
+ %include "jdclrmmx.asm" |
+ |
+@@ -87,10 +90,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx |
+ %include "jdclrmmx.asm" |
+ |
+@@ -98,10 +101,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx |
+ %include "jdclrmmx.asm" |
+ |
+@@ -109,9 +112,9 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx |
+ %include "jdclrmmx.asm" |
+Index: simd/jdcolss2-64.asm |
=================================================================== |
---- simd/jdmrgss2-64.asm (revision 829) |
-+++ simd/jdmrgss2-64.asm (working copy) |
-@@ -39,7 +39,7 @@ |
- %define WK_NUM 3 |
+--- simd/jdcolss2-64.asm (revision 829) |
++++ simd/jdcolss2-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jdcolss2.asm - colorspace conversion (64-bit SSE2) |
++; jdcolss2-64.asm - colorspace conversion (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -35,7 +35,7 @@ |
+ SECTION SEG_CONST |
- align 16 |
-- global EXTN(jsimd_h2v1_merged_upsample_sse2) |
-+ global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE |
+ alignz 16 |
+- global EXTN(jconst_ycc_rgb_convert_sse2) |
++ global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE |
- EXTN(jsimd_h2v1_merged_upsample_sse2): |
- push rbp |
-@@ -543,7 +543,7 @@ |
- ; r13 = JSAMPARRAY output_buf |
+ EXTN(jconst_ycc_rgb_convert_sse2): |
- align 16 |
-- global EXTN(jsimd_h2v2_merged_upsample_sse2) |
-+ global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE |
+@@ -48,6 +48,9 @@ |
+ alignz 16 |
- EXTN(jsimd_h2v2_merged_upsample_sse2): |
- push rbp |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 64 |
++ |
+ %include "jdclrss2-64.asm" |
+ |
+ %undef RGB_RED |
+@@ -54,10 +57,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 |
+ %include "jdclrss2-64.asm" |
+ |
+@@ -65,10 +68,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 |
+ %include "jdclrss2-64.asm" |
+ |
+@@ -76,10 +79,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 |
+ %include "jdclrss2-64.asm" |
+ |
+@@ -87,10 +90,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 |
+ %include "jdclrss2-64.asm" |
+ |
+@@ -98,10 +101,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 |
+ %include "jdclrss2-64.asm" |
+ |
+@@ -109,9 +112,9 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 |
+ %include "jdclrss2-64.asm" |
Index: simd/jdcolss2.asm |
=================================================================== |
--- simd/jdcolss2.asm (revision 829) |
@@ -945,6 +12644,105 @@ Index: simd/jdcolss2.asm |
EXTN(jconst_ycc_rgb_convert_sse2): |
+@@ -48,6 +48,9 @@ |
+ alignz 16 |
+ |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 32 |
++ |
+ %include "jdclrss2.asm" |
+ |
+ %undef RGB_RED |
+@@ -54,10 +57,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2 |
+ %include "jdclrss2.asm" |
+ |
+@@ -65,10 +68,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2 |
+ %include "jdclrss2.asm" |
+ |
+@@ -76,10 +79,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2 |
+ %include "jdclrss2.asm" |
+ |
+@@ -87,10 +90,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2 |
+ %include "jdclrss2.asm" |
+ |
+@@ -98,10 +101,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2 |
+ %include "jdclrss2.asm" |
+ |
+@@ -109,9 +112,9 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2 |
+ %include "jdclrss2.asm" |
Index: simd/jdmermmx.asm |
=================================================================== |
--- simd/jdmermmx.asm (revision 829) |
@@ -958,50 +12756,226 @@ Index: simd/jdmermmx.asm |
EXTN(jconst_merged_upsample_mmx): |
-Index: simd/jcclrss2.asm |
-=================================================================== |
---- simd/jcclrss2.asm (revision 829) |
-+++ simd/jcclrss2.asm (working copy) |
-@@ -38,7 +38,7 @@ |
- |
- align 16 |
- |
-- global EXTN(jsimd_rgb_ycc_convert_sse2) |
-+ global EXTN(jsimd_rgb_ycc_convert_sse2) PRIVATE |
+@@ -48,6 +48,9 @@ |
+ alignz 16 |
- EXTN(jsimd_rgb_ycc_convert_sse2): |
- push ebp |
-Index: simd/jiss2red.asm |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 32 |
++ |
+ %include "jdmrgmmx.asm" |
+ |
+ %undef RGB_RED |
+@@ -54,10 +57,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgb_merged_upsample_mmx |
+ %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgb_merged_upsample_mmx |
+ %include "jdmrgmmx.asm" |
+@@ -66,10 +69,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgbx_merged_upsample_mmx |
+ %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgbx_merged_upsample_mmx |
+ %include "jdmrgmmx.asm" |
+@@ -78,10 +81,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgr_merged_upsample_mmx |
+ %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgr_merged_upsample_mmx |
+ %include "jdmrgmmx.asm" |
+@@ -90,10 +93,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgrx_merged_upsample_mmx |
+ %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgrx_merged_upsample_mmx |
+ %include "jdmrgmmx.asm" |
+@@ -102,10 +105,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxbgr_merged_upsample_mmx |
+ %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxbgr_merged_upsample_mmx |
+ %include "jdmrgmmx.asm" |
+@@ -114,10 +117,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxrgb_merged_upsample_mmx |
+ %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxrgb_merged_upsample_mmx |
+ %include "jdmrgmmx.asm" |
+Index: simd/jdmerss2-64.asm |
=================================================================== |
---- simd/jiss2red.asm (revision 829) |
-+++ simd/jiss2red.asm (working copy) |
-@@ -72,7 +72,7 @@ |
+--- simd/jdmerss2-64.asm (revision 829) |
++++ simd/jdmerss2-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jdmerss2.asm - merged upsampling/color conversion (64-bit SSE2) |
++; jdmerss2-64.asm - merged upsampling/color conversion (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -35,7 +35,7 @@ |
SECTION SEG_CONST |
alignz 16 |
-- global EXTN(jconst_idct_red_sse2) |
-+ global EXTN(jconst_idct_red_sse2) PRIVATE |
- |
- EXTN(jconst_idct_red_sse2): |
- |
-@@ -113,7 +113,7 @@ |
- %define WK_NUM 2 |
- |
- align 16 |
-- global EXTN(jsimd_idct_4x4_sse2) |
-+ global EXTN(jsimd_idct_4x4_sse2) PRIVATE |
+- global EXTN(jconst_merged_upsample_sse2) |
++ global EXTN(jconst_merged_upsample_sse2) PRIVATE |
- EXTN(jsimd_idct_4x4_sse2): |
- push ebp |
-@@ -424,7 +424,7 @@ |
- %define output_col(b) (b)+20 ; JDIMENSION output_col |
+ EXTN(jconst_merged_upsample_sse2): |
- align 16 |
-- global EXTN(jsimd_idct_2x2_sse2) |
-+ global EXTN(jsimd_idct_2x2_sse2) PRIVATE |
+@@ -48,6 +48,9 @@ |
+ alignz 16 |
- EXTN(jsimd_idct_2x2_sse2): |
- push ebp |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 64 |
++ |
+ %include "jdmrgss2-64.asm" |
+ |
+ %undef RGB_RED |
+@@ -54,10 +57,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2 |
+ %include "jdmrgss2-64.asm" |
+@@ -66,10 +69,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2 |
+ %include "jdmrgss2-64.asm" |
+@@ -78,10 +81,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2 |
+ %include "jdmrgss2-64.asm" |
+@@ -90,10 +93,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2 |
+ %include "jdmrgss2-64.asm" |
+@@ -102,10 +105,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2 |
+ %include "jdmrgss2-64.asm" |
+@@ -114,10 +117,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2 |
+ %include "jdmrgss2-64.asm" |
Index: simd/jdmerss2.asm |
=================================================================== |
--- simd/jdmerss2.asm (revision 829) |
@@ -1015,146 +12989,854 @@ Index: simd/jdmerss2.asm |
EXTN(jconst_merged_upsample_sse2): |
-Index: simd/jfss2fst-64.asm |
-=================================================================== |
---- simd/jfss2fst-64.asm (revision 829) |
-+++ simd/jfss2fst-64.asm (working copy) |
-@@ -53,7 +53,7 @@ |
- %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
- |
+@@ -48,6 +48,9 @@ |
alignz 16 |
-- global EXTN(jconst_fdct_ifast_sse2) |
-+ global EXTN(jconst_fdct_ifast_sse2) PRIVATE |
- EXTN(jconst_fdct_ifast_sse2): |
+ ; -------------------------------------------------------------------------- |
++ SECTION SEG_TEXT |
++ BITS 32 |
++ |
+ %include "jdmrgss2.asm" |
+ |
+ %undef RGB_RED |
+@@ -54,10 +57,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_RGB_RED |
++%define RGB_GREEN EXT_RGB_GREEN |
++%define RGB_BLUE EXT_RGB_BLUE |
++%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2 |
+ %include "jdmrgss2.asm" |
+@@ -66,10 +69,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 0 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 2 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_RGBX_RED |
++%define RGB_GREEN EXT_RGBX_GREEN |
++%define RGB_BLUE EXT_RGBX_BLUE |
++%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2 |
+ %include "jdmrgss2.asm" |
+@@ -78,10 +81,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 3 |
++%define RGB_RED EXT_BGR_RED |
++%define RGB_GREEN EXT_BGR_GREEN |
++%define RGB_BLUE EXT_BGR_BLUE |
++%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2 |
+ %include "jdmrgss2.asm" |
+@@ -90,10 +93,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 2 |
+-%define RGB_GREEN 1 |
+-%define RGB_BLUE 0 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_BGRX_RED |
++%define RGB_GREEN EXT_BGRX_GREEN |
++%define RGB_BLUE EXT_BGRX_BLUE |
++%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2 |
+ %include "jdmrgss2.asm" |
+@@ -102,10 +105,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 3 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 1 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XBGR_RED |
++%define RGB_GREEN EXT_XBGR_GREEN |
++%define RGB_BLUE EXT_XBGR_BLUE |
++%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2 |
+ %include "jdmrgss2.asm" |
+@@ -114,10 +117,10 @@ |
+ %undef RGB_GREEN |
+ %undef RGB_BLUE |
+ %undef RGB_PIXELSIZE |
+-%define RGB_RED 1 |
+-%define RGB_GREEN 2 |
+-%define RGB_BLUE 3 |
+-%define RGB_PIXELSIZE 4 |
++%define RGB_RED EXT_XRGB_RED |
++%define RGB_GREEN EXT_XRGB_GREEN |
++%define RGB_BLUE EXT_XRGB_BLUE |
++%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2 |
+ %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2 |
+ %include "jdmrgss2.asm" |
+Index: simd/jdmrgmmx.asm |
+=================================================================== |
+--- simd/jdmrgmmx.asm (revision 829) |
++++ simd/jdmrgmmx.asm (working copy) |
+@@ -19,8 +19,6 @@ |
+ %include "jcolsamp.inc" |
-@@ -80,7 +80,7 @@ |
- %define WK_NUM 2 |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 32 |
+ ; |
+ ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. |
+ ; |
+@@ -42,7 +40,7 @@ |
+ %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
align 16 |
-- global EXTN(jsimd_fdct_ifast_sse2) |
-+ global EXTN(jsimd_fdct_ifast_sse2) PRIVATE |
+- global EXTN(jsimd_h2v1_merged_upsample_mmx) |
++ global EXTN(jsimd_h2v1_merged_upsample_mmx) PRIVATE |
- EXTN(jsimd_fdct_ifast_sse2): |
- push rbp |
-Index: simd/jcqntmmx.asm |
-=================================================================== |
---- simd/jcqntmmx.asm (revision 829) |
-+++ simd/jcqntmmx.asm (working copy) |
-@@ -35,7 +35,7 @@ |
- %define workspace ebp+16 ; DCTELEM * workspace |
+ EXTN(jsimd_h2v1_merged_upsample_mmx): |
+ push ebp |
+@@ -253,7 +251,7 @@ |
+ movq MMWORD [edi+2*SIZEOF_MMWORD], mmC |
- align 16 |
-- global EXTN(jsimd_convsamp_mmx) |
-+ global EXTN(jsimd_convsamp_mmx) PRIVATE |
+ sub ecx, byte SIZEOF_MMWORD |
+- jz short .endcolumn |
++ jz near .endcolumn |
- EXTN(jsimd_convsamp_mmx): |
- push ebp |
-@@ -140,7 +140,7 @@ |
- %define workspace ebp+16 ; DCTELEM * workspace |
+ add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr |
+ add esi, byte SIZEOF_MMWORD ; inptr0 |
+@@ -411,7 +409,7 @@ |
+ %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf |
align 16 |
-- global EXTN(jsimd_quantize_mmx) |
-+ global EXTN(jsimd_quantize_mmx) PRIVATE |
+- global EXTN(jsimd_h2v2_merged_upsample_mmx) |
++ global EXTN(jsimd_h2v2_merged_upsample_mmx) PRIVATE |
- EXTN(jsimd_quantize_mmx): |
+ EXTN(jsimd_h2v2_merged_upsample_mmx): |
push ebp |
-Index: simd/jimmxfst.asm |
-=================================================================== |
---- simd/jimmxfst.asm (revision 829) |
-+++ simd/jimmxfst.asm (working copy) |
-@@ -59,7 +59,7 @@ |
- %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
+@@ -461,3 +459,6 @@ |
+ pop ebp |
+ ret |
- alignz 16 |
-- global EXTN(jconst_idct_ifast_mmx) |
-+ global EXTN(jconst_idct_ifast_mmx) PRIVATE |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jdmrgss2-64.asm |
+=================================================================== |
+--- simd/jdmrgss2-64.asm (revision 829) |
++++ simd/jdmrgss2-64.asm (working copy) |
+@@ -1,8 +1,8 @@ |
+ ; |
+-; jdmrgss2.asm - merged upsampling/color conversion (64-bit SSE2) |
++; jdmrgss2-64.asm - merged upsampling/color conversion (64-bit SSE2) |
+ ; |
+-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+-; Copyright 2009 D. R. Commander |
++; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++; Copyright 2009, 2012 D. R. Commander |
+ ; |
+ ; Based on |
+ ; x86 SIMD extension for IJG JPEG library |
+@@ -20,8 +20,6 @@ |
+ %include "jcolsamp.inc" |
+ |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 64 |
+ ; |
+ ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. |
+ ; |
+@@ -41,7 +39,7 @@ |
+ %define WK_NUM 3 |
- EXTN(jconst_idct_ifast_mmx): |
+ align 16 |
+- global EXTN(jsimd_h2v1_merged_upsample_sse2) |
++ global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE |
-@@ -94,7 +94,7 @@ |
- ; JCOEF workspace[DCTSIZE2] |
+ EXTN(jsimd_h2v1_merged_upsample_sse2): |
+ push rbp |
+@@ -51,8 +49,8 @@ |
+ mov [rsp],rax |
+ mov rbp,rsp ; rbp = aligned rbp |
+ lea rsp, [wk(0)] |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ mov rcx, r10 ; col |
+ test rcx,rcx |
+@@ -254,17 +252,13 @@ |
+ movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
+ movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
+ movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF |
+- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ jmp short .out0 |
+ .out1: ; --(unaligned)----------------- |
+- pcmpeqb xmmH,xmmH ; xmmH=(all 1's) |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [rdi], xmmF |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
++ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF |
+ .out0: |
++ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ sub rcx, byte SIZEOF_XMMWORD |
+ jz near .endcolumn |
+ |
+@@ -277,14 +271,12 @@ |
+ jmp near .columnloop |
+ |
+ .column_st32: |
+- pcmpeqb xmmH,xmmH ; xmmH=(all 1's) |
+ lea rcx, [rcx+rcx*2] ; imul ecx, RGB_PIXELSIZE |
+ cmp rcx, byte 2*SIZEOF_XMMWORD |
+ jb short .column_st16 |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [rdi], xmmD |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
++ add rdi, byte 2*SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmF |
+ sub rcx, byte 2*SIZEOF_XMMWORD |
+ jmp short .column_st15 |
+@@ -291,50 +283,44 @@ |
+ .column_st16: |
+ cmp rcx, byte SIZEOF_XMMWORD |
+ jb short .column_st15 |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [rdi], xmmA |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
+ add rdi, byte SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmD |
+ sub rcx, byte SIZEOF_XMMWORD |
+ .column_st15: |
+- mov rax,rcx |
+- xor rcx, byte 0x0F |
+- shl rcx, 2 |
+- movd xmmB,ecx |
+- psrlq xmmH,4 |
+- pcmpeqb xmmE,xmmE |
+- psrlq xmmH,xmmB |
+- psrlq xmmE,xmmB |
+- punpcklbw xmmE,xmmH |
+- ; ---------------- |
+- mov rcx,rdi |
+- and rcx, byte SIZEOF_XMMWORD-1 |
+- jz short .adj0 |
+- add rax,rcx |
+- cmp rax, byte SIZEOF_XMMWORD |
+- ja short .adj0 |
+- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary |
+- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx |
+- movdqa xmmG,xmmA |
+- movdqa xmmC,xmmE |
+- pslldq xmmA, SIZEOF_XMMWORD/2 |
+- pslldq xmmE, SIZEOF_XMMWORD/2 |
+- movd xmmD,ecx |
+- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT |
+- jb short .adj1 |
+- movd xmmF,ecx |
+- psllq xmmA,xmmF |
+- psllq xmmE,xmmF |
+- jmp short .adj0 |
+-.adj1: neg rcx |
+- movd xmmF,ecx |
+- psrlq xmmA,xmmF |
+- psrlq xmmE,xmmF |
+- psllq xmmG,xmmD |
+- psllq xmmC,xmmD |
+- por xmmA,xmmG |
+- por xmmE,xmmC |
+-.adj0: ; ---------------- |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ ; Store the lower 8 bytes of xmmA to the output when it has enough |
++ ; space. |
++ cmp rcx, byte SIZEOF_MMWORD |
++ jb short .column_st7 |
++ movq XMM_MMWORD [rdi], xmmA |
++ add rdi, byte SIZEOF_MMWORD |
++ sub rcx, byte SIZEOF_MMWORD |
++ psrldq xmmA, SIZEOF_MMWORD |
++.column_st7: |
++ ; Store the lower 4 bytes of xmmA to the output when it has enough |
++ ; space. |
++ cmp rcx, byte SIZEOF_DWORD |
++ jb short .column_st3 |
++ movd XMM_DWORD [rdi], xmmA |
++ add rdi, byte SIZEOF_DWORD |
++ sub rcx, byte SIZEOF_DWORD |
++ psrldq xmmA, SIZEOF_DWORD |
++.column_st3: |
++ ; Store the lower 2 bytes of rax to the output when it has enough |
++ ; space. |
++ movd eax, xmmA |
++ cmp rcx, byte SIZEOF_WORD |
++ jb short .column_st1 |
++ mov WORD [rdi], ax |
++ add rdi, byte SIZEOF_WORD |
++ sub rcx, byte SIZEOF_WORD |
++ shr rax, 16 |
++.column_st1: |
++ ; Store the lower 1 byte of rax to the output when it has enough |
++ ; space. |
++ test rcx, rcx |
++ jz short .endcolumn |
++ mov BYTE [rdi], al |
+ |
+ %else ; RGB_PIXELSIZE == 4 ; ----------- |
+ |
+@@ -379,19 +365,14 @@ |
+ movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
+ movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC |
+ movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH |
+- add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ jmp short .out0 |
+ .out1: ; --(unaligned)----------------- |
+- pcmpeqb xmmE,xmmE ; xmmE=(all 1's) |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [rdi], xmmC |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [rdi], xmmH |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
++ movdqu XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC |
++ movdqu XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH |
+ .out0: |
++ add rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ sub rcx, byte SIZEOF_XMMWORD |
+ jz near .endcolumn |
+ |
+@@ -404,13 +385,11 @@ |
+ jmp near .columnloop |
+ |
+ .column_st32: |
+- pcmpeqb xmmE,xmmE ; xmmE=(all 1's) |
+ cmp rcx, byte SIZEOF_XMMWORD/2 |
+ jb short .column_st16 |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [rdi], xmmA |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [rdi], xmmD |
+- add rdi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD |
++ add rdi, byte 2*SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmC |
+ movdqa xmmD,xmmH |
+ sub rcx, byte SIZEOF_XMMWORD/2 |
+@@ -417,50 +396,25 @@ |
+ .column_st16: |
+ cmp rcx, byte SIZEOF_XMMWORD/4 |
+ jb short .column_st15 |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ movdqu XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA |
+ add rdi, byte SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmD |
+ sub rcx, byte SIZEOF_XMMWORD/4 |
+ .column_st15: |
+- cmp rcx, byte SIZEOF_XMMWORD/16 |
+- jb near .endcolumn |
+- mov rax,rcx |
+- xor rcx, byte 0x03 |
+- inc rcx |
+- shl rcx, 4 |
+- movd xmmF,ecx |
+- psrlq xmmE,xmmF |
+- punpcklbw xmmE,xmmE |
+- ; ---------------- |
+- mov rcx,rdi |
+- and rcx, byte SIZEOF_XMMWORD-1 |
+- jz short .adj0 |
+- lea rax, [rcx+rax*4] ; RGB_PIXELSIZE |
+- cmp rax, byte SIZEOF_XMMWORD |
+- ja short .adj0 |
+- and rdi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary |
+- shl rcx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx |
+- movdqa xmmB,xmmA |
+- movdqa xmmG,xmmE |
+- pslldq xmmA, SIZEOF_XMMWORD/2 |
+- pslldq xmmE, SIZEOF_XMMWORD/2 |
+- movd xmmC,ecx |
+- sub rcx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT |
+- jb short .adj1 |
+- movd xmmH,ecx |
+- psllq xmmA,xmmH |
+- psllq xmmE,xmmH |
+- jmp short .adj0 |
+-.adj1: neg rcx |
+- movd xmmH,ecx |
+- psrlq xmmA,xmmH |
+- psrlq xmmE,xmmH |
+- psllq xmmB,xmmC |
+- psllq xmmG,xmmC |
+- por xmmA,xmmB |
+- por xmmE,xmmG |
+-.adj0: ; ---------------- |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ ; Store two pixels (8 bytes) of xmmA to the output when it has enough |
++ ; space. |
++ cmp rcx, byte SIZEOF_XMMWORD/8 |
++ jb short .column_st7 |
++ movq XMM_MMWORD [rdi], xmmA |
++ add rdi, byte SIZEOF_XMMWORD/8*4 |
++ sub rcx, byte SIZEOF_XMMWORD/8 |
++ psrldq xmmA, SIZEOF_XMMWORD/8*4 |
++.column_st7: |
++ ; Store one pixel (4 bytes) of xmmA to the output when it has enough |
++ ; space. |
++ test rcx, rcx |
++ jz short .endcolumn |
++ movd XMM_DWORD [rdi], xmmA |
+ |
+ %endif ; RGB_PIXELSIZE ; --------------- |
+ |
+@@ -468,8 +422,8 @@ |
+ sfence ; flush the write buffer |
+ |
+ .return: |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ mov rsp,rbp ; rsp <- aligned rbp |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+@@ -492,13 +446,14 @@ |
+ ; r13 = JSAMPARRAY output_buf |
align 16 |
-- global EXTN(jsimd_idct_ifast_mmx) |
-+ global EXTN(jsimd_idct_ifast_mmx) PRIVATE |
+- global EXTN(jsimd_h2v2_merged_upsample_sse2) |
++ global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE |
- EXTN(jsimd_idct_ifast_mmx): |
- push ebp |
-Index: simd/jfss2fst.asm |
+ EXTN(jsimd_h2v2_merged_upsample_sse2): |
+ push rbp |
++ mov rax,rsp |
+ mov rbp,rsp |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ mov rax, r10 |
+ |
+@@ -519,10 +474,17 @@ |
+ push rcx |
+ push rax |
+ |
++ %ifdef WIN64 |
++ mov r8, rcx |
++ mov r9, rdi |
++ mov rcx, rax |
++ mov rdx, rbx |
++ %else |
+ mov rdx, rcx |
+ mov rcx, rdi |
+ mov rdi, rax |
+ mov rsi, rbx |
++ %endif |
+ |
+ call EXTN(jsimd_h2v1_merged_upsample_sse2) |
+ |
+@@ -545,10 +507,17 @@ |
+ push rcx |
+ push rax |
+ |
++ %ifdef WIN64 |
++ mov r8, rcx |
++ mov r9, rdi |
++ mov rcx, rax |
++ mov rdx, rbx |
++ %else |
+ mov rdx, rcx |
+ mov rcx, rdi |
+ mov rdi, rax |
+ mov rsi, rbx |
++ %endif |
+ |
+ call EXTN(jsimd_h2v1_merged_upsample_sse2) |
+ |
+@@ -559,7 +528,11 @@ |
+ pop rbx |
+ pop rdx |
+ |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jdmrgss2.asm |
=================================================================== |
---- simd/jfss2fst.asm (revision 829) |
-+++ simd/jfss2fst.asm (working copy) |
-@@ -52,7 +52,7 @@ |
- %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
- |
- alignz 16 |
-- global EXTN(jconst_fdct_ifast_sse2) |
-+ global EXTN(jconst_fdct_ifast_sse2) PRIVATE |
+--- simd/jdmrgss2.asm (revision 829) |
++++ simd/jdmrgss2.asm (working copy) |
+@@ -1,7 +1,8 @@ |
+ ; |
+ ; jdmrgss2.asm - merged upsampling/color conversion (SSE2) |
+ ; |
+-; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++; Copyright 2012 D. R. Commander |
+ ; |
+ ; Based on |
+ ; x86 SIMD extension for IJG JPEG library |
+@@ -19,8 +20,6 @@ |
+ %include "jcolsamp.inc" |
+ |
+ ; -------------------------------------------------------------------------- |
+- SECTION SEG_TEXT |
+- BITS 32 |
+ ; |
+ ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. |
+ ; |
+@@ -42,7 +41,7 @@ |
+ %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
- EXTN(jconst_fdct_ifast_sse2): |
+ align 16 |
+- global EXTN(jsimd_h2v1_merged_upsample_sse2) |
++ global EXTN(jsimd_h2v1_merged_upsample_sse2) PRIVATE |
-@@ -80,7 +80,7 @@ |
- %define WK_NUM 2 |
+ EXTN(jsimd_h2v1_merged_upsample_sse2): |
+ push ebp |
+@@ -266,17 +265,13 @@ |
+ movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
+ movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
+ movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF |
+- add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ jmp short .out0 |
+ .out1: ; --(unaligned)----------------- |
+- pcmpeqb xmmH,xmmH ; xmmH=(all 1's) |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
++ movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF |
+ .out0: |
++ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ sub ecx, byte SIZEOF_XMMWORD |
+ jz near .endcolumn |
+ |
+@@ -290,14 +285,12 @@ |
+ alignx 16,7 |
+ |
+ .column_st32: |
+- pcmpeqb xmmH,xmmH ; xmmH=(all 1's) |
+ lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE |
+ cmp ecx, byte 2*SIZEOF_XMMWORD |
+ jb short .column_st16 |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
++ add edi, byte 2*SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmF |
+ sub ecx, byte 2*SIZEOF_XMMWORD |
+ jmp short .column_st15 |
+@@ -304,50 +297,44 @@ |
+ .column_st16: |
+ cmp ecx, byte SIZEOF_XMMWORD |
+ jb short .column_st15 |
+- maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
+ add edi, byte SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmD |
+ sub ecx, byte SIZEOF_XMMWORD |
+ .column_st15: |
+- mov eax,ecx |
+- xor ecx, byte 0x0F |
+- shl ecx, 2 |
+- movd xmmB,ecx |
+- psrlq xmmH,4 |
+- pcmpeqb xmmE,xmmE |
+- psrlq xmmH,xmmB |
+- psrlq xmmE,xmmB |
+- punpcklbw xmmE,xmmH |
+- ; ---------------- |
+- mov ecx,edi |
+- and ecx, byte SIZEOF_XMMWORD-1 |
+- jz short .adj0 |
+- add eax,ecx |
+- cmp eax, byte SIZEOF_XMMWORD |
+- ja short .adj0 |
+- and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary |
+- shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx |
+- movdqa xmmG,xmmA |
+- movdqa xmmC,xmmE |
+- pslldq xmmA, SIZEOF_XMMWORD/2 |
+- pslldq xmmE, SIZEOF_XMMWORD/2 |
+- movd xmmD,ecx |
+- sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT |
+- jb short .adj1 |
+- movd xmmF,ecx |
+- psllq xmmA,xmmF |
+- psllq xmmE,xmmF |
+- jmp short .adj0 |
+-.adj1: neg ecx |
+- movd xmmF,ecx |
+- psrlq xmmA,xmmF |
+- psrlq xmmE,xmmF |
+- psllq xmmG,xmmD |
+- psllq xmmC,xmmD |
+- por xmmA,xmmG |
+- por xmmE,xmmC |
+-.adj0: ; ---------------- |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ ; Store the lower 8 bytes of xmmA to the output when it has enough |
++ ; space. |
++ cmp ecx, byte SIZEOF_MMWORD |
++ jb short .column_st7 |
++ movq XMM_MMWORD [edi], xmmA |
++ add edi, byte SIZEOF_MMWORD |
++ sub ecx, byte SIZEOF_MMWORD |
++ psrldq xmmA, SIZEOF_MMWORD |
++.column_st7: |
++ ; Store the lower 4 bytes of xmmA to the output when it has enough |
++ ; space. |
++ cmp ecx, byte SIZEOF_DWORD |
++ jb short .column_st3 |
++ movd XMM_DWORD [edi], xmmA |
++ add edi, byte SIZEOF_DWORD |
++ sub ecx, byte SIZEOF_DWORD |
++ psrldq xmmA, SIZEOF_DWORD |
++.column_st3: |
++ ; Store the lower 2 bytes of eax to the output when it has enough |
++ ; space. |
++ movd eax, xmmA |
++ cmp ecx, byte SIZEOF_WORD |
++ jb short .column_st1 |
++ mov WORD [edi], ax |
++ add edi, byte SIZEOF_WORD |
++ sub ecx, byte SIZEOF_WORD |
++ shr eax, 16 |
++.column_st1: |
++ ; Store the lower 1 byte of eax to the output when it has enough |
++ ; space. |
++ test ecx, ecx |
++ jz short .endcolumn |
++ mov BYTE [edi], al |
+ |
+ %else ; RGB_PIXELSIZE == 4 ; ----------- |
+ |
+@@ -392,19 +379,14 @@ |
+ movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
+ movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC |
+ movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH |
+- add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ jmp short .out0 |
+ .out1: ; --(unaligned)----------------- |
+- pcmpeqb xmmE,xmmE ; xmmE=(all 1's) |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
++ movdqu XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC |
++ movdqu XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH |
+ .out0: |
++ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr |
+ sub ecx, byte SIZEOF_XMMWORD |
+ jz near .endcolumn |
+ |
+@@ -418,13 +400,11 @@ |
+ alignx 16,7 |
+ |
+ .column_st32: |
+- pcmpeqb xmmE,xmmE ; xmmE=(all 1's) |
+ cmp ecx, byte SIZEOF_XMMWORD/2 |
+ jb short .column_st16 |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
+- maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD |
+- add edi, byte SIZEOF_XMMWORD ; outptr |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
++ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD |
++ add edi, byte 2*SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmC |
+ movdqa xmmD,xmmH |
+ sub ecx, byte SIZEOF_XMMWORD/2 |
+@@ -431,50 +411,25 @@ |
+ .column_st16: |
+ cmp ecx, byte SIZEOF_XMMWORD/4 |
+ jb short .column_st15 |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA |
+ add edi, byte SIZEOF_XMMWORD ; outptr |
+ movdqa xmmA,xmmD |
+ sub ecx, byte SIZEOF_XMMWORD/4 |
+ .column_st15: |
+- cmp ecx, byte SIZEOF_XMMWORD/16 |
+- jb short .endcolumn |
+- mov eax,ecx |
+- xor ecx, byte 0x03 |
+- inc ecx |
+- shl ecx, 4 |
+- movd xmmF,ecx |
+- psrlq xmmE,xmmF |
+- punpcklbw xmmE,xmmE |
+- ; ---------------- |
+- mov ecx,edi |
+- and ecx, byte SIZEOF_XMMWORD-1 |
+- jz short .adj0 |
+- lea eax, [ecx+eax*4] ; RGB_PIXELSIZE |
+- cmp eax, byte SIZEOF_XMMWORD |
+- ja short .adj0 |
+- and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary |
+- shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx |
+- movdqa xmmB,xmmA |
+- movdqa xmmG,xmmE |
+- pslldq xmmA, SIZEOF_XMMWORD/2 |
+- pslldq xmmE, SIZEOF_XMMWORD/2 |
+- movd xmmC,ecx |
+- sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT |
+- jb short .adj1 |
+- movd xmmH,ecx |
+- psllq xmmA,xmmH |
+- psllq xmmE,xmmH |
+- jmp short .adj0 |
+-.adj1: neg ecx |
+- movd xmmH,ecx |
+- psrlq xmmA,xmmH |
+- psrlq xmmE,xmmH |
+- psllq xmmB,xmmC |
+- psllq xmmG,xmmC |
+- por xmmA,xmmB |
+- por xmmE,xmmG |
+-.adj0: ; ---------------- |
+- maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA |
++ ; Store two pixels (8 bytes) of xmmA to the output when it has enough |
++ ; space. |
++ cmp ecx, byte SIZEOF_XMMWORD/8 |
++ jb short .column_st7 |
++ movq XMM_MMWORD [edi], xmmA |
++ add edi, byte SIZEOF_XMMWORD/8*4 |
++ sub ecx, byte SIZEOF_XMMWORD/8 |
++ psrldq xmmA, SIZEOF_XMMWORD/8*4 |
++.column_st7: |
++ ; Store one pixel (4 bytes) of xmmA to the output when it has enough |
++ ; space. |
++ test ecx, ecx |
++ jz short .endcolumn |
++ movd XMM_DWORD [edi], xmmA |
+ |
+ %endif ; RGB_PIXELSIZE ; --------------- |
+ |
+@@ -509,7 +464,7 @@ |
+ %define output_buf(b) (b)+20 ; JSAMPARRAY output_buf |
align 16 |
-- global EXTN(jsimd_fdct_ifast_sse2) |
-+ global EXTN(jsimd_fdct_ifast_sse2) PRIVATE |
+- global EXTN(jsimd_h2v2_merged_upsample_sse2) |
++ global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE |
- EXTN(jsimd_fdct_ifast_sse2): |
+ EXTN(jsimd_h2v2_merged_upsample_sse2): |
push ebp |
-Index: simd/jcgrammx.asm |
+@@ -559,3 +514,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jdsammmx.asm |
=================================================================== |
---- simd/jcgrammx.asm (revision 829) |
-+++ simd/jcgrammx.asm (working copy) |
-@@ -33,7 +33,7 @@ |
+--- simd/jdsammmx.asm (revision 829) |
++++ simd/jdsammmx.asm (working copy) |
+@@ -22,7 +22,7 @@ |
SECTION SEG_CONST |
alignz 16 |
-- global EXTN(jconst_rgb_gray_convert_mmx) |
-+ global EXTN(jconst_rgb_gray_convert_mmx) PRIVATE |
+- global EXTN(jconst_fancy_upsample_mmx) |
++ global EXTN(jconst_fancy_upsample_mmx) PRIVATE |
- EXTN(jconst_rgb_gray_convert_mmx): |
+ EXTN(jconst_fancy_upsample_mmx): |
-Index: simd/jdcolss2-64.asm |
-=================================================================== |
---- simd/jdcolss2-64.asm (revision 829) |
-+++ simd/jdcolss2-64.asm (working copy) |
-@@ -35,7 +35,7 @@ |
- SECTION SEG_CONST |
+@@ -58,7 +58,7 @@ |
+ %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
- alignz 16 |
-- global EXTN(jconst_ycc_rgb_convert_sse2) |
-+ global EXTN(jconst_ycc_rgb_convert_sse2) PRIVATE |
+ align 16 |
+- global EXTN(jsimd_h2v1_fancy_upsample_mmx) |
++ global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE |
- EXTN(jconst_ycc_rgb_convert_sse2): |
+ EXTN(jsimd_h2v1_fancy_upsample_mmx): |
+ push ebp |
+@@ -216,7 +216,7 @@ |
+ %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
-Index: simd/jf3dnflt.asm |
-=================================================================== |
---- simd/jf3dnflt.asm (revision 829) |
-+++ simd/jf3dnflt.asm (working copy) |
-@@ -27,7 +27,7 @@ |
- SECTION SEG_CONST |
+ align 16 |
+- global EXTN(jsimd_h2v2_fancy_upsample_mmx) |
++ global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE |
- alignz 16 |
-- global EXTN(jconst_fdct_float_3dnow) |
-+ global EXTN(jconst_fdct_float_3dnow) PRIVATE |
+ EXTN(jsimd_h2v2_fancy_upsample_mmx): |
+ push ebp |
+@@ -542,7 +542,7 @@ |
+ %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
- EXTN(jconst_fdct_float_3dnow): |
+ align 16 |
+- global EXTN(jsimd_h2v1_upsample_mmx) |
++ global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE |
-@@ -55,7 +55,7 @@ |
- %define WK_NUM 2 |
+ EXTN(jsimd_h2v1_upsample_mmx): |
+ push ebp |
+@@ -643,7 +643,7 @@ |
+ %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
align 16 |
-- global EXTN(jsimd_fdct_float_3dnow) |
-+ global EXTN(jsimd_fdct_float_3dnow) PRIVATE |
+- global EXTN(jsimd_h2v2_upsample_mmx) |
++ global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE |
- EXTN(jsimd_fdct_float_3dnow): |
+ EXTN(jsimd_h2v2_upsample_mmx): |
push ebp |
+@@ -732,3 +732,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
Index: simd/jdsamss2-64.asm |
=================================================================== |
--- simd/jdsamss2-64.asm (revision 829) |
+++ simd/jdsamss2-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jdsamss2.asm - upsampling (64-bit SSE2) |
++; jdsamss2-64.asm - upsampling (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
@@ -23,7 +23,7 @@ |
SECTION SEG_CONST |
@@ -1164,7 +13846,7 @@ Index: simd/jdsamss2-64.asm |
EXTN(jconst_fancy_upsample_sse2): |
-@@ -59,7 +59,7 @@ |
+@@ -59,10 +59,11 @@ |
; r13 = JSAMPARRAY * output_data_ptr |
align 16 |
@@ -1173,7 +13855,11 @@ Index: simd/jdsamss2-64.asm |
EXTN(jsimd_h2v1_fancy_upsample_sse2): |
push rbp |
-@@ -201,7 +201,7 @@ |
++ mov rax,rsp |
+ mov rbp,rsp |
+ collect_args |
+ |
+@@ -200,7 +201,7 @@ |
%define WK_NUM 4 |
align 16 |
@@ -1182,7 +13868,27 @@ Index: simd/jdsamss2-64.asm |
EXTN(jsimd_h2v2_fancy_upsample_sse2): |
push rbp |
-@@ -498,7 +498,7 @@ |
+@@ -210,8 +211,8 @@ |
+ mov [rsp],rax |
+ mov rbp,rsp ; rbp = aligned rbp |
+ lea rsp, [wk(0)] |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ mov rax, r11 ; colctr |
+ test rax,rax |
+@@ -472,8 +473,8 @@ |
+ jg near .rowloop |
+ |
+ .return: |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ mov rsp,rbp ; rsp <- aligned rbp |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+@@ -497,10 +498,11 @@ |
; r13 = JSAMPARRAY * output_data_ptr |
align 16 |
@@ -1191,7 +13897,11 @@ Index: simd/jdsamss2-64.asm |
EXTN(jsimd_h2v1_upsample_sse2): |
push rbp |
-@@ -587,7 +587,7 @@ |
++ mov rax,rsp |
+ mov rbp,rsp |
+ collect_args |
+ |
+@@ -585,13 +587,14 @@ |
; r13 = JSAMPARRAY * output_data_ptr |
align 16 |
@@ -1200,406 +13910,273 @@ Index: simd/jdsamss2-64.asm |
EXTN(jsimd_h2v2_upsample_sse2): |
push rbp |
-Index: simd/jcgrass2.asm |
++ mov rax,rsp |
+ mov rbp,rsp |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ mov rdx, r11 |
+ add rdx, byte (2*SIZEOF_XMMWORD)-1 |
+@@ -658,7 +661,11 @@ |
+ jg near .rowloop |
+ |
+ .return: |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jdsamss2.asm |
=================================================================== |
---- simd/jcgrass2.asm (revision 829) |
-+++ simd/jcgrass2.asm (working copy) |
-@@ -30,7 +30,7 @@ |
+--- simd/jdsamss2.asm (revision 829) |
++++ simd/jdsamss2.asm (working copy) |
+@@ -22,7 +22,7 @@ |
SECTION SEG_CONST |
alignz 16 |
-- global EXTN(jconst_rgb_gray_convert_sse2) |
-+ global EXTN(jconst_rgb_gray_convert_sse2) PRIVATE |
+- global EXTN(jconst_fancy_upsample_sse2) |
++ global EXTN(jconst_fancy_upsample_sse2) PRIVATE |
- EXTN(jconst_rgb_gray_convert_sse2): |
+ EXTN(jconst_fancy_upsample_sse2): |
-Index: simd/jcsammmx.asm |
-=================================================================== |
---- simd/jcsammmx.asm (revision 829) |
-+++ simd/jcsammmx.asm (working copy) |
-@@ -40,7 +40,7 @@ |
- %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
+@@ -58,7 +58,7 @@ |
+ %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
align 16 |
-- global EXTN(jsimd_h2v1_downsample_mmx) |
-+ global EXTN(jsimd_h2v1_downsample_mmx) PRIVATE |
+- global EXTN(jsimd_h2v1_fancy_upsample_sse2) |
++ global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE |
- EXTN(jsimd_h2v1_downsample_mmx): |
+ EXTN(jsimd_h2v1_fancy_upsample_sse2): |
push ebp |
-@@ -182,7 +182,7 @@ |
- %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
+@@ -214,7 +214,7 @@ |
+ %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
align 16 |
-- global EXTN(jsimd_h2v2_downsample_mmx) |
-+ global EXTN(jsimd_h2v2_downsample_mmx) PRIVATE |
+- global EXTN(jsimd_h2v2_fancy_upsample_sse2) |
++ global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE |
- EXTN(jsimd_h2v2_downsample_mmx): |
+ EXTN(jsimd_h2v2_fancy_upsample_sse2): |
push ebp |
-+Index: simd/jsimd_arm.c |
-+=================================================================== |
-+--- simd/jsimd_arm.c (revision 272637) |
-++++ simd/jsimd_arm.c (working copy) |
-+@@ -29,0 +29,0 @@ |
-+ |
-+ static unsigned int simd_support = ~0; |
-+ |
-+-#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) |
-++#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) |
-+ |
-+ #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) |
-+ |
-+@@ -100,6 +100,6 @@ |
-+ init_simd (void) |
-+ { |
-+ char *env = NULL; |
-+-#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) |
-++#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) |
-+ int bufsize = 1024; /* an initial guess for the line buffer size limit */ |
-+ #endif |
-+ |
-Index: simd/jsimd_arm_neon.S |
-=================================================================== |
---- simd/jsimd_arm_neon.S (revision 272637) |
-+++ simd/jsimd_arm_neon.S (working copy) |
-@@ -41,11 +41,9 @@ |
- /* Supplementary macro for setting function attributes */ |
- .macro asm_function fname |
- #ifdef __APPLE__ |
-- .func _\fname |
- .globl _\fname |
- _\fname: |
- #else |
-- .func \fname |
- .global \fname |
- #ifdef __ELF__ |
- .hidden \fname |
-@@ -670,7 +668,6 @@ |
- .unreq ROW6R |
- .unreq ROW7L |
- .unreq ROW7R |
--.endfunc |
- |
- |
- /*****************************************************************************/ |
-@@ -895,7 +892,6 @@ |
- .unreq TMP2 |
- .unreq TMP3 |
- .unreq TMP4 |
--.endfunc |
- |
- |
- /*****************************************************************************/ |
-@@ -1108,7 +1104,6 @@ |
- .unreq TMP2 |
- .unreq TMP3 |
- .unreq TMP4 |
--.endfunc |
- |
- .purgem idct_helper |
- |
-@@ -1263,7 +1258,6 @@ |
- .unreq OUTPUT_COL |
- .unreq TMP1 |
- .unreq TMP2 |
--.endfunc |
- |
- .purgem idct_helper |
- |
-@@ -1547,7 +1541,6 @@ |
- .unreq U |
- .unreq V |
- .unreq N |
--.endfunc |
- |
- .purgem do_yuv_to_rgb |
- .purgem do_yuv_to_rgb_stage1 |
-@@ -1858,7 +1851,6 @@ |
- .unreq U |
- .unreq V |
- .unreq N |
--.endfunc |
- |
- .purgem do_rgb_to_yuv |
- .purgem do_rgb_to_yuv_stage1 |
-@@ -1940,7 +1932,6 @@ |
- .unreq TMP2 |
- .unreq TMP3 |
- .unreq TMP4 |
--.endfunc |
- |
- |
- /*****************************************************************************/ |
-@@ -2064,7 +2055,6 @@ |
- |
- .unreq DATA |
- .unreq TMP |
--.endfunc |
- |
- |
- /*****************************************************************************/ |
-@@ -2166,7 +2156,6 @@ |
- .unreq CORRECTION |
- .unreq SHIFT |
- .unreq LOOP_COUNT |
--.endfunc |
- |
- |
- /*****************************************************************************/ |
-@@ -2401,7 +2390,6 @@ |
- .unreq WIDTH |
- .unreq TMP |
- |
--.endfunc |
- |
- .purgem upsample16 |
- .purgem upsample32 |
-Index: simd/jsimd_i386.c |
-=================================================================== |
---- simd/jsimd_i386.c (revision 829) |
-+++ simd/jsimd_i386.c (working copy) |
-@@ -61,6 +61,7 @@ |
- simd_support &= JSIMD_SSE2; |
- } |
- |
-+#ifndef JPEG_DECODE_ONLY |
- GLOBAL(int) |
- jsimd_can_rgb_ycc (void) |
- { |
-@@ -82,6 +83,7 @@ |
- |
- return 0; |
- } |
-+#endif |
- |
- GLOBAL(int) |
- jsimd_can_rgb_gray (void) |
-@@ -127,6 +129,7 @@ |
- return 0; |
- } |
- |
-+#ifndef JPEG_DECODE_ONLY |
- GLOBAL(void) |
- jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
- JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
-@@ -179,6 +182,7 @@ |
- mmxfct(cinfo->image_width, input_buf, |
- output_buf, output_row, num_rows); |
- } |
-+#endif |
+@@ -538,7 +538,7 @@ |
+ %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
- GLOBAL(void) |
- jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
-@@ -286,6 +290,7 @@ |
- input_row, output_buf, num_rows); |
- } |
+ align 16 |
+- global EXTN(jsimd_h2v1_upsample_sse2) |
++ global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE |
-+#ifndef JPEG_DECODE_ONLY |
- GLOBAL(int) |
- jsimd_can_h2v2_downsample (void) |
- { |
-@@ -351,6 +356,7 @@ |
- compptr->v_samp_factor, compptr->width_in_blocks, |
- input_data, output_data); |
- } |
-+#endif |
+ EXTN(jsimd_h2v1_upsample_sse2): |
+ push ebp |
+@@ -637,7 +637,7 @@ |
+ %define output_data_ptr(b) (b)+20 ; JSAMPARRAY * output_data_ptr |
- GLOBAL(int) |
- jsimd_can_h2v2_upsample (void) |
-@@ -636,6 +642,7 @@ |
- in_row_group_ctr, output_buf); |
- } |
+ align 16 |
+- global EXTN(jsimd_h2v2_upsample_sse2) |
++ global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE |
-+#ifndef JPEG_DECODE_ONLY |
- GLOBAL(int) |
- jsimd_can_convsamp (void) |
- { |
-@@ -855,6 +862,7 @@ |
- else if (simd_support & JSIMD_3DNOW) |
- jsimd_quantize_float_3dnow(coef_block, divisors, workspace); |
- } |
-+#endif |
+ EXTN(jsimd_h2v2_upsample_sse2): |
+ push ebp |
+@@ -724,3 +724,6 @@ |
+ pop ebp |
+ ret |
- GLOBAL(int) |
- jsimd_can_idct_2x2 (void) |
-@@ -1045,4 +1053,3 @@ |
- jsimd_idct_float_3dnow(compptr->dct_table, coef_block, |
- output_buf, output_col); |
- } |
-- |
-Index: simd/jcqnts2f-64.asm |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jf3dnflt.asm |
=================================================================== |
---- simd/jcqnts2f-64.asm (revision 829) |
-+++ simd/jcqnts2f-64.asm (working copy) |
-@@ -36,7 +36,7 @@ |
- ; r12 = FAST_FLOAT * workspace |
- |
- align 16 |
-- global EXTN(jsimd_convsamp_float_sse2) |
-+ global EXTN(jsimd_convsamp_float_sse2) PRIVATE |
+--- simd/jf3dnflt.asm (revision 829) |
++++ simd/jf3dnflt.asm (working copy) |
+@@ -27,7 +27,7 @@ |
+ SECTION SEG_CONST |
- EXTN(jsimd_convsamp_float_sse2): |
- push rbp |
-@@ -110,7 +110,7 @@ |
- ; r12 = FAST_FLOAT * workspace |
+ alignz 16 |
+- global EXTN(jconst_fdct_float_3dnow) |
++ global EXTN(jconst_fdct_float_3dnow) PRIVATE |
- align 16 |
-- global EXTN(jsimd_quantize_float_sse2) |
-+ global EXTN(jsimd_quantize_float_sse2) PRIVATE |
+ EXTN(jconst_fdct_float_3dnow): |
- EXTN(jsimd_quantize_float_sse2): |
- push rbp |
-Index: simd/jcqnt3dn.asm |
-=================================================================== |
---- simd/jcqnt3dn.asm (revision 829) |
-+++ simd/jcqnt3dn.asm (working copy) |
-@@ -35,7 +35,7 @@ |
- %define workspace ebp+16 ; FAST_FLOAT * workspace |
+@@ -55,7 +55,7 @@ |
+ %define WK_NUM 2 |
align 16 |
-- global EXTN(jsimd_convsamp_float_3dnow) |
-+ global EXTN(jsimd_convsamp_float_3dnow) PRIVATE |
+- global EXTN(jsimd_fdct_float_3dnow) |
++ global EXTN(jsimd_fdct_float_3dnow) PRIVATE |
- EXTN(jsimd_convsamp_float_3dnow): |
+ EXTN(jsimd_fdct_float_3dnow): |
push ebp |
-@@ -138,7 +138,7 @@ |
- %define workspace ebp+16 ; FAST_FLOAT * workspace |
- |
- align 16 |
-- global EXTN(jsimd_quantize_float_3dnow) |
-+ global EXTN(jsimd_quantize_float_3dnow) PRIVATE |
+@@ -315,3 +315,6 @@ |
+ pop ebp |
+ ret |
- EXTN(jsimd_quantize_float_3dnow): |
- push ebp |
-Index: simd/jcsamss2.asm |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jfmmxfst.asm |
=================================================================== |
---- simd/jcsamss2.asm (revision 829) |
-+++ simd/jcsamss2.asm (working copy) |
-@@ -40,7 +40,7 @@ |
- %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
+--- simd/jfmmxfst.asm (revision 829) |
++++ simd/jfmmxfst.asm (working copy) |
+@@ -52,7 +52,7 @@ |
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
- align 16 |
-- global EXTN(jsimd_h2v1_downsample_sse2) |
-+ global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE |
+ alignz 16 |
+- global EXTN(jconst_fdct_ifast_mmx) |
++ global EXTN(jconst_fdct_ifast_mmx) PRIVATE |
- EXTN(jsimd_h2v1_downsample_sse2): |
- push ebp |
-@@ -195,7 +195,7 @@ |
- %define output_data(b) (b)+28 ; JSAMPARRAY output_data |
+ EXTN(jconst_fdct_ifast_mmx): |
+ |
+@@ -80,7 +80,7 @@ |
+ %define WK_NUM 2 |
align 16 |
-- global EXTN(jsimd_h2v2_downsample_sse2) |
-+ global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE |
+- global EXTN(jsimd_fdct_ifast_mmx) |
++ global EXTN(jsimd_fdct_ifast_mmx) PRIVATE |
- EXTN(jsimd_h2v2_downsample_sse2): |
+ EXTN(jsimd_fdct_ifast_mmx): |
push ebp |
-Index: simd/jsimd_x86_64.c |
+@@ -392,3 +392,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jfmmxint.asm |
=================================================================== |
---- simd/jsimd_x86_64.c (revision 829) |
-+++ simd/jsimd_x86_64.c (working copy) |
-@@ -29,6 +29,7 @@ |
+--- simd/jfmmxint.asm (revision 829) |
++++ simd/jfmmxint.asm (working copy) |
+@@ -66,7 +66,7 @@ |
+ SECTION SEG_CONST |
- #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ |
+ alignz 16 |
+- global EXTN(jconst_fdct_islow_mmx) |
++ global EXTN(jconst_fdct_islow_mmx) PRIVATE |
-+#ifndef JPEG_DECODE_ONLY |
- GLOBAL(int) |
- jsimd_can_rgb_ycc (void) |
- { |
-@@ -45,6 +46,7 @@ |
+ EXTN(jconst_fdct_islow_mmx): |
- return 1; |
- } |
-+#endif |
+@@ -101,7 +101,7 @@ |
+ %define WK_NUM 2 |
- GLOBAL(int) |
- jsimd_can_rgb_gray (void) |
-@@ -80,6 +82,7 @@ |
- return 1; |
- } |
+ align 16 |
+- global EXTN(jsimd_fdct_islow_mmx) |
++ global EXTN(jsimd_fdct_islow_mmx) PRIVATE |
-+#ifndef JPEG_DECODE_ONLY |
- GLOBAL(void) |
- jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
- JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
-@@ -118,6 +121,7 @@ |
+ EXTN(jsimd_fdct_islow_mmx): |
+ push ebp |
+@@ -617,3 +617,6 @@ |
+ pop ebp |
+ ret |
- sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
- } |
-+#endif |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jfss2fst-64.asm |
+=================================================================== |
+--- simd/jfss2fst-64.asm (revision 829) |
++++ simd/jfss2fst-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jfss2fst.asm - fast integer FDCT (64-bit SSE2) |
++; jfss2fst-64.asm - fast integer FDCT (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -53,7 +53,7 @@ |
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
- GLOBAL(void) |
- jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
-@@ -197,6 +201,7 @@ |
- sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
- } |
+ alignz 16 |
+- global EXTN(jconst_fdct_ifast_sse2) |
++ global EXTN(jconst_fdct_ifast_sse2) PRIVATE |
-+#ifndef JPEG_DECODE_ONLY |
- GLOBAL(int) |
- jsimd_can_h2v2_downsample (void) |
- { |
-@@ -242,6 +247,7 @@ |
- compptr->width_in_blocks, |
- input_data, output_data); |
- } |
-+#endif |
+ EXTN(jconst_fdct_ifast_sse2): |
- GLOBAL(int) |
- jsimd_can_h2v2_upsample (void) |
-@@ -451,6 +457,7 @@ |
- sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
- } |
+@@ -80,7 +80,7 @@ |
+ %define WK_NUM 2 |
-+#ifndef JPEG_DECODE_ONLY |
- GLOBAL(int) |
- jsimd_can_convsamp (void) |
- { |
-@@ -601,6 +608,7 @@ |
- { |
- jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
- } |
-+#endif |
+ align 16 |
+- global EXTN(jsimd_fdct_ifast_sse2) |
++ global EXTN(jsimd_fdct_ifast_sse2) PRIVATE |
- GLOBAL(int) |
- jsimd_can_idct_2x2 (void) |
-@@ -750,4 +758,3 @@ |
- jsimd_idct_float_sse2(compptr->dct_table, coef_block, |
- output_buf, output_col); |
- } |
-- |
-Index: simd/jimmxint.asm |
+ EXTN(jsimd_fdct_ifast_sse2): |
+ push rbp |
+@@ -386,3 +386,7 @@ |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jfss2fst.asm |
=================================================================== |
---- simd/jimmxint.asm (revision 829) |
-+++ simd/jimmxint.asm (working copy) |
-@@ -66,7 +66,7 @@ |
- SECTION SEG_CONST |
+--- simd/jfss2fst.asm (revision 829) |
++++ simd/jfss2fst.asm (working copy) |
+@@ -52,7 +52,7 @@ |
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
alignz 16 |
-- global EXTN(jconst_idct_islow_mmx) |
-+ global EXTN(jconst_idct_islow_mmx) PRIVATE |
+- global EXTN(jconst_fdct_ifast_sse2) |
++ global EXTN(jconst_fdct_ifast_sse2) PRIVATE |
- EXTN(jconst_idct_islow_mmx): |
+ EXTN(jconst_fdct_ifast_sse2): |
-@@ -107,7 +107,7 @@ |
- ; JCOEF workspace[DCTSIZE2] |
+@@ -80,7 +80,7 @@ |
+ %define WK_NUM 2 |
align 16 |
-- global EXTN(jsimd_idct_islow_mmx) |
-+ global EXTN(jsimd_idct_islow_mmx) PRIVATE |
+- global EXTN(jsimd_fdct_ifast_sse2) |
++ global EXTN(jsimd_fdct_ifast_sse2) PRIVATE |
- EXTN(jsimd_idct_islow_mmx): |
+ EXTN(jsimd_fdct_ifast_sse2): |
push ebp |
-Index: simd/jcgrymmx.asm |
+@@ -399,3 +399,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jfss2int-64.asm |
=================================================================== |
---- simd/jcgrymmx.asm (revision 829) |
-+++ simd/jcgrymmx.asm (working copy) |
-@@ -41,7 +41,7 @@ |
- %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
+--- simd/jfss2int-64.asm (revision 829) |
++++ simd/jfss2int-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jfss2int.asm - accurate integer FDCT (64-bit SSE2) |
++; jfss2int-64.asm - accurate integer FDCT (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -67,7 +67,7 @@ |
+ SECTION SEG_CONST |
+ |
+ alignz 16 |
+- global EXTN(jconst_fdct_islow_sse2) |
++ global EXTN(jconst_fdct_islow_sse2) PRIVATE |
+ |
+ EXTN(jconst_fdct_islow_sse2): |
+ |
+@@ -101,7 +101,7 @@ |
+ %define WK_NUM 6 |
align 16 |
-- global EXTN(jsimd_rgb_gray_convert_mmx) |
-+ global EXTN(jsimd_rgb_gray_convert_mmx) PRIVATE |
+- global EXTN(jsimd_fdct_islow_sse2) |
++ global EXTN(jsimd_fdct_islow_sse2) PRIVATE |
- EXTN(jsimd_rgb_gray_convert_mmx): |
- push ebp |
+ EXTN(jsimd_fdct_islow_sse2): |
+ push rbp |
+@@ -616,3 +616,7 @@ |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
Index: simd/jfss2int.asm |
=================================================================== |
--- simd/jfss2int.asm (revision 829) |
@@ -1622,32 +14199,166 @@ Index: simd/jfss2int.asm |
EXTN(jsimd_fdct_islow_sse2): |
push ebp |
-Index: simd/jcgryss2.asm |
+@@ -629,3 +629,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jfsseflt-64.asm |
+=================================================================== |
+--- simd/jfsseflt-64.asm (revision 829) |
++++ simd/jfsseflt-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jfsseflt.asm - floating-point FDCT (64-bit SSE) |
++; jfsseflt-64.asm - floating-point FDCT (64-bit SSE) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -38,7 +38,7 @@ |
+ SECTION SEG_CONST |
+ |
+ alignz 16 |
+- global EXTN(jconst_fdct_float_sse) |
++ global EXTN(jconst_fdct_float_sse) PRIVATE |
+ |
+ EXTN(jconst_fdct_float_sse): |
+ |
+@@ -65,7 +65,7 @@ |
+ %define WK_NUM 2 |
+ |
+ align 16 |
+- global EXTN(jsimd_fdct_float_sse) |
++ global EXTN(jsimd_fdct_float_sse) PRIVATE |
+ |
+ EXTN(jsimd_fdct_float_sse): |
+ push rbp |
+@@ -352,3 +352,7 @@ |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jfsseflt.asm |
+=================================================================== |
+--- simd/jfsseflt.asm (revision 829) |
++++ simd/jfsseflt.asm (working copy) |
+@@ -37,7 +37,7 @@ |
+ SECTION SEG_CONST |
+ |
+ alignz 16 |
+- global EXTN(jconst_fdct_float_sse) |
++ global EXTN(jconst_fdct_float_sse) PRIVATE |
+ |
+ EXTN(jconst_fdct_float_sse): |
+ |
+@@ -65,7 +65,7 @@ |
+ %define WK_NUM 2 |
+ |
+ align 16 |
+- global EXTN(jsimd_fdct_float_sse) |
++ global EXTN(jsimd_fdct_float_sse) PRIVATE |
+ |
+ EXTN(jsimd_fdct_float_sse): |
+ push ebp |
+@@ -365,3 +365,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/ji3dnflt.asm |
=================================================================== |
---- simd/jcgryss2.asm (revision 829) |
-+++ simd/jcgryss2.asm (working copy) |
-@@ -39,7 +39,7 @@ |
+--- simd/ji3dnflt.asm (revision 829) |
++++ simd/ji3dnflt.asm (working copy) |
+@@ -27,7 +27,7 @@ |
+ SECTION SEG_CONST |
+ |
+ alignz 16 |
+- global EXTN(jconst_idct_float_3dnow) |
++ global EXTN(jconst_idct_float_3dnow) PRIVATE |
+ |
+ EXTN(jconst_idct_float_3dnow): |
+ |
+@@ -63,7 +63,7 @@ |
+ ; FAST_FLOAT workspace[DCTSIZE2] |
align 16 |
+- global EXTN(jsimd_idct_float_3dnow) |
++ global EXTN(jsimd_idct_float_3dnow) PRIVATE |
+ |
+ EXTN(jsimd_idct_float_3dnow): |
+ push ebp |
+@@ -447,3 +447,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jimmxfst.asm |
+=================================================================== |
+--- simd/jimmxfst.asm (revision 829) |
++++ simd/jimmxfst.asm (working copy) |
+@@ -59,7 +59,7 @@ |
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
+ |
+ alignz 16 |
+- global EXTN(jconst_idct_ifast_mmx) |
++ global EXTN(jconst_idct_ifast_mmx) PRIVATE |
+ |
+ EXTN(jconst_idct_ifast_mmx): |
-- global EXTN(jsimd_rgb_gray_convert_sse2) |
-+ global EXTN(jsimd_rgb_gray_convert_sse2) PRIVATE |
+@@ -94,7 +94,7 @@ |
+ ; JCOEF workspace[DCTSIZE2] |
+ |
+ align 16 |
+- global EXTN(jsimd_idct_ifast_mmx) |
++ global EXTN(jsimd_idct_ifast_mmx) PRIVATE |
- EXTN(jsimd_rgb_gray_convert_sse2): |
+ EXTN(jsimd_idct_ifast_mmx): |
push ebp |
-Index: simd/jccolmmx.asm |
+@@ -495,3 +495,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jimmxint.asm |
=================================================================== |
---- simd/jccolmmx.asm (revision 829) |
-+++ simd/jccolmmx.asm (working copy) |
-@@ -37,7 +37,7 @@ |
+--- simd/jimmxint.asm (revision 829) |
++++ simd/jimmxint.asm (working copy) |
+@@ -66,7 +66,7 @@ |
SECTION SEG_CONST |
alignz 16 |
-- global EXTN(jconst_rgb_ycc_convert_mmx) |
-+ global EXTN(jconst_rgb_ycc_convert_mmx) PRIVATE |
+- global EXTN(jconst_idct_islow_mmx) |
++ global EXTN(jconst_idct_islow_mmx) PRIVATE |
- EXTN(jconst_rgb_ycc_convert_mmx): |
+ EXTN(jconst_idct_islow_mmx): |
+ |
+@@ -107,7 +107,7 @@ |
+ ; JCOEF workspace[DCTSIZE2] |
+ |
+ align 16 |
+- global EXTN(jsimd_idct_islow_mmx) |
++ global EXTN(jsimd_idct_islow_mmx) PRIVATE |
+ |
+ EXTN(jsimd_idct_islow_mmx): |
+ push ebp |
+@@ -847,3 +847,6 @@ |
+ pop ebp |
+ ret |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
Index: simd/jimmxred.asm |
=================================================================== |
--- simd/jimmxred.asm (revision 829) |
@@ -1679,144 +14390,186 @@ Index: simd/jimmxred.asm |
EXTN(jsimd_idct_2x2_mmx): |
push ebp |
-Index: simd/jsimdext.inc |
-=================================================================== |
---- simd/jsimdext.inc (revision 829) |
-+++ simd/jsimdext.inc (working copy) |
-@@ -73,6 +73,9 @@ |
- ; * *BSD family Unix using elf format |
- ; * Unix System V, including Solaris x86, UnixWare and SCO Unix |
- |
-+; PIC is the default on Linux |
-+%define PIC |
-+ |
- ; mark stack as non-executable |
- section .note.GNU-stack noalloc noexec nowrite progbits |
- |
-@@ -375,4 +378,14 @@ |
- ; |
- %include "jsimdcfg.inc" |
- |
-+; Begin chromium edits |
-+%ifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- |
-+%define PRIVATE :private_extern |
-+%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ |
-+%define PRIVATE :hidden |
-+%else |
-+%define PRIVATE |
-+%endif |
-+; End chromium edits |
-+ |
- ; -------------------------------------------------------------------------- |
-Index: simd/jdclrmmx.asm |
-=================================================================== |
---- simd/jdclrmmx.asm (revision 829) |
-+++ simd/jdclrmmx.asm (working copy) |
-@@ -40,7 +40,7 @@ |
- %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
- |
- align 16 |
-- global EXTN(jsimd_ycc_rgb_convert_mmx) |
-+ global EXTN(jsimd_ycc_rgb_convert_mmx) PRIVATE |
+@@ -701,3 +701,6 @@ |
+ pop ebp |
+ ret |
- EXTN(jsimd_ycc_rgb_convert_mmx): |
- push ebp |
-Index: simd/jccolss2.asm |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jiss2flt-64.asm |
=================================================================== |
---- simd/jccolss2.asm (revision 829) |
-+++ simd/jccolss2.asm (working copy) |
-@@ -34,7 +34,7 @@ |
+--- simd/jiss2flt-64.asm (revision 829) |
++++ simd/jiss2flt-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jiss2flt.asm - floating-point IDCT (64-bit SSE & SSE2) |
++; jiss2flt-64.asm - floating-point IDCT (64-bit SSE & SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -38,7 +38,7 @@ |
SECTION SEG_CONST |
alignz 16 |
-- global EXTN(jconst_rgb_ycc_convert_sse2) |
-+ global EXTN(jconst_rgb_ycc_convert_sse2) PRIVATE |
+- global EXTN(jconst_idct_float_sse2) |
++ global EXTN(jconst_idct_float_sse2) PRIVATE |
- EXTN(jconst_rgb_ycc_convert_sse2): |
+ EXTN(jconst_idct_float_sse2): |
-Index: simd/jisseflt.asm |
+@@ -74,7 +74,7 @@ |
+ ; FAST_FLOAT workspace[DCTSIZE2] |
+ |
+ align 16 |
+- global EXTN(jsimd_idct_float_sse2) |
++ global EXTN(jsimd_idct_float_sse2) PRIVATE |
+ |
+ EXTN(jsimd_idct_float_sse2): |
+ push rbp |
+@@ -81,11 +81,11 @@ |
+ mov rax,rsp ; rax = original rbp |
+ sub rsp, byte 4 |
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
+- mov [rsp],eax |
++ mov [rsp],rax |
+ mov rbp,rsp ; rbp = aligned rbp |
+ lea rsp, [workspace] |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ ; ---- Pass 1: process columns from input, store into work array. |
+ |
+@@ -471,9 +471,13 @@ |
+ dec rcx ; ctr |
+ jnz near .rowloop |
+ |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ mov rsp,rbp ; rsp <- aligned rbp |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jiss2flt.asm |
=================================================================== |
---- simd/jisseflt.asm (revision 829) |
-+++ simd/jisseflt.asm (working copy) |
+--- simd/jiss2flt.asm (revision 829) |
++++ simd/jiss2flt.asm (working copy) |
@@ -37,7 +37,7 @@ |
SECTION SEG_CONST |
alignz 16 |
-- global EXTN(jconst_idct_float_sse) |
-+ global EXTN(jconst_idct_float_sse) PRIVATE |
+- global EXTN(jconst_idct_float_sse2) |
++ global EXTN(jconst_idct_float_sse2) PRIVATE |
- EXTN(jconst_idct_float_sse): |
+ EXTN(jconst_idct_float_sse2): |
@@ -73,7 +73,7 @@ |
; FAST_FLOAT workspace[DCTSIZE2] |
align 16 |
-- global EXTN(jsimd_idct_float_sse) |
-+ global EXTN(jsimd_idct_float_sse) PRIVATE |
+- global EXTN(jsimd_idct_float_sse2) |
++ global EXTN(jsimd_idct_float_sse2) PRIVATE |
- EXTN(jsimd_idct_float_sse): |
+ EXTN(jsimd_idct_float_sse2): |
push ebp |
-Index: simd/jcqnts2i-64.asm |
+@@ -493,3 +493,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jiss2fst-64.asm |
=================================================================== |
---- simd/jcqnts2i-64.asm (revision 829) |
-+++ simd/jcqnts2i-64.asm (working copy) |
-@@ -36,7 +36,7 @@ |
- ; r12 = DCTELEM * workspace |
+--- simd/jiss2fst-64.asm (revision 829) |
++++ simd/jiss2fst-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jiss2fst.asm - fast integer IDCT (64-bit SSE2) |
++; jiss2fst-64.asm - fast integer IDCT (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -60,7 +60,7 @@ |
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
- align 16 |
-- global EXTN(jsimd_convsamp_sse2) |
-+ global EXTN(jsimd_convsamp_sse2) PRIVATE |
+ alignz 16 |
+- global EXTN(jconst_idct_ifast_sse2) |
++ global EXTN(jconst_idct_ifast_sse2) PRIVATE |
- EXTN(jsimd_convsamp_sse2): |
- push rbp |
-@@ -112,7 +112,7 @@ |
- ; r12 = DCTELEM * workspace |
+ EXTN(jconst_idct_ifast_sse2): |
+ |
+@@ -93,7 +93,7 @@ |
+ %define WK_NUM 2 |
align 16 |
-- global EXTN(jsimd_quantize_sse2) |
-+ global EXTN(jsimd_quantize_sse2) PRIVATE |
+- global EXTN(jsimd_idct_ifast_sse2) |
++ global EXTN(jsimd_idct_ifast_sse2) PRIVATE |
- EXTN(jsimd_quantize_sse2): |
+ EXTN(jsimd_idct_ifast_sse2): |
push rbp |
-Index: simd/jdclrss2.asm |
+@@ -100,7 +100,7 @@ |
+ mov rax,rsp ; rax = original rbp |
+ sub rsp, byte 4 |
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
+- mov [rsp],eax |
++ mov [rsp],rax |
+ mov rbp,rsp ; rbp = aligned rbp |
+ lea rsp, [wk(0)] |
+ collect_args |
+@@ -486,3 +486,7 @@ |
+ pop rbp |
+ ret |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jiss2fst.asm |
=================================================================== |
---- simd/jdclrss2.asm (revision 829) |
-+++ simd/jdclrss2.asm (working copy) |
-@@ -40,7 +40,7 @@ |
- %define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr |
- |
- align 16 |
-- global EXTN(jsimd_ycc_rgb_convert_sse2) |
-+ global EXTN(jsimd_ycc_rgb_convert_sse2) PRIVATE |
+--- simd/jiss2fst.asm (revision 829) |
++++ simd/jiss2fst.asm (working copy) |
+@@ -59,7 +59,7 @@ |
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
- EXTN(jsimd_ycc_rgb_convert_sse2): |
- push ebp |
-Index: simd/jcqntsse.asm |
-=================================================================== |
---- simd/jcqntsse.asm (revision 829) |
-+++ simd/jcqntsse.asm (working copy) |
-@@ -35,7 +35,7 @@ |
- %define workspace ebp+16 ; FAST_FLOAT * workspace |
+ alignz 16 |
+- global EXTN(jconst_idct_ifast_sse2) |
++ global EXTN(jconst_idct_ifast_sse2) PRIVATE |
- align 16 |
-- global EXTN(jsimd_convsamp_float_sse) |
-+ global EXTN(jsimd_convsamp_float_sse) PRIVATE |
+ EXTN(jconst_idct_ifast_sse2): |
- EXTN(jsimd_convsamp_float_sse): |
- push ebp |
-@@ -138,7 +138,7 @@ |
- %define workspace ebp+16 ; FAST_FLOAT * workspace |
+@@ -92,7 +92,7 @@ |
+ %define WK_NUM 2 |
align 16 |
-- global EXTN(jsimd_quantize_float_sse) |
-+ global EXTN(jsimd_quantize_float_sse) PRIVATE |
+- global EXTN(jsimd_idct_ifast_sse2) |
++ global EXTN(jsimd_idct_ifast_sse2) PRIVATE |
- EXTN(jsimd_quantize_float_sse): |
+ EXTN(jsimd_idct_ifast_sse2): |
push ebp |
+@@ -497,3 +497,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
Index: simd/jiss2int-64.asm |
=================================================================== |
--- simd/jiss2int-64.asm (revision 829) |
+++ simd/jiss2int-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jiss2int.asm - accurate integer IDCT (64-bit SSE2) |
++; jiss2int-64.asm - accurate integer IDCT (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
@@ -67,7 +67,7 @@ |
SECTION SEG_CONST |
@@ -1835,279 +14588,507 @@ Index: simd/jiss2int-64.asm |
EXTN(jsimd_idct_islow_sse2): |
push rbp |
-Index: simd/jfmmxfst.asm |
+@@ -842,3 +842,7 @@ |
+ pop rsp ; rsp <- original rbp |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jiss2int.asm |
=================================================================== |
---- simd/jfmmxfst.asm (revision 829) |
-+++ simd/jfmmxfst.asm (working copy) |
-@@ -52,7 +52,7 @@ |
- %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
+--- simd/jiss2int.asm (revision 829) |
++++ simd/jiss2int.asm (working copy) |
+@@ -66,7 +66,7 @@ |
+ SECTION SEG_CONST |
alignz 16 |
-- global EXTN(jconst_fdct_ifast_mmx) |
-+ global EXTN(jconst_fdct_ifast_mmx) PRIVATE |
+- global EXTN(jconst_idct_islow_sse2) |
++ global EXTN(jconst_idct_islow_sse2) PRIVATE |
- EXTN(jconst_fdct_ifast_mmx): |
+ EXTN(jconst_idct_islow_sse2): |
-@@ -80,7 +80,7 @@ |
- %define WK_NUM 2 |
+@@ -105,7 +105,7 @@ |
+ %define WK_NUM 12 |
align 16 |
-- global EXTN(jsimd_fdct_ifast_mmx) |
-+ global EXTN(jsimd_fdct_ifast_mmx) PRIVATE |
+- global EXTN(jsimd_idct_islow_sse2) |
++ global EXTN(jsimd_idct_islow_sse2) PRIVATE |
- EXTN(jsimd_fdct_ifast_mmx): |
+ EXTN(jsimd_idct_islow_sse2): |
push ebp |
-Index: jdarith.c |
-=================================================================== |
---- jdarith.c (revision 829) |
-+++ jdarith.c (working copy) |
-@@ -150,8 +150,8 @@ |
- */ |
- sv = *st; |
- qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ |
-- nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ |
-- nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ |
-+ nl = (unsigned char) qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ |
-+ nm = (unsigned char) qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ |
- |
- /* Decode & estimation procedures per sections D.2.4 & D.2.5 */ |
- temp = e->a - qe; |
-Index: jdhuff.c |
+@@ -854,3 +854,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jiss2red-64.asm |
=================================================================== |
---- jdhuff.c (revision 1541) |
-+++ jdhuff.c (working copy) |
-@@ -662,7 +662,7 @@ |
- d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; |
- register int s, k, r, l; |
+--- simd/jiss2red-64.asm (revision 829) |
++++ simd/jiss2red-64.asm (working copy) |
+@@ -1,5 +1,5 @@ |
+ ; |
+-; jiss2red.asm - reduced-size IDCT (64-bit SSE2) |
++; jiss2red-64.asm - reduced-size IDCT (64-bit SSE2) |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+ ; Copyright 2009 D. R. Commander |
+@@ -73,7 +73,7 @@ |
+ SECTION SEG_CONST |
-- HUFF_DECODE_FAST(s, l, dctbl); |
-+ HUFF_DECODE_FAST(s, l, dctbl, slow_decode_mcu); |
- if (s) { |
- FILL_BIT_BUFFER_FAST |
- r = GET_BITS(s); |
-@@ -679,7 +679,7 @@ |
- if (entropy->ac_needed[blkn]) { |
- |
- for (k = 1; k < DCTSIZE2; k++) { |
-- HUFF_DECODE_FAST(s, l, actbl); |
-+ HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu); |
- r = s >> 4; |
- s &= 15; |
- |
-@@ -698,7 +698,7 @@ |
- } else { |
+ alignz 16 |
+- global EXTN(jconst_idct_red_sse2) |
++ global EXTN(jconst_idct_red_sse2) PRIVATE |
- for (k = 1; k < DCTSIZE2; k++) { |
-- HUFF_DECODE_FAST(s, l, actbl); |
-+ HUFF_DECODE_FAST(s, l, actbl, slow_decode_mcu); |
- r = s >> 4; |
- s &= 15; |
+ EXTN(jconst_idct_red_sse2): |
-@@ -715,6 +715,7 @@ |
- } |
+@@ -114,7 +114,7 @@ |
+ %define WK_NUM 2 |
- if (cinfo->unread_marker != 0) { |
-+slow_decode_mcu: |
- cinfo->unread_marker = 0; |
- return FALSE; |
- } |
-@@ -742,7 +743,7 @@ |
- * this module, since we'll just re-assign them on the next call.) |
- */ |
+ align 16 |
+- global EXTN(jsimd_idct_4x4_sse2) |
++ global EXTN(jsimd_idct_4x4_sse2) PRIVATE |
--#define BUFSIZE (DCTSIZE2 * 2) |
-+#define BUFSIZE (DCTSIZE2 * 2u) |
+ EXTN(jsimd_idct_4x4_sse2): |
+ push rbp |
+@@ -121,7 +121,7 @@ |
+ mov rax,rsp ; rax = original rbp |
+ sub rsp, byte 4 |
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
+- mov [rsp],eax |
++ mov [rsp],rax |
+ mov rbp,rsp ; rbp = aligned rbp |
+ lea rsp, [wk(0)] |
+ collect_args |
+@@ -413,13 +413,14 @@ |
+ ; r13 = JDIMENSION output_col |
- METHODDEF(boolean) |
- decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) |
-Index: jdhuff.h |
+ align 16 |
+- global EXTN(jsimd_idct_2x2_sse2) |
++ global EXTN(jsimd_idct_2x2_sse2) PRIVATE |
+ |
+ EXTN(jsimd_idct_2x2_sse2): |
+ push rbp |
++ mov rax,rsp |
+ mov rbp,rsp |
++ collect_args |
+ push rbx |
+- collect_args |
+ |
+ ; ---- Pass 1: process columns from input. |
+ |
+@@ -565,7 +566,11 @@ |
+ mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx |
+ mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx |
+ |
++ pop rbx |
+ uncollect_args |
+- pop rbx |
+ pop rbp |
+ ret |
++ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jiss2red.asm |
=================================================================== |
---- jdhuff.h (revision 1541) |
-+++ jdhuff.h (working copy) |
-@@ -208,7 +208,7 @@ |
- } \ |
- } |
+--- simd/jiss2red.asm (revision 829) |
++++ simd/jiss2red.asm (working copy) |
+@@ -72,7 +72,7 @@ |
+ SECTION SEG_CONST |
--#define HUFF_DECODE_FAST(s,nb,htbl) \ |
-+#define HUFF_DECODE_FAST(s,nb,htbl,slowlabel) \ |
- FILL_BIT_BUFFER_FAST; \ |
- s = PEEK_BITS(HUFF_LOOKAHEAD); \ |
- s = htbl->lookup[s]; \ |
-@@ -225,7 +225,9 @@ |
- s |= GET_BITS(1); \ |
- nb++; \ |
- } \ |
-- s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) & 0xFF ]; \ |
-+ if (nb > 16) \ |
-+ goto slowlabel; \ |
-+ s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) ]; \ |
- } |
+ alignz 16 |
+- global EXTN(jconst_idct_red_sse2) |
++ global EXTN(jconst_idct_red_sse2) PRIVATE |
- /* Out-of-line case for Huffman code fetching */ |
+ EXTN(jconst_idct_red_sse2): |
-Index: jchuff.c |
-=================================================================== |
---- jchuff.c (revision 1219) |
-+++ jchuff.c (revision 1220) |
-@@ -22,8 +22,36 @@ |
- #include "jchuff.h" /* Declarations shared with jcphuff.c */ |
- #include <limits.h> |
+@@ -113,7 +113,7 @@ |
+ %define WK_NUM 2 |
-+/* |
-+ * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be |
-+ * used for bit counting rather than the lookup table. This will reduce the |
-+ * memory footprint by 64k, which is important for some mobile applications |
-+ * that create many isolated instances of libjpeg-turbo (web browsers, for |
-+ * instance.) This may improve performance on some mobile platforms as well. |
-+ * This feature is enabled by default only on ARM processors, because some x86 |
-+ * chips have a slow implementation of bsr, and the use of clz/bsr cannot be |
-+ * shown to have a significant performance impact even on the x86 chips that |
-+ * have a fast implementation of it. When building for ARMv6, you can |
-+ * explicitly disable the use of clz/bsr by adding -mthumb to the compiler |
-+ * flags (this defines __thumb__). |
-+ */ |
-+ |
-+/* NOTE: Both GCC and Clang define __GNUC__ */ |
-+#if defined __GNUC__ && defined __arm__ |
-+#if !defined __thumb__ || defined __thumb2__ |
-+#define USE_CLZ_INTRINSIC |
-+#endif |
-+#endif |
-+ |
-+#ifdef USE_CLZ_INTRINSIC |
-+#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) |
-+#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) |
-+#else |
- static unsigned char jpeg_nbits_table[65536]; |
- static int jpeg_nbits_table_init = 0; |
-+#define JPEG_NBITS(x) (jpeg_nbits_table[x]) |
-+#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) |
-+#endif |
+ align 16 |
+- global EXTN(jsimd_idct_4x4_sse2) |
++ global EXTN(jsimd_idct_4x4_sse2) PRIVATE |
- #ifndef min |
- #define min(a,b) ((a)<(b)?(a):(b)) |
-@@ -272,6 +300,7 @@ |
- dtbl->ehufsi[i] = huffsize[p]; |
- } |
+ EXTN(jsimd_idct_4x4_sse2): |
+ push ebp |
+@@ -424,7 +424,7 @@ |
+ %define output_col(b) (b)+20 ; JDIMENSION output_col |
+ |
+ align 16 |
+- global EXTN(jsimd_idct_2x2_sse2) |
++ global EXTN(jsimd_idct_2x2_sse2) PRIVATE |
+ |
+ EXTN(jsimd_idct_2x2_sse2): |
+ push ebp |
+@@ -589,3 +589,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jisseflt.asm |
+=================================================================== |
+--- simd/jisseflt.asm (revision 829) |
++++ simd/jisseflt.asm (working copy) |
+@@ -37,7 +37,7 @@ |
+ SECTION SEG_CONST |
-+#ifndef USE_CLZ_INTRINSIC |
- if(!jpeg_nbits_table_init) { |
- for(i = 0; i < 65536; i++) { |
- int nbits = 0, temp = i; |
-@@ -280,6 +309,7 @@ |
- } |
- jpeg_nbits_table_init = 1; |
- } |
-+#endif |
- } |
+ alignz 16 |
+- global EXTN(jconst_idct_float_sse) |
++ global EXTN(jconst_idct_float_sse) PRIVATE |
+ EXTN(jconst_idct_float_sse): |
-@@ -482,7 +512,7 @@ |
- temp2 += temp3; |
+@@ -73,7 +73,7 @@ |
+ ; FAST_FLOAT workspace[DCTSIZE2] |
- /* Find the number of bits needed for the magnitude of the coefficient */ |
-- nbits = jpeg_nbits_table[temp]; |
-+ nbits = JPEG_NBITS(temp); |
+ align 16 |
+- global EXTN(jsimd_idct_float_sse) |
++ global EXTN(jsimd_idct_float_sse) PRIVATE |
- /* Emit the Huffman-coded symbol for the number of bits */ |
- code = dctbl->ehufco[nbits]; |
-@@ -516,7 +546,7 @@ |
- temp ^= temp3; \ |
- temp -= temp3; \ |
- temp2 += temp3; \ |
-- nbits = jpeg_nbits_table[temp]; \ |
-+ nbits = JPEG_NBITS_NONZERO(temp); \ |
- /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ |
- while (r > 15) { \ |
- EMIT_BITS(code_0xf0, size_0xf0) \ |
-Index: simd/jsimd_arm64.c |
+ EXTN(jsimd_idct_float_sse): |
+ push ebp |
+@@ -567,3 +567,6 @@ |
+ pop ebp |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jsimd.h |
=================================================================== |
---- /dev/null |
-+++ simd/jsimd_arm64.c |
-@@ -0,0 +1,544 @@ |
-+/* |
-+ * jsimd_arm64.c |
-+ * |
-+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
-+ * Copyright 2009-2011, 2013-2014 D. R. Commander |
-+ * |
-+ * Based on the x86 SIMD extension for IJG JPEG library, |
-+ * Copyright (C) 1999-2006, MIYASAKA Masaru. |
+--- simd/jsimd.h (revision 829) |
++++ simd/jsimd.h (working copy) |
+@@ -2,19 +2,22 @@ |
+ * simd/jsimd.h |
+ * |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++ * Copyright 2011 D. R. Commander |
+ * |
+ * Based on the x86 SIMD extension for IJG JPEG library, |
+ * Copyright (C) 1999-2006, MIYASAKA Masaru. |
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc |
-+ * |
-+ * This file contains the interface between the "normal" portions |
-+ * of the library and the SIMD implementations when running on a |
-+ * 64-bit ARM architecture. |
-+ */ |
-+ |
-+#define JPEG_INTERNALS |
-+#include "../jinclude.h" |
-+#include "../jpeglib.h" |
-+#include "../jsimd.h" |
-+#include "../jdct.h" |
-+#include "../jsimddct.h" |
-+#include "jsimd.h" |
-+ |
-+#include <stdio.h> |
-+#include <string.h> |
-+#include <ctype.h> |
-+ |
-+static unsigned int simd_support = ~0; |
-+ |
-+/* |
-+ * Check what SIMD accelerations are supported. |
-+ * |
-+ * FIXME: This code is racy under a multi-threaded environment. |
-+ */ |
+ * |
+ */ |
+ |
+ /* Bitmask for supported acceleration methods */ |
+ |
+-#define JSIMD_NONE 0x00 |
+-#define JSIMD_MMX 0x01 |
+-#define JSIMD_3DNOW 0x02 |
+-#define JSIMD_SSE 0x04 |
+-#define JSIMD_SSE2 0x08 |
++#define JSIMD_NONE 0x00 |
++#define JSIMD_MMX 0x01 |
++#define JSIMD_3DNOW 0x02 |
++#define JSIMD_SSE 0x04 |
++#define JSIMD_SSE2 0x08 |
++#define JSIMD_ARM_NEON 0x10 |
+ |
+ /* Short forms of external names for systems with brain-damaged linkers. */ |
+ |
+@@ -27,6 +30,13 @@ |
+ #define jsimd_extbgrx_ycc_convert_mmx jSEXTBGRXYCCM |
+ #define jsimd_extxbgr_ycc_convert_mmx jSEXTXBGRYCCM |
+ #define jsimd_extxrgb_ycc_convert_mmx jSEXTXRGBYCCM |
++#define jsimd_rgb_gray_convert_mmx jSRGBGRYM |
++#define jsimd_extrgb_gray_convert_mmx jSEXTRGBGRYM |
++#define jsimd_extrgbx_gray_convert_mmx jSEXTRGBXGRYM |
++#define jsimd_extbgr_gray_convert_mmx jSEXTBGRGRYM |
++#define jsimd_extbgrx_gray_convert_mmx jSEXTBGRXGRYM |
++#define jsimd_extxbgr_gray_convert_mmx jSEXTXBGRGRYM |
++#define jsimd_extxrgb_gray_convert_mmx jSEXTXRGBGRYM |
+ #define jsimd_ycc_rgb_convert_mmx jSYCCRGBM |
+ #define jsimd_ycc_extrgb_convert_mmx jSYCCEXTRGBM |
+ #define jsimd_ycc_extrgbx_convert_mmx jSYCCEXTRGBXM |
+@@ -42,6 +52,14 @@ |
+ #define jsimd_extbgrx_ycc_convert_sse2 jSEXTBGRXYCCS2 |
+ #define jsimd_extxbgr_ycc_convert_sse2 jSEXTXBGRYCCS2 |
+ #define jsimd_extxrgb_ycc_convert_sse2 jSEXTXRGBYCCS2 |
++#define jconst_rgb_gray_convert_sse2 jSCRGBGRYS2 |
++#define jsimd_rgb_gray_convert_sse2 jSRGBGRYS2 |
++#define jsimd_extrgb_gray_convert_sse2 jSEXTRGBGRYS2 |
++#define jsimd_extrgbx_gray_convert_sse2 jSEXTRGBXGRYS2 |
++#define jsimd_extbgr_gray_convert_sse2 jSEXTBGRGRYS2 |
++#define jsimd_extbgrx_gray_convert_sse2 jSEXTBGRXGRYS2 |
++#define jsimd_extxbgr_gray_convert_sse2 jSEXTXBGRGRYS2 |
++#define jsimd_extxrgb_gray_convert_sse2 jSEXTXRGBGRYS2 |
+ #define jconst_ycc_rgb_convert_sse2 jSCYCCRGBS2 |
+ #define jsimd_ycc_rgb_convert_sse2 jSYCCRGBS2 |
+ #define jsimd_ycc_extrgb_convert_sse2 jSYCCEXTRGBS2 |
+@@ -162,6 +180,35 @@ |
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
+ JDIMENSION output_row, int num_rows)); |
+ |
++EXTERN(void) jsimd_rgb_gray_convert_mmx |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extrgb_gray_convert_mmx |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extrgbx_gray_convert_mmx |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extbgr_gray_convert_mmx |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extbgrx_gray_convert_mmx |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extxbgr_gray_convert_mmx |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extxrgb_gray_convert_mmx |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
+ |
-+/* |
-+ * ARMv8 architectures support NEON extensions by default. |
-+ * It is no longer optional as it was with ARMv7. |
-+ */ |
+ EXTERN(void) jsimd_ycc_rgb_convert_mmx |
+ JPP((JDIMENSION out_width, |
+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
+@@ -221,6 +268,36 @@ |
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
+ JDIMENSION output_row, int num_rows)); |
+ |
++extern const int jconst_rgb_gray_convert_sse2[]; |
++EXTERN(void) jsimd_rgb_gray_convert_sse2 |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extrgb_gray_convert_sse2 |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extrgbx_gray_convert_sse2 |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extbgr_gray_convert_sse2 |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extbgrx_gray_convert_sse2 |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extxbgr_gray_convert_sse2 |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extxrgb_gray_convert_sse2 |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
+ |
+ extern const int jconst_ycc_rgb_convert_sse2[]; |
+ EXTERN(void) jsimd_ycc_rgb_convert_sse2 |
+ JPP((JDIMENSION out_width, |
+@@ -251,6 +328,64 @@ |
+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
+ JSAMPARRAY output_buf, int num_rows)); |
+ |
++EXTERN(void) jsimd_rgb_ycc_convert_neon |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extrgb_ycc_convert_neon |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extrgbx_ycc_convert_neon |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extbgr_ycc_convert_neon |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extbgrx_ycc_convert_neon |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extxbgr_ycc_convert_neon |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
++EXTERN(void) jsimd_extxrgb_ycc_convert_neon |
++ JPP((JDIMENSION img_width, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows)); |
+ |
-+LOCAL(void) |
-+init_simd (void) |
-+{ |
-+ char *env = NULL; |
++EXTERN(void) jsimd_ycc_rgb_convert_neon |
++ JPP((JDIMENSION out_width, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows)); |
++EXTERN(void) jsimd_ycc_extrgb_convert_neon |
++ JPP((JDIMENSION out_width, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows)); |
++EXTERN(void) jsimd_ycc_extrgbx_convert_neon |
++ JPP((JDIMENSION out_width, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows)); |
++EXTERN(void) jsimd_ycc_extbgr_convert_neon |
++ JPP((JDIMENSION out_width, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows)); |
++EXTERN(void) jsimd_ycc_extbgrx_convert_neon |
++ JPP((JDIMENSION out_width, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows)); |
++EXTERN(void) jsimd_ycc_extxbgr_convert_neon |
++ JPP((JDIMENSION out_width, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows)); |
++EXTERN(void) jsimd_ycc_extxrgb_convert_neon |
++ JPP((JDIMENSION out_width, |
++ JSAMPIMAGE input_buf, JDIMENSION input_row, |
++ JSAMPARRAY output_buf, int num_rows)); |
+ |
-+ if (simd_support != ~0U) |
-+ return; |
+ /* SIMD Downsample */ |
+ EXTERN(void) jsimd_h2v2_downsample_mmx |
+ JPP((JDIMENSION image_width, int max_v_samp_factor, |
+@@ -387,6 +522,10 @@ |
+ JPP((JDIMENSION output_width, JSAMPIMAGE input_buf, |
+ JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)); |
+ |
++EXTERN(void) jsimd_h2v1_fancy_upsample_neon |
++ JPP((int max_v_samp_factor, JDIMENSION downsampled_width, |
++ JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)); |
+ |
-+ simd_support = 0; |
+ /* SIMD Sample Conversion */ |
+ EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data, |
+ JDIMENSION start_col, |
+@@ -396,6 +535,10 @@ |
+ JDIMENSION start_col, |
+ DCTELEM * workspace)); |
+ |
++EXTERN(void) jsimd_convsamp_neon JPP((JSAMPARRAY sample_data, |
++ JDIMENSION start_col, |
++ DCTELEM * workspace)); |
+ |
-+ simd_support |= JSIMD_ARM_NEON; |
+ EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data, |
+ JDIMENSION start_col, |
+ FAST_FLOAT * workspace)); |
+@@ -417,6 +560,8 @@ |
+ extern const int jconst_fdct_islow_sse2[]; |
+ EXTERN(void) jsimd_fdct_ifast_sse2 JPP((DCTELEM * data)); |
+ |
++EXTERN(void) jsimd_fdct_ifast_neon JPP((DCTELEM * data)); |
+ |
-+ /* Force different settings through environment variables */ |
-+ env = getenv("JSIMD_FORCENEON"); |
-+ if ((env != NULL) && (strcmp(env, "1") == 0)) |
-+ simd_support &= JSIMD_ARM_NEON; |
-+ env = getenv("JSIMD_FORCENONE"); |
-+ if ((env != NULL) && (strcmp(env, "1") == 0)) |
-+ simd_support = 0; |
-+} |
+ EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data)); |
+ |
+ extern const int jconst_fdct_float_sse[]; |
+@@ -431,6 +576,10 @@ |
+ DCTELEM * divisors, |
+ DCTELEM * workspace)); |
+ |
++EXTERN(void) jsimd_quantize_neon JPP((JCOEFPTR coef_block, |
++ DCTELEM * divisors, |
++ DCTELEM * workspace)); |
+ |
-+GLOBAL(int) |
-+jsimd_can_rgb_ycc (void) |
-+{ |
-+ init_simd(); |
+ EXTERN(void) jsimd_quantize_float_3dnow JPP((JCOEFPTR coef_block, |
+ FAST_FLOAT * divisors, |
+ FAST_FLOAT * workspace)); |
+@@ -463,6 +612,15 @@ |
+ JSAMPARRAY output_buf, |
+ JDIMENSION output_col)); |
+ |
++EXTERN(void) jsimd_idct_2x2_neon JPP((void * dct_table, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, |
++ JDIMENSION output_col)); |
++EXTERN(void) jsimd_idct_4x4_neon JPP((void * dct_table, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, |
++ JDIMENSION output_col)); |
+ |
-+ return 0; |
-+} |
+ /* SIMD Inverse DCT */ |
+ EXTERN(void) jsimd_idct_islow_mmx JPP((void * dct_table, |
+ JCOEFPTR coef_block, |
+@@ -484,6 +642,15 @@ |
+ JSAMPARRAY output_buf, |
+ JDIMENSION output_col)); |
+ |
++EXTERN(void) jsimd_idct_islow_neon JPP((void * dct_table, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, |
++ JDIMENSION output_col)); |
++EXTERN(void) jsimd_idct_ifast_neon JPP((void * dct_table, |
++ JCOEFPTR coef_block, |
++ JSAMPARRAY output_buf, |
++ JDIMENSION output_col)); |
+ |
-+GLOBAL(int) |
+ EXTERN(void) jsimd_idct_float_3dnow JPP((void * dct_table, |
+ JCOEFPTR coef_block, |
+ JSAMPARRAY output_buf, |
+Index: simd/jsimd_i386.c |
+=================================================================== |
+--- simd/jsimd_i386.c (revision 829) |
++++ simd/jsimd_i386.c (working copy) |
+@@ -2,10 +2,11 @@ |
+ * jsimd_i386.c |
+ * |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * Copyright 2009 D. R. Commander |
++ * Copyright 2009-2011 D. R. Commander |
+ * |
+ * Based on the x86 SIMD extension for IJG JPEG library, |
+ * Copyright (C) 1999-2006, MIYASAKA Masaru. |
++ * For conditions of distribution and use, see copyright notice in jsimdext.inc |
+ * |
+ * This file contains the interface between the "normal" portions |
+ * of the library and the SIMD implementations when running on a |
+@@ -40,7 +41,7 @@ |
+ { |
+ char *env = NULL; |
+ |
+- if (simd_support != ~0) |
++ if (simd_support != ~0U) |
+ return; |
+ |
+ simd_support = jpeg_simd_cpu_support(); |
+@@ -51,15 +52,16 @@ |
+ simd_support &= JSIMD_MMX; |
+ env = getenv("JSIMD_FORCE3DNOW"); |
+ if ((env != NULL) && (strcmp(env, "1") == 0)) |
+- simd_support &= JSIMD_3DNOW; |
++ simd_support &= JSIMD_3DNOW|JSIMD_MMX; |
+ env = getenv("JSIMD_FORCESSE"); |
+ if ((env != NULL) && (strcmp(env, "1") == 0)) |
+- simd_support &= JSIMD_SSE; |
++ simd_support &= JSIMD_SSE|JSIMD_MMX; |
+ env = getenv("JSIMD_FORCESSE2"); |
+ if ((env != NULL) && (strcmp(env, "1") == 0)) |
+ simd_support &= JSIMD_SSE2; |
+ } |
+ |
++#ifndef JPEG_DECODE_ONLY |
+ GLOBAL(int) |
+ jsimd_can_rgb_ycc (void) |
+ { |
+@@ -81,8 +83,31 @@ |
+ |
+ return 0; |
+ } |
++#endif |
+ |
+ GLOBAL(int) |
+jsimd_can_rgb_gray (void) |
+{ |
+ init_simd(); |
+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_ycc_rgb (void) |
-+{ |
-+ init_simd(); |
-+ |
+ /* The code is optimised for these values only */ |
+ if (BITS_IN_JSAMPLE != 8) |
+ return 0; |
@@ -2116,2323 +15097,2031 @@ Index: simd/jsimd_arm64.c |
+ if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
+ return 0; |
+ |
-+ if (simd_support & JSIMD_ARM_NEON) |
++ if ((simd_support & JSIMD_SSE2) && |
++ IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
++ return 1; |
++ if (simd_support & JSIMD_MMX) |
+ return 1; |
+ |
+ return 0; |
+} |
+ |
+GLOBAL(int) |
-+jsimd_can_ycc_rgb565 (void) |
+ jsimd_can_ycc_rgb (void) |
+ { |
+ init_simd(); |
+@@ -104,6 +129,7 @@ |
+ return 0; |
+ } |
+ |
++#ifndef JPEG_DECODE_ONLY |
+ GLOBAL(void) |
+ jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
+@@ -119,6 +145,7 @@ |
+ mmxfct=jsimd_extrgb_ycc_convert_mmx; |
+ break; |
+ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
+ sse2fct=jsimd_extrgbx_ycc_convert_sse2; |
+ mmxfct=jsimd_extrgbx_ycc_convert_mmx; |
+ break; |
+@@ -127,14 +154,17 @@ |
+ mmxfct=jsimd_extbgr_ycc_convert_mmx; |
+ break; |
+ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
+ sse2fct=jsimd_extbgrx_ycc_convert_sse2; |
+ mmxfct=jsimd_extbgrx_ycc_convert_mmx; |
+ break; |
+ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
+ sse2fct=jsimd_extxbgr_ycc_convert_sse2; |
+ mmxfct=jsimd_extxbgr_ycc_convert_mmx; |
+ break; |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
+ sse2fct=jsimd_extxrgb_ycc_convert_sse2; |
+ mmxfct=jsimd_extxrgb_ycc_convert_mmx; |
+ break; |
+@@ -152,8 +182,62 @@ |
+ mmxfct(cinfo->image_width, input_buf, |
+ output_buf, output_row, num_rows); |
+ } |
++#endif |
+ |
+ GLOBAL(void) |
++jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
++ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
++ JDIMENSION output_row, int num_rows) |
+{ |
-+ init_simd(); |
++ void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
++ void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
++ |
++ switch(cinfo->in_color_space) |
++ { |
++ case JCS_EXT_RGB: |
++ sse2fct=jsimd_extrgb_gray_convert_sse2; |
++ mmxfct=jsimd_extrgb_gray_convert_mmx; |
++ break; |
++ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
++ sse2fct=jsimd_extrgbx_gray_convert_sse2; |
++ mmxfct=jsimd_extrgbx_gray_convert_mmx; |
++ break; |
++ case JCS_EXT_BGR: |
++ sse2fct=jsimd_extbgr_gray_convert_sse2; |
++ mmxfct=jsimd_extbgr_gray_convert_mmx; |
++ break; |
++ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
++ sse2fct=jsimd_extbgrx_gray_convert_sse2; |
++ mmxfct=jsimd_extbgrx_gray_convert_mmx; |
++ break; |
++ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
++ sse2fct=jsimd_extxbgr_gray_convert_sse2; |
++ mmxfct=jsimd_extxbgr_gray_convert_mmx; |
++ break; |
++ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
++ sse2fct=jsimd_extxrgb_gray_convert_sse2; |
++ mmxfct=jsimd_extxrgb_gray_convert_mmx; |
++ break; |
++ default: |
++ sse2fct=jsimd_rgb_gray_convert_sse2; |
++ mmxfct=jsimd_rgb_gray_convert_mmx; |
++ break; |
++ } |
++ |
++ if ((simd_support & JSIMD_SSE2) && |
++ IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
++ sse2fct(cinfo->image_width, input_buf, |
++ output_buf, output_row, num_rows); |
++ else if (simd_support & JSIMD_MMX) |
++ mmxfct(cinfo->image_width, input_buf, |
++ output_buf, output_row, num_rows); |
++} |
+ |
++GLOBAL(void) |
+ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
+ JSAMPARRAY output_buf, int num_rows) |
+@@ -168,6 +252,7 @@ |
+ mmxfct=jsimd_ycc_extrgb_convert_mmx; |
+ break; |
+ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
+ sse2fct=jsimd_ycc_extrgbx_convert_sse2; |
+ mmxfct=jsimd_ycc_extrgbx_convert_mmx; |
+ break; |
+@@ -176,14 +261,17 @@ |
+ mmxfct=jsimd_ycc_extbgr_convert_mmx; |
+ break; |
+ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
+ sse2fct=jsimd_ycc_extbgrx_convert_sse2; |
+ mmxfct=jsimd_ycc_extbgrx_convert_mmx; |
+ break; |
+ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
+ sse2fct=jsimd_ycc_extxbgr_convert_sse2; |
+ mmxfct=jsimd_ycc_extxbgr_convert_mmx; |
+ break; |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
+ sse2fct=jsimd_ycc_extxrgb_convert_sse2; |
+ mmxfct=jsimd_ycc_extxrgb_convert_mmx; |
+ break; |
+@@ -202,6 +290,7 @@ |
+ input_row, output_buf, num_rows); |
+ } |
+ |
++#ifndef JPEG_DECODE_ONLY |
+ GLOBAL(int) |
+ jsimd_can_h2v2_downsample (void) |
+ { |
+@@ -267,6 +356,7 @@ |
+ compptr->v_samp_factor, compptr->width_in_blocks, |
+ input_data, output_data); |
+ } |
++#endif |
+ |
+ GLOBAL(int) |
+ jsimd_can_h2v2_upsample (void) |
+@@ -382,7 +472,7 @@ |
+ { |
+ if ((simd_support & JSIMD_SSE2) && |
+ IS_ALIGNED_SSE(jconst_fancy_upsample_sse2)) |
+- jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
++ jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
+ compptr->downsampled_width, input_data, output_data_ptr); |
+ else if (simd_support & JSIMD_MMX) |
+ jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor, |
+@@ -460,6 +550,7 @@ |
+ mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx; |
+ break; |
+ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
+ sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; |
+ mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx; |
+ break; |
+@@ -468,14 +559,17 @@ |
+ mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx; |
+ break; |
+ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
+ sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; |
+ mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx; |
+ break; |
+ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
+ sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; |
+ mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx; |
+ break; |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
+ sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; |
+ mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx; |
+ break; |
+@@ -510,6 +604,7 @@ |
+ mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx; |
+ break; |
+ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
+ sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; |
+ mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx; |
+ break; |
+@@ -518,14 +613,17 @@ |
+ mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx; |
+ break; |
+ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
+ sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; |
+ mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx; |
+ break; |
+ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
+ sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; |
+ mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx; |
+ break; |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
+ sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; |
+ mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx; |
+ break; |
+@@ -544,6 +642,7 @@ |
+ in_row_group_ctr, output_buf); |
+ } |
+ |
++#ifndef JPEG_DECODE_ONLY |
+ GLOBAL(int) |
+ jsimd_can_convsamp (void) |
+ { |
+@@ -763,6 +862,7 @@ |
+ else if (simd_support & JSIMD_3DNOW) |
+ jsimd_quantize_float_3dnow(coef_block, divisors, workspace); |
+ } |
++#endif |
+ |
+ GLOBAL(int) |
+ jsimd_can_idct_2x2 (void) |
+@@ -953,4 +1053,3 @@ |
+ jsimd_idct_float_3dnow(compptr->dct_table, coef_block, |
+ output_buf, output_col); |
+ } |
+- |
+Index: simd/jsimd_x86_64.c |
+=================================================================== |
+--- simd/jsimd_x86_64.c (revision 829) |
++++ simd/jsimd_x86_64.c (working copy) |
+@@ -2,10 +2,11 @@ |
+ * jsimd_x86_64.c |
+ * |
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
+- * Copyright 2009 D. R. Commander |
++ * Copyright 2009-2011 D. R. Commander |
+ * |
+ * Based on the x86 SIMD extension for IJG JPEG library, |
+ * Copyright (C) 1999-2006, MIYASAKA Masaru. |
++ * For conditions of distribution and use, see copyright notice in jsimdext.inc |
+ * |
+ * This file contains the interface between the "normal" portions |
+ * of the library and the SIMD implementations when running on a |
+@@ -18,16 +19,17 @@ |
+ #include "../jsimd.h" |
+ #include "../jdct.h" |
+ #include "../jsimddct.h" |
+-#include "simd/jsimd.h" |
++#include "jsimd.h" |
+ |
+ /* |
+ * In the PIC cases, we have no guarantee that constants will keep |
+ * their alignment. This macro allows us to verify it at runtime. |
+ */ |
+-#define IS_ALIGNED(ptr, order) (((unsigned)ptr & ((1 << order) - 1)) == 0) |
++#define IS_ALIGNED(ptr, order) (((size_t)ptr & ((1 << order) - 1)) == 0) |
+ |
+ #define IS_ALIGNED_SSE(ptr) (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */ |
+ |
++#ifndef JPEG_DECODE_ONLY |
+ GLOBAL(int) |
+ jsimd_can_rgb_ycc (void) |
+ { |
+@@ -44,8 +46,26 @@ |
+ |
+ return 1; |
+ } |
++#endif |
+ |
+ GLOBAL(int) |
++jsimd_can_rgb_gray (void) |
++{ |
+ /* The code is optimised for these values only */ |
+ if (BITS_IN_JSAMPLE != 8) |
+ return 0; |
+ if (sizeof(JDIMENSION) != 4) |
+ return 0; |
++ if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) |
++ return 0; |
+ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ return 1; |
-+ |
-+ return 0; |
-+} |
++ if (!IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2)) |
++ return 0; |
+ |
-+GLOBAL(void) |
-+jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
-+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
-+ JDIMENSION output_row, int num_rows) |
-+{ |
++ return 1; |
+} |
+ |
-+GLOBAL(void) |
++GLOBAL(int) |
+ jsimd_can_ycc_rgb (void) |
+ { |
+ /* The code is optimised for these values only */ |
+@@ -62,6 +82,7 @@ |
+ return 1; |
+ } |
+ |
++#ifndef JPEG_DECODE_ONLY |
+ GLOBAL(void) |
+ jsimd_rgb_ycc_convert (j_compress_ptr cinfo, |
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
+@@ -75,6 +96,7 @@ |
+ sse2fct=jsimd_extrgb_ycc_convert_sse2; |
+ break; |
+ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
+ sse2fct=jsimd_extrgbx_ycc_convert_sse2; |
+ break; |
+ case JCS_EXT_BGR: |
+@@ -81,12 +103,15 @@ |
+ sse2fct=jsimd_extbgr_ycc_convert_sse2; |
+ break; |
+ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
+ sse2fct=jsimd_extbgrx_ycc_convert_sse2; |
+ break; |
+ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
+ sse2fct=jsimd_extxbgr_ycc_convert_sse2; |
+ break; |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
+ sse2fct=jsimd_extxrgb_ycc_convert_sse2; |
+ break; |
+ default: |
+@@ -96,8 +121,48 @@ |
+ |
+ sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
+ } |
++#endif |
+ |
+ GLOBAL(void) |
+jsimd_rgb_gray_convert (j_compress_ptr cinfo, |
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf, |
+ JDIMENSION output_row, int num_rows) |
+{ |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
-+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
-+ JSAMPARRAY output_buf, int num_rows) |
-+{ |
-+ void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
++ void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
+ |
-+ switch(cinfo->out_color_space) { |
++ switch(cinfo->in_color_space) |
++ { |
+ case JCS_EXT_RGB: |
-+ neonfct=jsimd_ycc_extrgb_convert_neon; |
++ sse2fct=jsimd_extrgb_gray_convert_sse2; |
+ break; |
+ case JCS_EXT_RGBX: |
+ case JCS_EXT_RGBA: |
-+ neonfct=jsimd_ycc_extrgbx_convert_neon; |
++ sse2fct=jsimd_extrgbx_gray_convert_sse2; |
+ break; |
+ case JCS_EXT_BGR: |
-+ neonfct=jsimd_ycc_extbgr_convert_neon; |
++ sse2fct=jsimd_extbgr_gray_convert_sse2; |
+ break; |
+ case JCS_EXT_BGRX: |
+ case JCS_EXT_BGRA: |
-+ neonfct=jsimd_ycc_extbgrx_convert_neon; |
++ sse2fct=jsimd_extbgrx_gray_convert_sse2; |
+ break; |
+ case JCS_EXT_XBGR: |
+ case JCS_EXT_ABGR: |
-+ neonfct=jsimd_ycc_extxbgr_convert_neon; |
++ sse2fct=jsimd_extxbgr_gray_convert_sse2; |
+ break; |
+ case JCS_EXT_XRGB: |
+ case JCS_EXT_ARGB: |
-+ neonfct=jsimd_ycc_extxrgb_convert_neon; |
++ sse2fct=jsimd_extxrgb_gray_convert_sse2; |
+ break; |
+ default: |
-+ neonfct=jsimd_ycc_extrgb_convert_neon; |
++ sse2fct=jsimd_rgb_gray_convert_sse2; |
+ break; |
+ } |
+ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, |
-+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
-+ JSAMPARRAY output_buf, int num_rows) |
-+{ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, |
-+ output_buf, num_rows); |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_h2v2_downsample (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_h2v1_downsample (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
-+ JSAMPARRAY input_data, JSAMPARRAY output_data) |
-+{ |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, |
-+ JSAMPARRAY input_data, JSAMPARRAY output_data) |
-+{ |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_h2v2_upsample (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_h2v1_upsample (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_h2v2_upsample (j_decompress_ptr cinfo, |
-+ jpeg_component_info * compptr, |
-+ JSAMPARRAY input_data, |
-+ JSAMPARRAY * output_data_ptr) |
-+{ |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_h2v1_upsample (j_decompress_ptr cinfo, |
-+ jpeg_component_info * compptr, |
-+ JSAMPARRAY input_data, |
-+ JSAMPARRAY * output_data_ptr) |
-+{ |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_h2v2_fancy_upsample (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_h2v1_fancy_upsample (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, |
-+ jpeg_component_info * compptr, |
-+ JSAMPARRAY input_data, |
-+ JSAMPARRAY * output_data_ptr) |
-+{ |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, |
-+ jpeg_component_info * compptr, |
-+ JSAMPARRAY input_data, |
-+ JSAMPARRAY * output_data_ptr) |
-+{ |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_h2v2_merged_upsample (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_h2v1_merged_upsample (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, |
-+ JSAMPIMAGE input_buf, |
-+ JDIMENSION in_row_group_ctr, |
-+ JSAMPARRAY output_buf) |
-+{ |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, |
-+ JSAMPIMAGE input_buf, |
-+ JDIMENSION in_row_group_ctr, |
-+ JSAMPARRAY output_buf) |
-+{ |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_convsamp (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_convsamp_float (void) |
-+{ |
-+ init_simd(); |
-+ |
-+ return 0; |
-+} |
-+ |
-+GLOBAL(void) |
-+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, |
-+ DCTELEM * workspace) |
-+{ |
++ sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); |
+} |
+ |
+GLOBAL(void) |
-+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, |
-+ FAST_FLOAT * workspace) |
-+{ |
-+} |
-+ |
-+GLOBAL(int) |
-+jsimd_can_fdct_islow (void) |
-+{ |
-+ init_simd(); |
+ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, |
+ JSAMPIMAGE input_buf, JDIMENSION input_row, |
+ JSAMPARRAY output_buf, int num_rows) |
+@@ -110,6 +175,7 @@ |
+ sse2fct=jsimd_ycc_extrgb_convert_sse2; |
+ break; |
+ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
+ sse2fct=jsimd_ycc_extrgbx_convert_sse2; |
+ break; |
+ case JCS_EXT_BGR: |
+@@ -116,12 +182,15 @@ |
+ sse2fct=jsimd_ycc_extbgr_convert_sse2; |
+ break; |
+ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
+ sse2fct=jsimd_ycc_extbgrx_convert_sse2; |
+ break; |
+ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
+ sse2fct=jsimd_ycc_extxbgr_convert_sse2; |
+ break; |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
+ sse2fct=jsimd_ycc_extxrgb_convert_sse2; |
+ break; |
+ default: |
+@@ -132,6 +201,7 @@ |
+ sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); |
+ } |
+ |
++#ifndef JPEG_DECODE_ONLY |
+ GLOBAL(int) |
+ jsimd_can_h2v2_downsample (void) |
+ { |
+@@ -177,6 +247,7 @@ |
+ compptr->width_in_blocks, |
+ input_data, output_data); |
+ } |
++#endif |
+ |
+ GLOBAL(int) |
+ jsimd_can_h2v2_upsample (void) |
+@@ -260,7 +331,7 @@ |
+ JSAMPARRAY input_data, |
+ JSAMPARRAY * output_data_ptr) |
+ { |
+- jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
++ jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor, |
+ compptr->downsampled_width, |
+ input_data, output_data_ptr); |
+ } |
+@@ -320,6 +391,7 @@ |
+ sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
+ sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_BGR: |
+@@ -326,12 +398,15 @@ |
+ sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
+ sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
+ sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
+ sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2; |
+ break; |
+ default: |
+@@ -356,6 +431,7 @@ |
+ sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_RGBX: |
++ case JCS_EXT_RGBA: |
+ sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_BGR: |
+@@ -362,12 +438,15 @@ |
+ sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_BGRX: |
++ case JCS_EXT_BGRA: |
+ sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_XBGR: |
++ case JCS_EXT_ABGR: |
+ sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2; |
+ break; |
+ case JCS_EXT_XRGB: |
++ case JCS_EXT_ARGB: |
+ sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2; |
+ break; |
+ default: |
+@@ -378,6 +457,7 @@ |
+ sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf); |
+ } |
+ |
++#ifndef JPEG_DECODE_ONLY |
+ GLOBAL(int) |
+ jsimd_can_convsamp (void) |
+ { |
+@@ -528,6 +608,7 @@ |
+ { |
+ jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
+ } |
++#endif |
+ |
+ GLOBAL(int) |
+ jsimd_can_idct_2x2 (void) |
+@@ -677,4 +758,3 @@ |
+ jsimd_idct_float_sse2(compptr->dct_table, coef_block, |
+ output_buf, output_col); |
+ } |
+- |
+Index: simd/jsimdcfg.inc.h |
+=================================================================== |
+--- simd/jsimdcfg.inc.h (revision 829) |
++++ simd/jsimdcfg.inc.h (working copy) |
+@@ -15,26 +15,54 @@ |
+ #include "../jmorecfg.h" |
+ #include "jsimd.h" |
+ |
+-#define define(var) %define _cpp_protection_##var |
+-#define definev(var) %define _cpp_protection_##var var |
+- |
+ ; |
+ ; -- jpeglib.h |
+ ; |
+ |
+-definev(DCTSIZE) |
+-definev(DCTSIZE2) |
++%define _cpp_protection_DCTSIZE DCTSIZE |
++%define _cpp_protection_DCTSIZE2 DCTSIZE2 |
+ |
+ ; |
+ ; -- jmorecfg.h |
+ ; |
+ |
+-definev(RGB_RED) |
+-definev(RGB_GREEN) |
+-definev(RGB_BLUE) |
++%define _cpp_protection_RGB_RED RGB_RED |
++%define _cpp_protection_RGB_GREEN RGB_GREEN |
++%define _cpp_protection_RGB_BLUE RGB_BLUE |
++%define _cpp_protection_RGB_PIXELSIZE RGB_PIXELSIZE |
+ |
+-definev(RGB_PIXELSIZE) |
++%define _cpp_protection_EXT_RGB_RED EXT_RGB_RED |
++%define _cpp_protection_EXT_RGB_GREEN EXT_RGB_GREEN |
++%define _cpp_protection_EXT_RGB_BLUE EXT_RGB_BLUE |
++%define _cpp_protection_EXT_RGB_PIXELSIZE EXT_RGB_PIXELSIZE |
+ |
++%define _cpp_protection_EXT_RGBX_RED EXT_RGBX_RED |
++%define _cpp_protection_EXT_RGBX_GREEN EXT_RGBX_GREEN |
++%define _cpp_protection_EXT_RGBX_BLUE EXT_RGBX_BLUE |
++%define _cpp_protection_EXT_RGBX_PIXELSIZE EXT_RGBX_PIXELSIZE |
+ |
-+ return 0; |
-+} |
++%define _cpp_protection_EXT_BGR_RED EXT_BGR_RED |
++%define _cpp_protection_EXT_BGR_GREEN EXT_BGR_GREEN |
++%define _cpp_protection_EXT_BGR_BLUE EXT_BGR_BLUE |
++%define _cpp_protection_EXT_BGR_PIXELSIZE EXT_BGR_PIXELSIZE |
+ |
-+GLOBAL(int) |
-+jsimd_can_fdct_ifast (void) |
-+{ |
-+ init_simd(); |
++%define _cpp_protection_EXT_BGRX_RED EXT_BGRX_RED |
++%define _cpp_protection_EXT_BGRX_GREEN EXT_BGRX_GREEN |
++%define _cpp_protection_EXT_BGRX_BLUE EXT_BGRX_BLUE |
++%define _cpp_protection_EXT_BGRX_PIXELSIZE EXT_BGRX_PIXELSIZE |
+ |
-+ return 0; |
-+} |
++%define _cpp_protection_EXT_XBGR_RED EXT_XBGR_RED |
++%define _cpp_protection_EXT_XBGR_GREEN EXT_XBGR_GREEN |
++%define _cpp_protection_EXT_XBGR_BLUE EXT_XBGR_BLUE |
++%define _cpp_protection_EXT_XBGR_PIXELSIZE EXT_XBGR_PIXELSIZE |
+ |
-+GLOBAL(int) |
-+jsimd_can_fdct_float (void) |
-+{ |
-+ init_simd(); |
++%define _cpp_protection_EXT_XRGB_RED EXT_XRGB_RED |
++%define _cpp_protection_EXT_XRGB_GREEN EXT_XRGB_GREEN |
++%define _cpp_protection_EXT_XRGB_BLUE EXT_XRGB_BLUE |
++%define _cpp_protection_EXT_XRGB_PIXELSIZE EXT_XRGB_PIXELSIZE |
+ |
-+ return 0; |
-+} |
++%define RGBX_FILLER_0XFF 1 |
+ |
-+GLOBAL(void) |
-+jsimd_fdct_islow (DCTELEM * data) |
-+{ |
-+} |
+ ; Representation of a single sample (pixel element value). |
+ ; On this SIMD implementation, this must be 'unsigned char'. |
+ ; |
+@@ -42,7 +70,7 @@ |
+ %define JSAMPLE byte ; unsigned char |
+ %define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE) |
+ |
+-definev(CENTERJSAMPLE) |
++%define _cpp_protection_CENTERJSAMPLE CENTERJSAMPLE |
+ |
+ ; Representation of a DCT frequency coefficient. |
+ ; On this SIMD implementation, this must be 'short'. |
+@@ -95,74 +123,74 @@ |
+ ; -- jsimd.h |
+ ; |
+ |
+-definev(JSIMD_NONE) |
+-definev(JSIMD_MMX) |
+-definev(JSIMD_3DNOW) |
+-definev(JSIMD_SSE) |
+-definev(JSIMD_SSE2) |
++%define _cpp_protection_JSIMD_NONE JSIMD_NONE |
++%define _cpp_protection_JSIMD_MMX JSIMD_MMX |
++%define _cpp_protection_JSIMD_3DNOW JSIMD_3DNOW |
++%define _cpp_protection_JSIMD_SSE JSIMD_SSE |
++%define _cpp_protection_JSIMD_SSE2 JSIMD_SSE2 |
+ |
+ ; Short forms of external names for systems with brain-damaged linkers. |
+ ; |
+ #ifdef NEED_SHORT_EXTERNAL_NAMES |
+-definev(jpeg_simd_cpu_support) |
+-definev(jsimd_rgb_ycc_convert_mmx) |
+-definev(jsimd_ycc_rgb_convert_mmx) |
+-definev(jconst_rgb_ycc_convert_sse2) |
+-definev(jsimd_rgb_ycc_convert_sse2) |
+-definev(jconst_ycc_rgb_convert_sse2) |
+-definev(jsimd_ycc_rgb_convert_sse2) |
+-definev(jsimd_h2v2_downsample_mmx) |
+-definev(jsimd_h2v1_downsample_mmx) |
+-definev(jsimd_h2v2_downsample_sse2) |
+-definev(jsimd_h2v1_downsample_sse2) |
+-definev(jsimd_h2v2_upsample_mmx) |
+-definev(jsimd_h2v1_upsample_mmx) |
+-definev(jsimd_h2v1_fancy_upsample_mmx) |
+-definev(jsimd_h2v2_fancy_upsample_mmx) |
+-definev(jsimd_h2v1_merged_upsample_mmx) |
+-definev(jsimd_h2v2_merged_upsample_mmx) |
+-definev(jsimd_h2v2_upsample_sse2) |
+-definev(jsimd_h2v1_upsample_sse2) |
+-definev(jconst_fancy_upsample_sse2) |
+-definev(jsimd_h2v1_fancy_upsample_sse2) |
+-definev(jsimd_h2v2_fancy_upsample_sse2) |
+-definev(jconst_merged_upsample_sse2) |
+-definev(jsimd_h2v1_merged_upsample_sse2) |
+-definev(jsimd_h2v2_merged_upsample_sse2) |
+-definev(jsimd_convsamp_mmx) |
+-definev(jsimd_convsamp_sse2) |
+-definev(jsimd_convsamp_float_3dnow) |
+-definev(jsimd_convsamp_float_sse) |
+-definev(jsimd_convsamp_float_sse2) |
+-definev(jsimd_fdct_islow_mmx) |
+-definev(jsimd_fdct_ifast_mmx) |
+-definev(jconst_fdct_islow_sse2) |
+-definev(jsimd_fdct_islow_sse2) |
+-definev(jconst_fdct_ifast_sse2) |
+-definev(jsimd_fdct_ifast_sse2) |
+-definev(jsimd_fdct_float_3dnow) |
+-definev(jconst_fdct_float_sse) |
+-definev(jsimd_fdct_float_sse) |
+-definev(jsimd_quantize_mmx) |
+-definev(jsimd_quantize_sse2) |
+-definev(jsimd_quantize_float_3dnow) |
+-definev(jsimd_quantize_float_sse) |
+-definev(jsimd_quantize_float_sse2) |
+-definev(jsimd_idct_2x2_mmx) |
+-definev(jsimd_idct_4x4_mmx) |
+-definev(jconst_idct_red_sse2) |
+-definev(jsimd_idct_2x2_sse2) |
+-definev(jsimd_idct_4x4_sse2) |
+-definev(jsimd_idct_islow_mmx) |
+-definev(jsimd_idct_ifast_mmx) |
+-definev(jconst_idct_islow_sse2) |
+-definev(jsimd_idct_islow_sse2) |
+-definev(jconst_idct_ifast_sse2) |
+-definev(jsimd_idct_ifast_sse2) |
+-definev(jsimd_idct_float_3dnow) |
+-definev(jconst_idct_float_sse) |
+-definev(jsimd_idct_float_sse) |
+-definev(jconst_idct_float_sse2) |
+-definev(jsimd_idct_float_sse2) |
++%define _cpp_protection_jpeg_simd_cpu_support jpeg_simd_cpu_support |
++%define _cpp_protection_jsimd_rgb_ycc_convert_mmx jsimd_rgb_ycc_convert_mmx |
++%define _cpp_protection_jsimd_ycc_rgb_convert_mmx jsimd_ycc_rgb_convert_mmx |
++%define _cpp_protection_jconst_rgb_ycc_convert_sse2 jconst_rgb_ycc_convert_sse2 |
++%define _cpp_protection_jsimd_rgb_ycc_convert_sse2 jsimd_rgb_ycc_convert_sse2 |
++%define _cpp_protection_jconst_ycc_rgb_convert_sse2 jconst_ycc_rgb_convert_sse2 |
++%define _cpp_protection_jsimd_ycc_rgb_convert_sse2 jsimd_ycc_rgb_convert_sse2 |
++%define _cpp_protection_jsimd_h2v2_downsample_mmx jsimd_h2v2_downsample_mmx |
++%define _cpp_protection_jsimd_h2v1_downsample_mmx jsimd_h2v1_downsample_mmx |
++%define _cpp_protection_jsimd_h2v2_downsample_sse2 jsimd_h2v2_downsample_sse2 |
++%define _cpp_protection_jsimd_h2v1_downsample_sse2 jsimd_h2v1_downsample_sse2 |
++%define _cpp_protection_jsimd_h2v2_upsample_mmx jsimd_h2v2_upsample_mmx |
++%define _cpp_protection_jsimd_h2v1_upsample_mmx jsimd_h2v1_upsample_mmx |
++%define _cpp_protection_jsimd_h2v1_fancy_upsample_mmx jsimd_h2v1_fancy_upsample_mmx |
++%define _cpp_protection_jsimd_h2v2_fancy_upsample_mmx jsimd_h2v2_fancy_upsample_mmx |
++%define _cpp_protection_jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_merged_upsample_mmx |
++%define _cpp_protection_jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_merged_upsample_mmx |
++%define _cpp_protection_jsimd_h2v2_upsample_sse2 jsimd_h2v2_upsample_sse2 |
++%define _cpp_protection_jsimd_h2v1_upsample_sse2 jsimd_h2v1_upsample_sse2 |
++%define _cpp_protection_jconst_fancy_upsample_sse2 jconst_fancy_upsample_sse2 |
++%define _cpp_protection_jsimd_h2v1_fancy_upsample_sse2 jsimd_h2v1_fancy_upsample_sse2 |
++%define _cpp_protection_jsimd_h2v2_fancy_upsample_sse2 jsimd_h2v2_fancy_upsample_sse2 |
++%define _cpp_protection_jconst_merged_upsample_sse2 jconst_merged_upsample_sse2 |
++%define _cpp_protection_jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_merged_upsample_sse2 |
++%define _cpp_protection_jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_merged_upsample_sse2 |
++%define _cpp_protection_jsimd_convsamp_mmx jsimd_convsamp_mmx |
++%define _cpp_protection_jsimd_convsamp_sse2 jsimd_convsamp_sse2 |
++%define _cpp_protection_jsimd_convsamp_float_3dnow jsimd_convsamp_float_3dnow |
++%define _cpp_protection_jsimd_convsamp_float_sse jsimd_convsamp_float_sse |
++%define _cpp_protection_jsimd_convsamp_float_sse2 jsimd_convsamp_float_sse2 |
++%define _cpp_protection_jsimd_fdct_islow_mmx jsimd_fdct_islow_mmx |
++%define _cpp_protection_jsimd_fdct_ifast_mmx jsimd_fdct_ifast_mmx |
++%define _cpp_protection_jconst_fdct_islow_sse2 jconst_fdct_islow_sse2 |
++%define _cpp_protection_jsimd_fdct_islow_sse2 jsimd_fdct_islow_sse2 |
++%define _cpp_protection_jconst_fdct_ifast_sse2 jconst_fdct_ifast_sse2 |
++%define _cpp_protection_jsimd_fdct_ifast_sse2 jsimd_fdct_ifast_sse2 |
++%define _cpp_protection_jsimd_fdct_float_3dnow jsimd_fdct_float_3dnow |
++%define _cpp_protection_jconst_fdct_float_sse jconst_fdct_float_sse |
++%define _cpp_protection_jsimd_fdct_float_sse jsimd_fdct_float_sse |
++%define _cpp_protection_jsimd_quantize_mmx jsimd_quantize_mmx |
++%define _cpp_protection_jsimd_quantize_sse2 jsimd_quantize_sse2 |
++%define _cpp_protection_jsimd_quantize_float_3dnow jsimd_quantize_float_3dnow |
++%define _cpp_protection_jsimd_quantize_float_sse jsimd_quantize_float_sse |
++%define _cpp_protection_jsimd_quantize_float_sse2 jsimd_quantize_float_sse2 |
++%define _cpp_protection_jsimd_idct_2x2_mmx jsimd_idct_2x2_mmx |
++%define _cpp_protection_jsimd_idct_4x4_mmx jsimd_idct_4x4_mmx |
++%define _cpp_protection_jconst_idct_red_sse2 jconst_idct_red_sse2 |
++%define _cpp_protection_jsimd_idct_2x2_sse2 jsimd_idct_2x2_sse2 |
++%define _cpp_protection_jsimd_idct_4x4_sse2 jsimd_idct_4x4_sse2 |
++%define _cpp_protection_jsimd_idct_islow_mmx jsimd_idct_islow_mmx |
++%define _cpp_protection_jsimd_idct_ifast_mmx jsimd_idct_ifast_mmx |
++%define _cpp_protection_jconst_idct_islow_sse2 jconst_idct_islow_sse2 |
++%define _cpp_protection_jsimd_idct_islow_sse2 jsimd_idct_islow_sse2 |
++%define _cpp_protection_jconst_idct_ifast_sse2 jconst_idct_ifast_sse2 |
++%define _cpp_protection_jsimd_idct_ifast_sse2 jsimd_idct_ifast_sse2 |
++%define _cpp_protection_jsimd_idct_float_3dnow jsimd_idct_float_3dnow |
++%define _cpp_protection_jconst_idct_float_sse jconst_idct_float_sse |
++%define _cpp_protection_jsimd_idct_float_sse jsimd_idct_float_sse |
++%define _cpp_protection_jconst_idct_float_sse2 jconst_idct_float_sse2 |
++%define _cpp_protection_jsimd_idct_float_sse2 jsimd_idct_float_sse2 |
+ #endif /* NEED_SHORT_EXTERNAL_NAMES */ |
+ |
+Index: simd/jsimdcpu.asm |
+=================================================================== |
+--- simd/jsimdcpu.asm (revision 829) |
++++ simd/jsimdcpu.asm (working copy) |
+@@ -29,7 +29,7 @@ |
+ ; |
+ |
+ align 16 |
+- global EXTN(jpeg_simd_cpu_support) |
++ global EXTN(jpeg_simd_cpu_support) PRIVATE |
+ |
+ EXTN(jpeg_simd_cpu_support): |
+ push ebx |
+@@ -100,3 +100,6 @@ |
+ pop ebx |
+ ret |
+ |
++; For some reason, the OS X linker does not honor the request to align the |
++; segment unless we do this. |
++ align 16 |
+Index: simd/jsimdext.inc |
+=================================================================== |
+--- simd/jsimdext.inc (revision 829) |
++++ simd/jsimdext.inc (working copy) |
+@@ -2,6 +2,7 @@ |
+ ; jsimdext.inc - common declarations |
+ ; |
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
++; Copyright 2010 D. R. Commander |
+ ; |
+ ; Based on |
+ ; x86 SIMD extension for IJG JPEG library - version 1.02 |
+@@ -37,9 +38,28 @@ |
+ |
+ ; -- segment definition -- |
+ ; |
++%ifdef __YASM_VER__ |
++%define SEG_TEXT .text align=16 |
++%define SEG_CONST .rdata align=16 |
++%else |
+ %define SEG_TEXT .text align=16 public use32 class=CODE |
+ %define SEG_CONST .rdata align=16 public use32 class=CONST |
++%endif |
+ |
++%elifdef WIN64 ; ----(nasm -fwin64 -DWIN64 ...)-------- |
++; * Microsoft Visual C++ |
+ |
-+GLOBAL(void) |
-+jsimd_fdct_ifast (DCTELEM * data) |
-+{ |
-+} |
++; -- segment definition -- |
++; |
++%ifdef __YASM_VER__ |
++%define SEG_TEXT .text align=16 |
++%define SEG_CONST .rdata align=16 |
++%else |
++%define SEG_TEXT .text align=16 public use64 class=CODE |
++%define SEG_CONST .rdata align=16 public use64 class=CONST |
++%endif |
++%define EXTN(name) name ; foo() -> foo |
+ |
-+GLOBAL(void) |
-+jsimd_fdct_float (FAST_FLOAT * data) |
-+{ |
-+} |
+ %elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- |
+ ; * Borland C++ (Win32) |
+ |
+@@ -53,6 +73,12 @@ |
+ ; * *BSD family Unix using elf format |
+ ; * Unix System V, including Solaris x86, UnixWare and SCO Unix |
+ |
++; PIC is the default on Linux |
++%define PIC |
+ |
-+GLOBAL(int) |
-+jsimd_can_quantize (void) |
-+{ |
-+ init_simd(); |
++; mark stack as non-executable |
++section .note.GNU-stack noalloc noexec nowrite progbits |
+ |
-+ return 0; |
-+} |
+ ; -- segment definition -- |
+ ; |
+ %ifdef __x86_64__ |
+@@ -280,7 +306,44 @@ |
+ %endmacro |
+ |
+ %ifdef __x86_64__ |
+ |
-+GLOBAL(int) |
-+jsimd_can_quantize_float (void) |
-+{ |
-+ init_simd(); |
++%ifdef WIN64 |
+ |
-+ return 0; |
-+} |
+ %imacro collect_args 0 |
++ push r12 |
++ push r13 |
++ push r14 |
++ push r15 |
++ mov r10, rcx |
++ mov r11, rdx |
++ mov r12, r8 |
++ mov r13, r9 |
++ mov r14, [rax+48] |
++ mov r15, [rax+56] |
++ push rsi |
++ push rdi |
++ sub rsp, SIZEOF_XMMWORD |
++ movaps XMMWORD [rsp], xmm6 |
++ sub rsp, SIZEOF_XMMWORD |
++ movaps XMMWORD [rsp], xmm7 |
++%endmacro |
+ |
-+GLOBAL(void) |
-+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, |
-+ DCTELEM * workspace) |
-+{ |
-+} |
++%imacro uncollect_args 0 |
++ movaps xmm7, XMMWORD [rsp] |
++ add rsp, SIZEOF_XMMWORD |
++ movaps xmm6, XMMWORD [rsp] |
++ add rsp, SIZEOF_XMMWORD |
++ pop rdi |
++ pop rsi |
++ pop r15 |
++ pop r14 |
++ pop r13 |
++ pop r12 |
++%endmacro |
+ |
-+GLOBAL(void) |
-+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, |
-+ FAST_FLOAT * workspace) |
-+{ |
-+} |
++%else |
+ |
-+GLOBAL(int) |
-+jsimd_can_idct_2x2 (void) |
-+{ |
-+ init_simd(); |
++%imacro collect_args 0 |
+ push r10 |
+ push r11 |
+ push r12 |
+@@ -306,9 +369,21 @@ |
+ |
+ %endif |
+ |
++%endif |
+ |
-+ /* The code is optimised for these values only */ |
-+ if (DCTSIZE != 8) |
-+ return 0; |
-+ if (sizeof(JCOEF) != 2) |
-+ return 0; |
-+ if (BITS_IN_JSAMPLE != 8) |
-+ return 0; |
-+ if (sizeof(JDIMENSION) != 4) |
-+ return 0; |
-+ if (sizeof(ISLOW_MULT_TYPE) != 2) |
-+ return 0; |
+ ; -------------------------------------------------------------------------- |
+ ; Defines picked up from the C headers |
+ ; |
+ %include "jsimdcfg.inc" |
+ |
++; Begin chromium edits |
++%ifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- |
++%define PRIVATE :private_extern |
++%elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ |
++%define PRIVATE :hidden |
++%else |
++%define PRIVATE |
++%endif |
++; End chromium edits |
+ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ return 1; |
+ ; -------------------------------------------------------------------------- |
+Index: turbojpeg.h |
+=================================================================== |
+--- turbojpeg.h (revision 829) |
++++ turbojpeg.h (working copy) |
+@@ -1,231 +1,932 @@ |
+-/* Copyright (C)2004 Landmark Graphics Corporation |
+- * Copyright (C)2005, 2006 Sun Microsystems, Inc. |
+- * Copyright (C)2009 D. R. Commander |
++/* |
++ * Copyright (C)2009-2013 D. R. Commander. All Rights Reserved. |
+ * |
+- * This library is free software and may be redistributed and/or modified under |
+- * the terms of the wxWindows Library License, Version 3.1 or (at your option) |
+- * any later version. The full license is in the LICENSE.txt file included |
+- * with this distribution. |
++ * Redistribution and use in source and binary forms, with or without |
++ * modification, are permitted provided that the following conditions are met: |
+ * |
+- * This library is distributed in the hope that it will be useful, |
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+- * wxWindows Library License for more details. |
++ * - Redistributions of source code must retain the above copyright notice, |
++ * this list of conditions and the following disclaimer. |
++ * - Redistributions in binary form must reproduce the above copyright notice, |
++ * this list of conditions and the following disclaimer in the documentation |
++ * and/or other materials provided with the distribution. |
++ * - Neither the name of the libjpeg-turbo Project nor the names of its |
++ * contributors may be used to endorse or promote products derived from this |
++ * software without specific prior written permission. |
++ * |
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", |
++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE |
++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
++ * POSSIBILITY OF SUCH DAMAGE. |
+ */ |
+ |
+-#if (defined(_MSC_VER) || defined(__CYGWIN__) || defined(__MINGW32__)) && defined(_WIN32) && defined(DLLDEFINE) |
++#ifndef __TURBOJPEG_H__ |
++#define __TURBOJPEG_H__ |
+ |
-+ return 0; |
-+} |
++#if defined(_WIN32) && defined(DLLDEFINE) |
+ #define DLLEXPORT __declspec(dllexport) |
+ #else |
+ #define DLLEXPORT |
+ #endif |
+- |
+ #define DLLCALL |
+ |
+-/* Subsampling */ |
+-#define NUMSUBOPT 4 |
+ |
+-enum {TJ_444=0, TJ_422, TJ_420, TJ_GRAYSCALE}; |
++/** |
++ * @addtogroup TurboJPEG |
++ * TurboJPEG API. This API provides an interface for generating, decoding, and |
++ * transforming planar YUV and JPEG images in memory. |
++ * |
++ * @{ |
++ */ |
+ |
+-/* Flags */ |
+-#define TJ_BGR 1 |
+-#define TJ_BOTTOMUP 2 |
+-#define TJ_FORCEMMX 8 /* Force IPP to use MMX code even if SSE available */ |
+-#define TJ_FORCESSE 16 /* Force IPP to use SSE1 code even if SSE2 available */ |
+-#define TJ_FORCESSE2 32 /* Force IPP to use SSE2 code (useful if auto-detect is not working properly) */ |
+-#define TJ_ALPHAFIRST 64 /* BGR buffer is ABGR and RGB buffer is ARGB */ |
+-#define TJ_FORCESSE3 128 /* Force IPP to use SSE3 code (useful if auto-detect is not working properly) */ |
+-#define TJ_FASTUPSAMPLE 256 /* Use fast, inaccurate 4:2:2 and 4:2:0 YUV upsampling routines in libjpeg decompressor */ |
+ |
++/** |
++ * The number of chrominance subsampling options |
++ */ |
++#define TJ_NUMSAMP 5 |
+ |
-+GLOBAL(int) |
-+jsimd_can_idct_4x4 (void) |
++/** |
++ * Chrominance subsampling options. |
++ * When an image is converted from the RGB to the YCbCr colorspace as part of |
++ * the JPEG compression process, some of the Cb and Cr (chrominance) components |
++ * can be discarded or averaged together to produce a smaller image with little |
++ * perceptible loss of image clarity (the human eye is more sensitive to small |
++ * changes in brightness than to small changes in color.) This is called |
++ * "chrominance subsampling". |
++ * <p> |
++ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the |
++ * convention of the digital video community, the TurboJPEG API uses "YUV" to |
++ * refer to an image format consisting of Y, Cb, and Cr image planes. |
++ */ |
++enum TJSAMP |
+{ |
-+ init_simd(); |
++ /** |
++ * 4:4:4 chrominance subsampling (no chrominance subsampling). The JPEG or |
++ * YUV image will contain one chrominance component for every pixel in the |
++ * source image. |
++ */ |
++ TJSAMP_444=0, |
++ /** |
++ * 4:2:2 chrominance subsampling. The JPEG or YUV image will contain one |
++ * chrominance component for every 2x1 block of pixels in the source image. |
++ */ |
++ TJSAMP_422, |
++ /** |
++ * 4:2:0 chrominance subsampling. The JPEG or YUV image will contain one |
++ * chrominance component for every 2x2 block of pixels in the source image. |
++ */ |
++ TJSAMP_420, |
++ /** |
++ * Grayscale. The JPEG or YUV image will contain no chrominance components. |
++ */ |
++ TJSAMP_GRAY, |
++ /** |
++ * 4:4:0 chrominance subsampling. The JPEG or YUV image will contain one |
++ * chrominance component for every 1x2 block of pixels in the source image. |
++ * Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo. |
++ */ |
++ TJSAMP_440 |
++}; |
+ |
-+ /* The code is optimised for these values only */ |
-+ if (DCTSIZE != 8) |
-+ return 0; |
-+ if (sizeof(JCOEF) != 2) |
-+ return 0; |
-+ if (BITS_IN_JSAMPLE != 8) |
-+ return 0; |
-+ if (sizeof(JDIMENSION) != 4) |
-+ return 0; |
-+ if (sizeof(ISLOW_MULT_TYPE) != 2) |
-+ return 0; |
++/** |
++ * MCU block width (in pixels) for a given level of chrominance subsampling. |
++ * MCU block sizes: |
++ * - 8x8 for no subsampling or grayscale |
++ * - 16x8 for 4:2:2 |
++ * - 8x16 for 4:4:0 |
++ * - 16x16 for 4:2:0 |
++ */ |
++static const int tjMCUWidth[TJ_NUMSAMP] = {8, 16, 16, 8, 8}; |
+ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ return 1; |
++/** |
++ * MCU block height (in pixels) for a given level of chrominance subsampling. |
++ * MCU block sizes: |
++ * - 8x8 for no subsampling or grayscale |
++ * - 16x8 for 4:2:2 |
++ * - 8x16 for 4:4:0 |
++ * - 16x16 for 4:2:0 |
++ */ |
++static const int tjMCUHeight[TJ_NUMSAMP] = {8, 8, 16, 8, 16}; |
+ |
-+ return 0; |
-+} |
+ |
-+GLOBAL(void) |
-+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
-+ JCOEFPTR coef_block, JSAMPARRAY output_buf, |
-+ JDIMENSION output_col) |
-+{ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, |
-+ output_col); |
-+} |
++/** |
++ * The number of pixel formats |
++ */ |
++#define TJ_NUMPF 11 |
+ |
-+GLOBAL(void) |
-+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
-+ JCOEFPTR coef_block, JSAMPARRAY output_buf, |
-+ JDIMENSION output_col) |
++/** |
++ * Pixel formats |
++ */ |
++enum TJPF |
+{ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, |
-+ output_col); |
-+} |
++ /** |
++ * RGB pixel format. The red, green, and blue components in the image are |
++ * stored in 3-byte pixels in the order R, G, B from lowest to highest byte |
++ * address within each pixel. |
++ */ |
++ TJPF_RGB=0, |
++ /** |
++ * BGR pixel format. The red, green, and blue components in the image are |
++ * stored in 3-byte pixels in the order B, G, R from lowest to highest byte |
++ * address within each pixel. |
++ */ |
++ TJPF_BGR, |
++ /** |
++ * RGBX pixel format. The red, green, and blue components in the image are |
++ * stored in 4-byte pixels in the order R, G, B from lowest to highest byte |
++ * address within each pixel. The X component is ignored when compressing |
++ * and undefined when decompressing. |
++ */ |
++ TJPF_RGBX, |
++ /** |
++ * BGRX pixel format. The red, green, and blue components in the image are |
++ * stored in 4-byte pixels in the order B, G, R from lowest to highest byte |
++ * address within each pixel. The X component is ignored when compressing |
++ * and undefined when decompressing. |
++ */ |
++ TJPF_BGRX, |
++ /** |
++ * XBGR pixel format. The red, green, and blue components in the image are |
++ * stored in 4-byte pixels in the order R, G, B from highest to lowest byte |
++ * address within each pixel. The X component is ignored when compressing |
++ * and undefined when decompressing. |
++ */ |
++ TJPF_XBGR, |
++ /** |
++ * XRGB pixel format. The red, green, and blue components in the image are |
++ * stored in 4-byte pixels in the order B, G, R from highest to lowest byte |
++ * address within each pixel. The X component is ignored when compressing |
++ * and undefined when decompressing. |
++ */ |
++ TJPF_XRGB, |
++ /** |
++ * Grayscale pixel format. Each 1-byte pixel represents a luminance |
++ * (brightness) level from 0 to 255. |
++ */ |
++ TJPF_GRAY, |
++ /** |
++ * RGBA pixel format. This is the same as @ref TJPF_RGBX, except that when |
++ * decompressing, the X component is guaranteed to be 0xFF, which can be |
++ * interpreted as an opaque alpha channel. |
++ */ |
++ TJPF_RGBA, |
++ /** |
++ * BGRA pixel format. This is the same as @ref TJPF_BGRX, except that when |
++ * decompressing, the X component is guaranteed to be 0xFF, which can be |
++ * interpreted as an opaque alpha channel. |
++ */ |
++ TJPF_BGRA, |
++ /** |
++ * ABGR pixel format. This is the same as @ref TJPF_XBGR, except that when |
++ * decompressing, the X component is guaranteed to be 0xFF, which can be |
++ * interpreted as an opaque alpha channel. |
++ */ |
++ TJPF_ABGR, |
++ /** |
++ * ARGB pixel format. This is the same as @ref TJPF_XRGB, except that when |
++ * decompressing, the X component is guaranteed to be 0xFF, which can be |
++ * interpreted as an opaque alpha channel. |
++ */ |
++ TJPF_ARGB |
++}; |
+ |
-+GLOBAL(int) |
-+jsimd_can_idct_islow (void) |
-+{ |
-+ init_simd(); |
++/** |
++ * Red offset (in bytes) for a given pixel format. This specifies the number |
++ * of bytes that the red component is offset from the start of the pixel. For |
++ * instance, if a pixel of format TJPF_BGRX is stored in <tt>char pixel[]</tt>, |
++ * then the red component will be <tt>pixel[tjRedOffset[TJPF_BGRX]]</tt>. |
++ */ |
++static const int tjRedOffset[TJ_NUMPF] = {0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1}; |
++/** |
++ * Green offset (in bytes) for a given pixel format. This specifies the number |
++ * of bytes that the green component is offset from the start of the pixel. |
++ * For instance, if a pixel of format TJPF_BGRX is stored in |
++ * <tt>char pixel[]</tt>, then the green component will be |
++ * <tt>pixel[tjGreenOffset[TJPF_BGRX]]</tt>. |
++ */ |
++static const int tjGreenOffset[TJ_NUMPF] = {1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2}; |
++/** |
++ * Blue offset (in bytes) for a given pixel format. This specifies the number |
++ * of bytes that the blue component is offset from the start of the pixel. For |
++ * instance, if a pixel of format TJPF_BGRX is stored in <tt>char pixel[]</tt>, |
++ * then the blue component will be <tt>pixel[tjBlueOffset[TJPF_BGRX]]</tt>. |
++ */ |
++static const int tjBlueOffset[TJ_NUMPF] = {2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3}; |
+ |
-+ /* The code is optimised for these values only */ |
-+ if (DCTSIZE != 8) |
-+ return 0; |
-+ if (sizeof(JCOEF) != 2) |
-+ return 0; |
-+ if (BITS_IN_JSAMPLE != 8) |
-+ return 0; |
-+ if (sizeof(JDIMENSION) != 4) |
-+ return 0; |
-+ if (sizeof(ISLOW_MULT_TYPE) != 2) |
-+ return 0; |
++/** |
++ * Pixel size (in bytes) for a given pixel format. |
++ */ |
++static const int tjPixelSize[TJ_NUMPF] = {3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4}; |
+ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ return 1; |
+ |
-+ return 0; |
-+} |
++/** |
++ * The uncompressed source/destination image is stored in bottom-up (Windows, |
++ * OpenGL) order, not top-down (X11) order. |
++ */ |
++#define TJFLAG_BOTTOMUP 2 |
++/** |
++ * Turn off CPU auto-detection and force TurboJPEG to use MMX code (if the |
++ * underlying codec supports it.) |
++ */ |
++#define TJFLAG_FORCEMMX 8 |
++/** |
++ * Turn off CPU auto-detection and force TurboJPEG to use SSE code (if the |
++ * underlying codec supports it.) |
++ */ |
++#define TJFLAG_FORCESSE 16 |
++/** |
++ * Turn off CPU auto-detection and force TurboJPEG to use SSE2 code (if the |
++ * underlying codec supports it.) |
++ */ |
++#define TJFLAG_FORCESSE2 32 |
++/** |
++ * Turn off CPU auto-detection and force TurboJPEG to use SSE3 code (if the |
++ * underlying codec supports it.) |
++ */ |
++#define TJFLAG_FORCESSE3 128 |
++/** |
++ * When decompressing an image that was compressed using chrominance |
++ * subsampling, use the fastest chrominance upsampling algorithm available in |
++ * the underlying codec. The default is to use smooth upsampling, which |
++ * creates a smooth transition between neighboring chrominance components in |
++ * order to reduce upsampling artifacts in the decompressed image. |
++ */ |
++#define TJFLAG_FASTUPSAMPLE 256 |
++/** |
++ * Disable buffer (re)allocation. If passed to #tjCompress2() or |
++ * #tjTransform(), this flag will cause those functions to generate an error if |
++ * the JPEG image buffer is invalid or too small rather than attempting to |
++ * allocate or reallocate that buffer. This reproduces the behavior of earlier |
++ * versions of TurboJPEG. |
++ */ |
++#define TJFLAG_NOREALLOC 1024 |
++/** |
++ * Use the fastest DCT/IDCT algorithm available in the underlying codec. The |
++ * default if this flag is not specified is implementation-specific. For |
++ * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast |
++ * algorithm by default when compressing, because this has been shown to have |
++ * only a very slight effect on accuracy, but it uses the accurate algorithm |
++ * when decompressing, because this has been shown to have a larger effect. |
++ */ |
++#define TJFLAG_FASTDCT 2048 |
++/** |
++ * Use the most accurate DCT/IDCT algorithm available in the underlying codec. |
++ * The default if this flag is not specified is implementation-specific. For |
++ * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast |
++ * algorithm by default when compressing, because this has been shown to have |
++ * only a very slight effect on accuracy, but it uses the accurate algorithm |
++ * when decompressing, because this has been shown to have a larger effect. |
++ */ |
++#define TJFLAG_ACCURATEDCT 4096 |
+ |
-+GLOBAL(int) |
-+jsimd_can_idct_ifast (void) |
-+{ |
-+ init_simd(); |
+ |
-+ /* The code is optimised for these values only */ |
-+ if (DCTSIZE != 8) |
-+ return 0; |
-+ if (sizeof(JCOEF) != 2) |
-+ return 0; |
-+ if (BITS_IN_JSAMPLE != 8) |
-+ return 0; |
-+ if (sizeof(JDIMENSION) != 4) |
-+ return 0; |
-+ if (sizeof(IFAST_MULT_TYPE) != 2) |
-+ return 0; |
-+ if (IFAST_SCALE_BITS != 2) |
-+ return 0; |
++/** |
++ * The number of transform operations |
++ */ |
++#define TJ_NUMXOP 8 |
+ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ return 1; |
++/** |
++ * Transform operations for #tjTransform() |
++ */ |
++enum TJXOP |
++{ |
++ /** |
++ * Do not transform the position of the image pixels |
++ */ |
++ TJXOP_NONE=0, |
++ /** |
++ * Flip (mirror) image horizontally. This transform is imperfect if there |
++ * are any partial MCU blocks on the right edge (see #TJXOPT_PERFECT.) |
++ */ |
++ TJXOP_HFLIP, |
++ /** |
++ * Flip (mirror) image vertically. This transform is imperfect if there are |
++ * any partial MCU blocks on the bottom edge (see #TJXOPT_PERFECT.) |
++ */ |
++ TJXOP_VFLIP, |
++ /** |
++ * Transpose image (flip/mirror along upper left to lower right axis.) This |
++ * transform is always perfect. |
++ */ |
++ TJXOP_TRANSPOSE, |
++ /** |
++ * Transverse transpose image (flip/mirror along upper right to lower left |
++ * axis.) This transform is imperfect if there are any partial MCU blocks in |
++ * the image (see #TJXOPT_PERFECT.) |
++ */ |
++ TJXOP_TRANSVERSE, |
++ /** |
++ * Rotate image clockwise by 90 degrees. This transform is imperfect if |
++ * there are any partial MCU blocks on the bottom edge (see |
++ * #TJXOPT_PERFECT.) |
++ */ |
++ TJXOP_ROT90, |
++ /** |
++ * Rotate image 180 degrees. This transform is imperfect if there are any |
++ * partial MCU blocks in the image (see #TJXOPT_PERFECT.) |
++ */ |
++ TJXOP_ROT180, |
++ /** |
++ * Rotate image counter-clockwise by 90 degrees. This transform is imperfect |
++ * if there are any partial MCU blocks on the right edge (see |
++ * #TJXOPT_PERFECT.) |
++ */ |
++ TJXOP_ROT270 |
++}; |
+ |
-+ return 0; |
-+} |
+ |
-+GLOBAL(int) |
-+jsimd_can_idct_float (void) |
-+{ |
-+ init_simd(); |
++/** |
++ * This option will cause #tjTransform() to return an error if the transform is |
++ * not perfect. Lossless transforms operate on MCU blocks, whose size depends |
++ * on the level of chrominance subsampling used (see #tjMCUWidth |
++ * and #tjMCUHeight.) If the image's width or height is not evenly divisible |
++ * by the MCU block size, then there will be partial MCU blocks on the right |
++ * and/or bottom edges. It is not possible to move these partial MCU blocks to |
++ * the top or left of the image, so any transform that would require that is |
++ * "imperfect." If this option is not specified, then any partial MCU blocks |
++ * that cannot be transformed will be left in place, which will create |
++ * odd-looking strips on the right or bottom edge of the image. |
++ */ |
++#define TJXOPT_PERFECT 1 |
++/** |
++ * This option will cause #tjTransform() to discard any partial MCU blocks that |
++ * cannot be transformed. |
++ */ |
++#define TJXOPT_TRIM 2 |
++/** |
++ * This option will enable lossless cropping. See #tjTransform() for more |
++ * information. |
++ */ |
++#define TJXOPT_CROP 4 |
++/** |
++ * This option will discard the color data in the input image and produce |
++ * a grayscale output image. |
++ */ |
++#define TJXOPT_GRAY 8 |
++/** |
++ * This option will prevent #tjTransform() from outputting a JPEG image for |
++ * this particular transform (this can be used in conjunction with a custom |
++ * filter to capture the transformed DCT coefficients without transcoding |
++ * them.) |
++ */ |
++#define TJXOPT_NOOUTPUT 16 |
+ |
-+ return 0; |
-+} |
+ |
-+GLOBAL(void) |
-+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
-+ JCOEFPTR coef_block, JSAMPARRAY output_buf, |
-+ JDIMENSION output_col) |
++/** |
++ * Scaling factor |
++ */ |
++typedef struct |
+{ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, |
-+ output_col); |
-+} |
++ /** |
++ * Numerator |
++ */ |
++ int num; |
++ /** |
++ * Denominator |
++ */ |
++ int denom; |
++} tjscalingfactor; |
+ |
-+GLOBAL(void) |
-+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
-+ JCOEFPTR coef_block, JSAMPARRAY output_buf, |
-+ JDIMENSION output_col) |
++/** |
++ * Cropping region |
++ */ |
++typedef struct |
+{ |
-+ if (simd_support & JSIMD_ARM_NEON) |
-+ jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, |
-+ output_col); |
-+} |
++ /** |
++ * The left boundary of the cropping region. This must be evenly divisible |
++ * by the MCU block width (see #tjMCUWidth.) |
++ */ |
++ int x; |
++ /** |
++ * The upper boundary of the cropping region. This must be evenly divisible |
++ * by the MCU block height (see #tjMCUHeight.) |
++ */ |
++ int y; |
++ /** |
++ * The width of the cropping region. Setting this to 0 is the equivalent of |
++ * setting it to the width of the source JPEG image - x. |
++ */ |
++ int w; |
++ /** |
++ * The height of the cropping region. Setting this to 0 is the equivalent of |
++ * setting it to the height of the source JPEG image - y. |
++ */ |
++ int h; |
++} tjregion; |
+ |
-+GLOBAL(void) |
-+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, |
-+ JCOEFPTR coef_block, JSAMPARRAY output_buf, |
-+ JDIMENSION output_col) |
++/** |
++ * Lossless transform |
++ */ |
++typedef struct tjtransform |
+{ |
-+} |
-Index: simd/jsimd_arm64_neon.S |
-new file mode 100644 |
-=================================================================== |
---- /dev/null |
-+++ simd/jsimd_arm64_neon.S |
-@@ -0,0 +1,1861 @@ |
-+/* |
-+ * ARMv8 NEON optimizations for libjpeg-turbo |
++ /** |
++ * Cropping region |
++ */ |
++ tjregion r; |
++ /** |
++ * One of the @ref TJXOP "transform operations" |
++ */ |
++ int op; |
++ /** |
++ * The bitwise OR of one or more of the @ref TJXOPT_CROP "transform options" |
++ */ |
++ int options; |
++ /** |
++ * Arbitrary data that can be accessed within the body of the callback |
++ * function |
++ */ |
++ void *data; |
++ /** |
++ * A callback function that can be used to modify the DCT coefficients |
++ * after they are losslessly transformed but before they are transcoded to a |
++ * new JPEG image. This allows for custom filters or other transformations |
++ * to be applied in the frequency domain. |
++ * |
++ * @param coeffs pointer to an array of transformed DCT coefficients. (NOTE: |
++ * this pointer is not guaranteed to be valid once the callback |
++ * returns, so applications wishing to hand off the DCT coefficients |
++ * to another function or library should make a copy of them within |
++ * the body of the callback.) |
++ * @param arrayRegion #tjregion structure containing the width and height of |
++ * the array pointed to by <tt>coeffs</tt> as well as its offset |
++ * relative to the component plane. TurboJPEG implementations may |
++ * choose to split each component plane into multiple DCT coefficient |
++ * arrays and call the callback function once for each array. |
++ * @param planeRegion #tjregion structure containing the width and height of |
++ * the component plane to which <tt>coeffs</tt> belongs |
++ * @param componentIndex ID number of the component plane to which |
++ * <tt>coeffs</tt> belongs (Y, Cb, and Cr have, respectively, ID's of |
++ * 0, 1, and 2 in typical JPEG images.) |
++ * @param transformIndex ID number of the transformed image to which |
++ * <tt>coeffs</tt> belongs. This is the same as the index of the |
++ * transform in the <tt>transforms</tt> array that was passed to |
++ * #tjTransform(). |
++ * @param transform a pointer to a #tjtransform structure that specifies the |
++ * parameters and/or cropping region for this transform |
++ * |
++ * @return 0 if the callback was successful, or -1 if an error occurred. |
++ */ |
++ int (*customFilter)(short *coeffs, tjregion arrayRegion, |
++ tjregion planeRegion, int componentIndex, int transformIndex, |
++ struct tjtransform *transform); |
++} tjtransform; |
++ |
++/** |
++ * TurboJPEG instance handle |
++ */ |
+ typedef void* tjhandle; |
+ |
+-#define TJPAD(p) (((p)+3)&(~3)) |
+-#ifndef max |
+- #define max(a,b) ((a)>(b)?(a):(b)) |
+-#endif |
+ |
++/** |
++ * Pad the given width to the nearest 32-bit boundary |
++ */ |
++#define TJPAD(width) (((width)+3)&(~3)) |
++ |
++/** |
++ * Compute the scaled value of <tt>dimension</tt> using the given scaling |
++ * factor. This macro performs the integer equivalent of <tt>ceil(dimension * |
++ * scalingFactor)</tt>. |
++ */ |
++#define TJSCALED(dimension, scalingFactor) (((dimension) * (scalingFactor).num \ |
++ + (scalingFactor).denom - 1) / (scalingFactor).denom) |
++ |
++ |
+ #ifdef __cplusplus |
+ extern "C" { |
+ #endif |
+ |
+-/* API follows */ |
+ |
++/** |
++ * Create a TurboJPEG compressor instance. |
++ * |
++ * @return a handle to the newly-created instance, or NULL if an error |
++ * occurred (see #tjGetErrorStr().) |
++ */ |
++DLLEXPORT tjhandle DLLCALL tjInitCompress(void); |
+ |
+-/* |
+- tjhandle tjInitCompress(void) |
+ |
+- Creates a new JPEG compressor instance, allocates memory for the structures, |
+- and returns a handle to the instance. Most applications will only |
+- need to call this once at the beginning of the program or once for each |
+- concurrent thread. Don't try to create a new instance every time you |
+- compress an image, because this will cause performance to suffer. |
+- |
+- RETURNS: NULL on error |
++/** |
++ * Compress an RGB or grayscale image into a JPEG image. |
+ * |
-+ * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies). |
-+ * All rights reserved. |
-+ * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> |
-+ * Copyright (C) 2013-2014, Linaro Limited |
-+ * Author: Ragesh Radhakrishnan <ragesh.r@linaro.org> |
++ * @param handle a handle to a TurboJPEG compressor or transformer instance |
++ * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels |
++ * to be compressed |
++ * @param width width (in pixels) of the source image |
++ * @param pitch bytes per line of the source image. Normally, this should be |
++ * <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded, |
++ * or <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of |
++ * the image is padded to the nearest 32-bit boundary, as is the case |
++ * for Windows bitmaps. You can also be clever and use this parameter |
++ * to skip lines, etc. Setting this parameter to 0 is the equivalent of |
++ * setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>. |
++ * @param height height (in pixels) of the source image |
++ * @param pixelFormat pixel format of the source image (see @ref TJPF |
++ * "Pixel formats".) |
++ * @param jpegBuf address of a pointer to an image buffer that will receive the |
++ * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer |
++ * to accommodate the size of the JPEG image. Thus, you can choose to: |
++ * -# pre-allocate the JPEG buffer with an arbitrary size using |
++ * #tjAlloc() and let TurboJPEG grow the buffer as needed, |
++ * -# set <tt>*jpegBuf</tt> to NULL to tell TurboJPEG to allocate the |
++ * buffer for you, or |
++ * -# pre-allocate the buffer to a "worst case" size determined by |
++ * calling #tjBufSize(). This should ensure that the buffer never has |
++ * to be re-allocated (setting #TJFLAG_NOREALLOC guarantees this.) |
++ * . |
++ * If you choose option 1, <tt>*jpegSize</tt> should be set to the |
++ * size of your pre-allocated buffer. In any case, unless you have |
++ * set #TJFLAG_NOREALLOC, you should always check <tt>*jpegBuf</tt> upon |
++ * return from this function, as it may have changed. |
++ * @param jpegSize pointer to an unsigned long variable that holds the size of |
++ * the JPEG image buffer. If <tt>*jpegBuf</tt> points to a |
++ * pre-allocated buffer, then <tt>*jpegSize</tt> should be set to the |
++ * size of the buffer. Upon return, <tt>*jpegSize</tt> will contain the |
++ * size of the JPEG image (in bytes.) |
++ * @param jpegSubsamp the level of chrominance subsampling to be used when |
++ * generating the JPEG image (see @ref TJSAMP |
++ * "Chrominance subsampling options".) |
++ * @param jpegQual the image quality of the generated JPEG image (1 = worst, |
++ * 100 = best) |
++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP |
++ * "flags". |
+ * |
-+ * This software is provided 'as-is', without any express or implied |
-+ * warranty. In no event will the authors be held liable for any damages |
-+ * arising from the use of this software. |
++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) |
+ */ |
+-DLLEXPORT tjhandle DLLCALL tjInitCompress(void); |
++DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, unsigned char *srcBuf, |
++ int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, |
++ unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags); |
+ |
+ |
+-/* |
+- int tjCompress(tjhandle j, |
+- unsigned char *srcbuf, int width, int pitch, int height, int pixelsize, |
+- unsigned char *dstbuf, unsigned long *size, |
+- int jpegsubsamp, int jpegqual, int flags) |
++/** |
++ * The maximum size of the buffer (in bytes) required to hold a JPEG image with |
++ * the given parameters. The number of bytes returned by this function is |
++ * larger than the size of the uncompressed source image. The reason for this |
++ * is that the JPEG format uses 16-bit coefficients, and it is thus possible |
++ * for a very high-quality JPEG image with very high-frequency content to |
++ * expand rather than compress when converted to the JPEG format. Such images |
++ * represent a very rare corner case, but since there is no way to predict the |
++ * size of a JPEG image prior to compression, the corner case has to be |
++ * handled. |
+ * |
-+ * Permission is granted to anyone to use this software for any purpose, |
-+ * including commercial applications, and to alter it and redistribute it |
-+ * freely, subject to the following restrictions: |
++ * @param width width of the image (in pixels) |
++ * @param height height of the image (in pixels) |
++ * @param jpegSubsamp the level of chrominance subsampling to be used when |
++ * generating the JPEG image (see @ref TJSAMP |
++ * "Chrominance subsampling options".) |
+ * |
-+ * 1. The origin of this software must not be misrepresented; you must not |
-+ * claim that you wrote the original software. If you use this software |
-+ * in a product, an acknowledgment in the product documentation would be |
-+ * appreciated but is not required. |
-+ * 2. Altered source versions must be plainly marked as such, and must not be |
-+ * misrepresented as being the original software. |
-+ * 3. This notice may not be removed or altered from any source distribution. |
++ * @return the maximum size of the buffer (in bytes) required to hold the |
++ * image, or -1 if the arguments are out of bounds. |
+ */ |
-+ |
-+#if defined(__linux__) && defined(__ELF__) |
-+.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */ |
-+#endif |
-+ |
-+.text |
-+.arch armv8-a+fp+simd |
-+ |
-+ |
-+#define RESPECT_STRICT_ALIGNMENT 1 |
-+ |
-+ |
-+/*****************************************************************************/ |
-+ |
-+/* Supplementary macro for setting function attributes */ |
-+.macro asm_function fname |
-+#ifdef __APPLE__ |
-+ .globl _\fname |
-+_\fname: |
-+#else |
-+ .global \fname |
-+#ifdef __ELF__ |
-+ .hidden \fname |
-+ .type \fname, %function |
-+#endif |
-+\fname: |
-+#endif |
-+.endm |
-+ |
-+/* Transpose elements of single 128 bit registers */ |
-+.macro transpose_single x0,x1,xi,xilen,literal |
-+ ins \xi\xilen[0], \x0\xilen[0] |
-+ ins \x1\xilen[0], \x0\xilen[1] |
-+ trn1 \x0\literal, \x0\literal, \x1\literal |
-+ trn2 \x1\literal, \xi\literal, \x1\literal |
-+.endm |
-+ |
-+/* Transpose elements of 2 differnet registers */ |
-+.macro transpose x0,x1,xi,xilen,literal |
-+ mov \xi\xilen, \x0\xilen |
-+ trn1 \x0\literal, \x0\literal, \x1\literal |
-+ trn2 \x1\literal, \xi\literal, \x1\literal |
-+.endm |
-+ |
-+/* Transpose a block of 4x4 coefficients in four 64-bit registers */ |
-+.macro transpose_4x4_32 x0,x0len x1,x1len x2,x2len x3,x3len,xi,xilen |
-+ mov \xi\xilen, \x0\xilen |
-+ trn1 \x0\x0len, \x0\x0len, \x2\x2len |
-+ trn2 \x2\x2len, \xi\x0len, \x2\x2len |
-+ mov \xi\xilen, \x1\xilen |
-+ trn1 \x1\x1len, \x1\x1len, \x3\x3len |
-+ trn2 \x3\x3len, \xi\x1len, \x3\x3len |
-+.endm |
-+ |
-+.macro transpose_4x4_16 x0,x0len x1,x1len, x2,x2len, x3,x3len,xi,xilen |
-+ mov \xi\xilen, \x0\xilen |
-+ trn1 \x0\x0len, \x0\x0len, \x1\x1len |
-+ trn2 \x1\x2len, \xi\x0len, \x1\x2len |
-+ mov \xi\xilen, \x2\xilen |
-+ trn1 \x2\x2len, \x2\x2len, \x3\x3len |
-+ trn2 \x3\x2len, \xi\x1len, \x3\x3len |
-+.endm |
-+ |
-+.macro transpose_4x4 x0, x1, x2, x3,x5 |
-+ transpose_4x4_16 \x0,.4h, \x1,.4h, \x2,.4h,\x3,.4h,\x5,.16b |
-+ transpose_4x4_32 \x0,.2s, \x1,.2s, \x2,.2s,\x3,.2s,\x5,.16b |
-+.endm |
-+ |
-+ |
-+#define CENTERJSAMPLE 128 |
-+ |
-+/*****************************************************************************/ |
-+ |
-+/* |
-+ * Perform dequantization and inverse DCT on one block of coefficients. |
++DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height, |
++ int jpegSubsamp); |
+ |
+- [INPUT] j = instance handle previously returned from a call to |
+- tjInitCompress() |
+- [INPUT] srcbuf = pointer to user-allocated image buffer containing pixels in |
+- RGB(A) or BGR(A) form |
+- [INPUT] width = width (in pixels) of the source image |
+- [INPUT] pitch = bytes per line of the source image (width*pixelsize if the |
+- bitmap is unpadded, else TJPAD(width*pixelsize) if each line of the bitmap |
+- is padded to the nearest 32-bit boundary, such as is the case for Windows |
+- bitmaps. You can also be clever and use this parameter to skip lines, etc., |
+- as long as the pitch is greater than 0.) |
+- [INPUT] height = height (in pixels) of the source image |
+- [INPUT] pixelsize = size (in bytes) of each pixel in the source image |
+- RGBA and BGRA: 4, RGB and BGR: 3 |
+- [INPUT] dstbuf = pointer to user-allocated image buffer which will receive |
+- the JPEG image. Use the macro TJBUFSIZE(width, height) to determine |
+- the appropriate size for this buffer based on the image width and height. |
+- [OUTPUT] size = pointer to unsigned long which receives the size (in bytes) |
+- of the compressed image |
+- [INPUT] jpegsubsamp = Specifies either 4:2:0, 4:2:2, or 4:4:4 subsampling. |
+- When the image is converted from the RGB to YCbCr colorspace as part of the |
+- JPEG compression process, every other Cb and Cr (chrominance) pixel can be |
+- discarded to produce a smaller image with little perceptible loss of |
+- image clarity (the human eye is more sensitive to small changes in |
+- brightness than small changes in color.) |
+ |
+- TJ_420: 4:2:0 subsampling. Discards every other Cb, Cr pixel in both |
+- horizontal and vertical directions. |
+- TJ_422: 4:2:2 subsampling. Discards every other Cb, Cr pixel only in |
+- the horizontal direction. |
+- TJ_444: no subsampling. |
+- TJ_GRAYSCALE: Generate grayscale JPEG image |
++/** |
++ * The size of the buffer (in bytes) required to hold a YUV planar image with |
++ * the given parameters. |
++ * |
++ * @param width width of the image (in pixels) |
++ * @param height height of the image (in pixels) |
++ * @param subsamp level of chrominance subsampling in the image (see |
++ * @ref TJSAMP "Chrominance subsampling options".) |
+ * |
-+ * GLOBAL(void) |
-+ * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block, |
-+ * JSAMPARRAY output_buf, JDIMENSION output_col) |
++ * @return the size of the buffer (in bytes) required to hold the image, or |
++ * (unsigned long)-1 if the arguments are out of bounds. |
+ */ |
++DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height, |
++ int subsamp); |
+ |
+- [INPUT] jpegqual = JPEG quality (an integer between 0 and 100 inclusive.) |
+- [INPUT] flags = the bitwise OR of one or more of the following |
+ |
+- TJ_BGR: The components of each pixel in the source image are stored in |
+- B,G,R order, not R,G,B |
+- TJ_BOTTOMUP: The source image is stored in bottom-up (Windows) order, |
+- not top-down |
+- TJ_FORCEMMX: Valid only for the Intel Performance Primitives implementation |
+- of this codec-- force IPP to use MMX code (bypass CPU auto-detection) |
+- TJ_FORCESSE: Valid only for the Intel Performance Primitives implementation |
+- of this codec-- force IPP to use SSE code (bypass CPU auto-detection) |
+- TJ_FORCESSE2: Valid only for the Intel Performance Primitives implementation |
+- of this codec-- force IPP to use SSE2 code (bypass CPU auto-detection) |
+- TJ_FORCESSE3: Valid only for the Intel Performance Primitives implementation |
+- of this codec-- force IPP to use SSE3 code (bypass CPU auto-detection) |
++/** |
++ * Encode an RGB or grayscale image into a YUV planar image. This function |
++ * uses the accelerated color conversion routines in TurboJPEG's underlying |
++ * codec to produce a planar YUV image that is suitable for X Video. |
++ * Specifically, if the chrominance components are subsampled along the |
++ * horizontal dimension, then the width of the luminance plane is padded to the |
++ * nearest multiple of 2 in the output image (same goes for the height of the |
++ * luminance plane, if the chrominance components are subsampled along the |
++ * vertical dimension.) Also, each line of each plane in the output image is |
++ * padded to 4 bytes. Although this will work with any subsampling option, it |
++ * is really only useful in combination with TJ_420, which produces an image |
++ * compatible with the I420 (AKA "YUV420P") format. |
++ * <p> |
++ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the |
++ * convention of the digital video community, the TurboJPEG API uses "YUV" to |
++ * refer to an image format consisting of Y, Cb, and Cr image planes. |
++ * |
++ * @param handle a handle to a TurboJPEG compressor or transformer instance |
++ * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels |
++ * to be encoded |
++ * @param width width (in pixels) of the source image |
++ * @param pitch bytes per line of the source image. Normally, this should be |
++ * <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded, |
++ * or <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of |
++ * the image is padded to the nearest 32-bit boundary, as is the case |
++ * for Windows bitmaps. You can also be clever and use this parameter |
++ * to skip lines, etc. Setting this parameter to 0 is the equivalent of |
++ * setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>. |
++ * @param height height (in pixels) of the source image |
++ * @param pixelFormat pixel format of the source image (see @ref TJPF |
++ * "Pixel formats".) |
++ * @param dstBuf pointer to an image buffer that will receive the YUV image. |
++ * Use #tjBufSizeYUV() to determine the appropriate size for this buffer |
++ * based on the image width, height, and level of chrominance |
++ * subsampling. |
++ * @param subsamp the level of chrominance subsampling to be used when |
++ * generating the YUV image (see @ref TJSAMP |
++ * "Chrominance subsampling options".) |
++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP |
++ * "flags". |
++ * |
++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) |
++*/ |
++DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, |
++ unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, |
++ unsigned char *dstBuf, int subsamp, int flags); |
+ |
+- RETURNS: 0 on success, -1 on error |
+ |
-+#define FIX_0_298631336 (2446) |
-+#define FIX_0_390180644 (3196) |
-+#define FIX_0_541196100 (4433) |
-+#define FIX_0_765366865 (6270) |
-+#define FIX_0_899976223 (7373) |
-+#define FIX_1_175875602 (9633) |
-+#define FIX_1_501321110 (12299) |
-+#define FIX_1_847759065 (15137) |
-+#define FIX_1_961570560 (16069) |
-+#define FIX_2_053119869 (16819) |
-+#define FIX_2_562915447 (20995) |
-+#define FIX_3_072711026 (25172) |
-+ |
-+#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560) |
-+#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644) |
-+#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065) |
-+#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447) |
-+#define FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223) |
-+#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223) |
-+#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447) |
-+#define FIX_0_541196100_PLUS_0_765366865 (FIX_0_541196100 + FIX_0_765366865) |
-+ |
-+/* |
-+ * Reference SIMD-friendly 1-D ISLOW iDCT C implementation. |
-+ * Uses some ideas from the comments in 'simd/jiss2int-64.asm' |
++/** |
++ * Create a TurboJPEG decompressor instance. |
++ * |
++ * @return a handle to the newly-created instance, or NULL if an error |
++ * occurred (see #tjGetErrorStr().) |
+ */ |
+-DLLEXPORT int DLLCALL tjCompress(tjhandle j, |
+- unsigned char *srcbuf, int width, int pitch, int height, int pixelsize, |
+- unsigned char *dstbuf, unsigned long *size, |
+- int jpegsubsamp, int jpegqual, int flags); |
++DLLEXPORT tjhandle DLLCALL tjInitDecompress(void); |
+ |
+-DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height); |
+ |
+-/* |
+- tjhandle tjInitDecompress(void) |
++/** |
++ * Retrieve information about a JPEG image without decompressing it. |
++ * |
++ * @param handle a handle to a TurboJPEG decompressor or transformer instance |
++ * @param jpegBuf pointer to a buffer containing a JPEG image |
++ * @param jpegSize size of the JPEG image (in bytes) |
++ * @param width pointer to an integer variable that will receive the width (in |
++ * pixels) of the JPEG image |
++ * @param height pointer to an integer variable that will receive the height |
++ * (in pixels) of the JPEG image |
++ * @param jpegSubsamp pointer to an integer variable that will receive the |
++ * level of chrominance subsampling used when compressing the JPEG image |
++ * (see @ref TJSAMP "Chrominance subsampling options".) |
++ * |
++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) |
++*/ |
++DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle, |
++ unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, |
++ int *jpegSubsamp); |
+ |
+- Creates a new JPEG decompressor instance, allocates memory for the |
+- structures, and returns a handle to the instance. Most applications will |
+- only need to call this once at the beginning of the program or once for each |
+- concurrent thread. Don't try to create a new instance every time you |
+- decompress an image, because this will cause performance to suffer. |
+ |
+- RETURNS: NULL on error |
++/** |
++ * Returns a list of fractional scaling factors that the JPEG decompressor in |
++ * this implementation of TurboJPEG supports. |
++ * |
++ * @param numscalingfactors pointer to an integer variable that will receive |
++ * the number of elements in the list |
++ * |
++ * @return a pointer to a list of fractional scaling factors, or NULL if an |
++ * error is encountered (see #tjGetErrorStr().) |
+ */ |
+-DLLEXPORT tjhandle DLLCALL tjInitDecompress(void); |
++DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors); |
+ |
+ |
+-/* |
+- int tjDecompressHeader(tjhandle j, |
+- unsigned char *srcbuf, unsigned long size, |
+- int *width, int *height) |
++/** |
++ * Decompress a JPEG image to an RGB or grayscale image. |
++ * |
++ * @param handle a handle to a TurboJPEG decompressor or transformer instance |
++ * @param jpegBuf pointer to a buffer containing the JPEG image to decompress |
++ * @param jpegSize size of the JPEG image (in bytes) |
++ * @param dstBuf pointer to an image buffer that will receive the decompressed |
++ * image. This buffer should normally be <tt>pitch * scaledHeight</tt> |
++ * bytes in size, where <tt>scaledHeight</tt> can be determined by |
++ * calling #TJSCALED() with the JPEG image height and one of the scaling |
++ * factors returned by #tjGetScalingFactors(). The <tt>dstBuf</tt> |
++ * pointer may also be used to decompress into a specific region of a |
++ * larger buffer. |
++ * @param width desired width (in pixels) of the destination image. If this is |
++ * different than the width of the JPEG image being decompressed, then |
++ * TurboJPEG will use scaling in the JPEG decompressor to generate the |
++ * largest possible image that will fit within the desired width. If |
++ * <tt>width</tt> is set to 0, then only the height will be considered |
++ * when determining the scaled image size. |
++ * @param pitch bytes per line of the destination image. Normally, this is |
++ * <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt> if the decompressed |
++ * image is unpadded, else <tt>#TJPAD(scaledWidth * |
++ * #tjPixelSize[pixelFormat])</tt> if each line of the decompressed |
++ * image is padded to the nearest 32-bit boundary, as is the case for |
++ * Windows bitmaps. (NOTE: <tt>scaledWidth</tt> can be determined by |
++ * calling #TJSCALED() with the JPEG image width and one of the scaling |
++ * factors returned by #tjGetScalingFactors().) You can also be clever |
++ * and use the pitch parameter to skip lines, etc. Setting this |
++ * parameter to 0 is the equivalent of setting it to <tt>scaledWidth |
++ * * #tjPixelSize[pixelFormat]</tt>. |
++ * @param height desired height (in pixels) of the destination image. If this |
++ * is different than the height of the JPEG image being decompressed, |
++ * then TurboJPEG will use scaling in the JPEG decompressor to generate |
++ * the largest possible image that will fit within the desired height. |
++ * If <tt>height</tt> is set to 0, then only the width will be |
++ * considered when determining the scaled image size. |
++ * @param pixelFormat pixel format of the destination image (see @ref |
++ * TJPF "Pixel formats".) |
++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP |
++ * "flags". |
++ * |
++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) |
+ */ |
-+#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) \ |
-+{ \ |
-+ DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \ |
-+ INT32 q1, q2, q3, q4, q5, q6, q7; \ |
-+ INT32 tmp11_plus_tmp2, tmp11_minus_tmp2; \ |
-+ \ |
-+ /* 1-D iDCT input data */ \ |
-+ row0 = xrow0; \ |
-+ row1 = xrow1; \ |
-+ row2 = xrow2; \ |
-+ row3 = xrow3; \ |
-+ row4 = xrow4; \ |
-+ row5 = xrow5; \ |
-+ row6 = xrow6; \ |
-+ row7 = xrow7; \ |
-+ \ |
-+ q5 = row7 + row3; \ |
-+ q4 = row5 + row1; \ |
-+ q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \ |
-+ MULTIPLY(q4, FIX_1_175875602); \ |
-+ q7 = MULTIPLY(q5, FIX_1_175875602) + \ |
-+ MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \ |
-+ q2 = MULTIPLY(row2, FIX_0_541196100) + \ |
-+ MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \ |
-+ q4 = q6; \ |
-+ q3 = ((INT32) row0 - (INT32) row4) << 13; \ |
-+ q6 += MULTIPLY(row5, -FIX_2_562915447) + \ |
-+ MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \ |
-+ /* now we can use q1 (reloadable constants have been used up) */ \ |
-+ q1 = q3 + q2; \ |
-+ q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \ |
-+ MULTIPLY(row1, -FIX_0_899976223); \ |
-+ q5 = q7; \ |
-+ q1 = q1 + q6; \ |
-+ q7 += MULTIPLY(row7, -FIX_0_899976223) + \ |
-+ MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \ |
-+ \ |
-+ /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \ |
-+ tmp11_plus_tmp2 = q1; \ |
-+ row1 = 0; \ |
-+ \ |
-+ q1 = q1 - q6; \ |
-+ q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \ |
-+ MULTIPLY(row3, -FIX_2_562915447); \ |
-+ q1 = q1 - q6; \ |
-+ q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \ |
-+ MULTIPLY(row6, FIX_0_541196100); \ |
-+ q3 = q3 - q2; \ |
-+ \ |
-+ /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \ |
-+ tmp11_minus_tmp2 = q1; \ |
-+ \ |
-+ q1 = ((INT32) row0 + (INT32) row4) << 13; \ |
-+ q2 = q1 + q6; \ |
-+ q1 = q1 - q6; \ |
-+ \ |
-+ /* pick up the results */ \ |
-+ tmp0 = q4; \ |
-+ tmp1 = q5; \ |
-+ tmp2 = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \ |
-+ tmp3 = q7; \ |
-+ tmp10 = q2; \ |
-+ tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \ |
-+ tmp12 = q3; \ |
-+ tmp13 = q1; \ |
-+} |
-+ |
-+#define XFIX_0_899976223 v0.4h[0] |
-+#define XFIX_0_541196100 v0.4h[1] |
-+#define XFIX_2_562915447 v0.4h[2] |
-+#define XFIX_0_298631336_MINUS_0_899976223 v0.4h[3] |
-+#define XFIX_1_501321110_MINUS_0_899976223 v1.4h[0] |
-+#define XFIX_2_053119869_MINUS_2_562915447 v1.4h[1] |
-+#define XFIX_0_541196100_PLUS_0_765366865 v1.4h[2] |
-+#define XFIX_1_175875602 v1.4h[3] |
-+#define XFIX_1_175875602_MINUS_0_390180644 v2.4h[0] |
-+#define XFIX_0_541196100_MINUS_1_847759065 v2.4h[1] |
-+#define XFIX_3_072711026_MINUS_2_562915447 v2.4h[2] |
-+#define XFIX_1_175875602_MINUS_1_961570560 v2.4h[3] |
-+ |
-+.balign 16 |
-+jsimd_idct_islow_neon_consts: |
-+ .short FIX_0_899976223 /* d0[0] */ |
-+ .short FIX_0_541196100 /* d0[1] */ |
-+ .short FIX_2_562915447 /* d0[2] */ |
-+ .short FIX_0_298631336_MINUS_0_899976223 /* d0[3] */ |
-+ .short FIX_1_501321110_MINUS_0_899976223 /* d1[0] */ |
-+ .short FIX_2_053119869_MINUS_2_562915447 /* d1[1] */ |
-+ .short FIX_0_541196100_PLUS_0_765366865 /* d1[2] */ |
-+ .short FIX_1_175875602 /* d1[3] */ |
-+ /* reloadable constants */ |
-+ .short FIX_1_175875602_MINUS_0_390180644 /* d2[0] */ |
-+ .short FIX_0_541196100_MINUS_1_847759065 /* d2[1] */ |
-+ .short FIX_3_072711026_MINUS_2_562915447 /* d2[2] */ |
-+ .short FIX_1_175875602_MINUS_1_961570560 /* d2[3] */ |
-+ |
-+asm_function jsimd_idct_islow_neon |
-+ |
-+ DCT_TABLE .req x0 |
-+ COEF_BLOCK .req x1 |
-+ OUTPUT_BUF .req x2 |
-+ OUTPUT_COL .req x3 |
-+ TMP1 .req x0 |
-+ TMP2 .req x1 |
-+ TMP3 .req x2 |
-+ TMP4 .req x15 |
-+ |
-+ ROW0L .req v16 |
-+ ROW0R .req v17 |
-+ ROW1L .req v18 |
-+ ROW1R .req v19 |
-+ ROW2L .req v20 |
-+ ROW2R .req v21 |
-+ ROW3L .req v22 |
-+ ROW3R .req v23 |
-+ ROW4L .req v24 |
-+ ROW4R .req v25 |
-+ ROW5L .req v26 |
-+ ROW5R .req v27 |
-+ ROW6L .req v28 |
-+ ROW6R .req v29 |
-+ ROW7L .req v30 |
-+ ROW7R .req v31 |
-+ /* Save all NEON registers and x15 (32 NEON registers * 8 bytes + 16) */ |
-+ sub sp, sp, 272 |
-+ str x15, [sp], 16 |
-+ adr x15, jsimd_idct_islow_neon_consts |
-+ st1 {v0.8b - v3.8b}, [sp], 32 |
-+ st1 {v4.8b - v7.8b}, [sp], 32 |
-+ st1 {v8.8b - v11.8b}, [sp], 32 |
-+ st1 {v12.8b - v15.8b}, [sp], 32 |
-+ st1 {v16.8b - v19.8b}, [sp], 32 |
-+ st1 {v20.8b - v23.8b}, [sp], 32 |
-+ st1 {v24.8b - v27.8b}, [sp], 32 |
-+ st1 {v28.8b - v31.8b}, [sp], 32 |
-+ ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [COEF_BLOCK], 32 |
-+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32 |
-+ ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [COEF_BLOCK], 32 |
-+ mul v16.4h, v16.4h, v0.4h |
-+ mul v17.4h, v17.4h, v1.4h |
-+ ins v16.2d[1], v17.2d[0] /* 128 bit q8 */ |
-+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32 |
-+ mul v18.4h, v18.4h, v2.4h |
-+ mul v19.4h, v19.4h, v3.4h |
-+ ins v18.2d[1], v19.2d[0] /* 128 bit q9 */ |
-+ ld1 {v24.4h, v25.4h, v26.4h, v27.4h}, [COEF_BLOCK], 32 |
-+ mul v20.4h, v20.4h, v4.4h |
-+ mul v21.4h, v21.4h, v5.4h |
-+ ins v20.2d[1], v21.2d[0] /* 128 bit q10 */ |
-+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32 |
-+ mul v22.4h, v22.4h, v6.4h |
-+ mul v23.4h, v23.4h, v7.4h |
-+ ins v22.2d[1], v23.2d[0] /* 128 bit q11 */ |
-+ ld1 {v28.4h, v29.4h, v30.4h, v31.4h}, [COEF_BLOCK] |
-+ mul v24.4h, v24.4h, v0.4h |
-+ mul v25.4h, v25.4h, v1.4h |
-+ ins v24.2d[1], v25.2d[0] /* 128 bit q12 */ |
-+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32 |
-+ mul v28.4h, v28.4h, v4.4h |
-+ mul v29.4h, v29.4h, v5.4h |
-+ ins v28.2d[1], v29.2d[0] /* 128 bit q14 */ |
-+ mul v26.4h, v26.4h, v2.4h |
-+ mul v27.4h, v27.4h, v3.4h |
-+ ins v26.2d[1], v27.2d[0] /* 128 bit q13 */ |
-+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x15] /* load constants */ |
-+ add x15, x15, #16 |
-+ mul v30.4h, v30.4h, v6.4h |
-+ mul v31.4h, v31.4h, v7.4h |
-+ ins v30.2d[1], v31.2d[0] /* 128 bit q15 */ |
-+ /* Go to the bottom of the stack */ |
-+ sub sp, sp, 352 |
-+ stp x4, x5, [sp], 16 |
-+ st1 {v8.4h - v11.4h}, [sp], 32 /* save NEON registers */ |
-+ st1 {v12.4h - v15.4h}, [sp], 32 |
-+ /* 1-D IDCT, pass 1, left 4x8 half */ |
-+ add v4.4h, ROW7L.4h, ROW3L.4h |
-+ add v5.4h, ROW5L.4h, ROW1L.4h |
-+ smull v12.4s, v4.4h, XFIX_1_175875602_MINUS_1_961570560 |
-+ smlal v12.4s, v5.4h, XFIX_1_175875602 |
-+ smull v14.4s, v4.4h, XFIX_1_175875602 |
-+ /* Check for the zero coefficients in the right 4x8 half */ |
-+ smlal v14.4s, v5.4h, XFIX_1_175875602_MINUS_0_390180644 |
-+ ssubl v6.4s, ROW0L.4h, ROW4L.4h |
-+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))] |
-+ smull v4.4s, ROW2L.4h, XFIX_0_541196100 |
-+ smlal v4.4s, ROW6L.4h, XFIX_0_541196100_MINUS_1_847759065 |
-+ orr x0, x4, x5 |
-+ mov v8.16b, v12.16b |
-+ smlsl v12.4s, ROW5L.4h, XFIX_2_562915447 |
-+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))] |
-+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 |
-+ shl v6.4s, v6.4s, #13 |
-+ orr x0, x0, x4 |
-+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 |
-+ orr x0, x0 , x5 |
-+ add v2.4s, v6.4s, v4.4s |
-+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))] |
-+ mov v10.16b, v14.16b |
-+ add v2.4s, v2.4s, v12.4s |
-+ orr x0, x0, x4 |
-+ smlsl v14.4s, ROW7L.4h, XFIX_0_899976223 |
-+ orr x0, x0, x5 |
-+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 |
-+ rshrn ROW1L.4h, v2.4s, #11 |
-+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))] |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smlal v10.4s, ROW5L.4h, XFIX_2_053119869_MINUS_2_562915447 |
-+ orr x0, x0, x4 |
-+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 |
-+ orr x0, x0, x5 |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 |
-+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))] |
-+ smlal v12.4s, ROW6L.4h, XFIX_0_541196100 |
-+ sub v6.4s, v6.4s, v4.4s |
-+ orr x0, x0, x4 |
-+ rshrn ROW6L.4h, v2.4s, #11 |
-+ orr x0, x0, x5 |
-+ add v2.4s, v6.4s, v10.4s |
-+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))] |
-+ sub v6.4s, v6.4s, v10.4s |
-+ saddl v10.4s, ROW0L.4h, ROW4L.4h |
-+ orr x0, x0, x4 |
-+ rshrn ROW2L.4h, v2.4s, #11 |
-+ orr x0, x0, x5 |
-+ rshrn ROW5L.4h, v6.4s, #11 |
-+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))] |
-+ shl v10.4s, v10.4s, #13 |
-+ smlal v8.4s, ROW7L.4h, XFIX_0_298631336_MINUS_0_899976223 |
-+ orr x0, x0, x4 |
-+ add v4.4s, v10.4s, v12.4s |
-+ orr x0, x0, x5 |
-+ cmp x0, #0 /* orrs instruction removed */ |
-+ sub v2.4s, v10.4s, v12.4s |
-+ add v12.4s, v4.4s, v14.4s |
-+ ldp w4, w5, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))] |
-+ sub v4.4s, v4.4s, v14.4s |
-+ add v10.4s, v2.4s, v8.4s |
-+ orr x0, x4, x5 |
-+ sub v6.4s, v2.4s, v8.4s |
-+ /* pop {x4, x5} */ |
-+ sub sp, sp, 80 |
-+ ldp x4, x5, [sp], 16 |
-+ rshrn ROW7L.4h, v4.4s, #11 |
-+ rshrn ROW3L.4h, v10.4s, #11 |
-+ rshrn ROW0L.4h, v12.4s, #11 |
-+ rshrn ROW4L.4h, v6.4s, #11 |
-+ |
-+ beq 3f /* Go to do some special handling for the sparse right 4x8 half */ |
-+ |
-+ /* 1-D IDCT, pass 1, right 4x8 half */ |
-+ ld1 {v2.4h}, [x15] /* reload constants */ |
-+ add v10.4h, ROW7R.4h, ROW3R.4h |
-+ add v8.4h, ROW5R.4h, ROW1R.4h |
-+ /* Transpose ROW6L <-> ROW7L (v3 available free register) */ |
-+ transpose ROW6L, ROW7L, v3, .16b, .4h |
-+ smull v12.4s, v10.4h, XFIX_1_175875602_MINUS_1_961570560 |
-+ smlal v12.4s, v8.4h, XFIX_1_175875602 |
-+ /* Transpose ROW2L <-> ROW3L (v3 available free register) */ |
-+ transpose ROW2L, ROW3L, v3, .16b, .4h |
-+ smull v14.4s, v10.4h, XFIX_1_175875602 |
-+ smlal v14.4s, v8.4h, XFIX_1_175875602_MINUS_0_390180644 |
-+ /* Transpose ROW0L <-> ROW1L (v3 available free register) */ |
-+ transpose ROW0L, ROW1L, v3, .16b, .4h |
-+ ssubl v6.4s, ROW0R.4h, ROW4R.4h |
-+ smull v4.4s, ROW2R.4h, XFIX_0_541196100 |
-+ smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065 |
-+ /* Transpose ROW4L <-> ROW5L (v3 available free register) */ |
-+ transpose ROW4L, ROW5L, v3, .16b, .4h |
-+ mov v8.16b, v12.16b |
-+ smlsl v12.4s, ROW5R.4h, XFIX_2_562915447 |
-+ smlal v12.4s, ROW3R.4h, XFIX_3_072711026_MINUS_2_562915447 |
-+ /* Transpose ROW1L <-> ROW3L (v3 available free register) */ |
-+ transpose ROW1L, ROW3L, v3, .16b, .2s |
-+ shl v6.4s, v6.4s, #13 |
-+ smlsl v8.4s, ROW1R.4h, XFIX_0_899976223 |
-+ /* Transpose ROW4L <-> ROW6L (v3 available free register) */ |
-+ transpose ROW4L, ROW6L, v3, .16b, .2s |
-+ add v2.4s, v6.4s, v4.4s |
-+ mov v10.16b, v14.16b |
-+ add v2.4s, v2.4s, v12.4s |
-+ /* Transpose ROW0L <-> ROW2L (v3 available free register) */ |
-+ transpose ROW0L, ROW2L, v3, .16b, .2s |
-+ smlsl v14.4s, ROW7R.4h, XFIX_0_899976223 |
-+ smlal v14.4s, ROW1R.4h, XFIX_1_501321110_MINUS_0_899976223 |
-+ rshrn ROW1R.4h, v2.4s, #11 |
-+ /* Transpose ROW5L <-> ROW7L (v3 available free register) */ |
-+ transpose ROW5L, ROW7L, v3, .16b, .2s |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447 |
-+ smlsl v10.4s, ROW3R.4h, XFIX_2_562915447 |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smull v12.4s, ROW2R.4h, XFIX_0_541196100_PLUS_0_765366865 |
-+ smlal v12.4s, ROW6R.4h, XFIX_0_541196100 |
-+ sub v6.4s, v6.4s, v4.4s |
-+ rshrn ROW6R.4h, v2.4s, #11 |
-+ add v2.4s, v6.4s, v10.4s |
-+ sub v6.4s, v6.4s, v10.4s |
-+ saddl v10.4s, ROW0R.4h, ROW4R.4h |
-+ rshrn ROW2R.4h, v2.4s, #11 |
-+ rshrn ROW5R.4h, v6.4s, #11 |
-+ shl v10.4s, v10.4s, #13 |
-+ smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223 |
-+ add v4.4s, v10.4s, v12.4s |
-+ sub v2.4s, v10.4s, v12.4s |
-+ add v12.4s, v4.4s, v14.4s |
-+ sub v4.4s, v4.4s, v14.4s |
-+ add v10.4s, v2.4s, v8.4s |
-+ sub v6.4s, v2.4s, v8.4s |
-+ rshrn ROW7R.4h, v4.4s, #11 |
-+ rshrn ROW3R.4h, v10.4s, #11 |
-+ rshrn ROW0R.4h, v12.4s, #11 |
-+ rshrn ROW4R.4h, v6.4s, #11 |
-+ /* Transpose right 4x8 half */ |
-+ transpose ROW6R, ROW7R, v3, .16b, .4h |
-+ transpose ROW2R, ROW3R, v3, .16b, .4h |
-+ transpose ROW0R, ROW1R, v3, .16b, .4h |
-+ transpose ROW4R, ROW5R, v3, .16b, .4h |
-+ transpose ROW1R, ROW3R, v3, .16b, .2s |
-+ transpose ROW4R, ROW6R, v3, .16b, .2s |
-+ transpose ROW0R, ROW2R, v3, .16b, .2s |
-+ transpose ROW5R, ROW7R, v3, .16b, .2s |
-+ |
-+1: /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */ |
-+ ld1 {v2.4h}, [x15] /* reload constants */ |
-+ smull v12.4S, ROW1R.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */ |
-+ smlal v12.4s, ROW1L.4h, XFIX_1_175875602 |
-+ smlal v12.4s, ROW3R.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */ |
-+ smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560 |
-+ smull v14.4s, ROW3R.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */ |
-+ smlal v14.4s, ROW3L.4h, XFIX_1_175875602 |
-+ smlal v14.4s, ROW1R.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */ |
-+ smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644 |
-+ ssubl v6.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */ |
-+ smull v4.4s, ROW2L.4h, XFIX_0_541196100 |
-+ smlal v4.4s, ROW2R.4h, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L.4h <-> ROW2R.4h */ |
-+ mov v8.16b, v12.16b |
-+ smlsl v12.4s, ROW1R.4h, XFIX_2_562915447 /* ROW5L.4h <-> ROW1R.4h */ |
-+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 |
-+ shl v6.4s, v6.4s, #13 |
-+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 |
-+ add v2.4s, v6.4s, v4.4s |
-+ mov v10.16b, v14.16b |
-+ add v2.4s, v2.4s, v12.4s |
-+ smlsl v14.4s, ROW3R.4h, XFIX_0_899976223 /* ROW7L.4h <-> ROW3R.4h */ |
-+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 |
-+ shrn ROW1L.4h, v2.4s, #16 |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smlal v10.4s, ROW1R.4h, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L.4h <-> ROW1R.4h */ |
-+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 |
-+ smlal v12.4s, ROW2R.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */ |
-+ sub v6.4s, v6.4s, v4.4s |
-+ shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ |
-+ add v2.4s, v6.4s, v10.4s |
-+ sub v6.4s, v6.4s, v10.4s |
-+ saddl v10.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */ |
-+ shrn ROW2L.4h, v2.4s, #16 |
-+ shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ |
-+ shl v10.4s, v10.4s, #13 |
-+ smlal v8.4s, ROW3R.4h, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L.4h <-> ROW3R.4h */ |
-+ add v4.4s, v10.4s, v12.4s |
-+ sub v2.4s, v10.4s, v12.4s |
-+ add v12.4s, v4.4s, v14.4s |
-+ sub v4.4s, v4.4s, v14.4s |
-+ add v10.4s, v2.4s, v8.4s |
-+ sub v6.4s, v2.4s, v8.4s |
-+ shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ |
-+ shrn ROW3L.4h, v10.4s, #16 |
-+ shrn ROW0L.4h, v12.4s, #16 |
-+ shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ |
-+ /* 1-D IDCT, pass 2, right 4x8 half */ |
-+ ld1 {v2.4h}, [x15] /* reload constants */ |
-+ smull v12.4s, ROW5R.4h, XFIX_1_175875602 |
-+ smlal v12.4s, ROW5L.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */ |
-+ smlal v12.4s, ROW7R.4h, XFIX_1_175875602_MINUS_1_961570560 |
-+ smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */ |
-+ smull v14.4s, ROW7R.4h, XFIX_1_175875602 |
-+ smlal v14.4s, ROW7L.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */ |
-+ smlal v14.4s, ROW5R.4h, XFIX_1_175875602_MINUS_0_390180644 |
-+ smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */ |
-+ ssubl v6.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */ |
-+ smull v4.4s, ROW6L.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */ |
-+ smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065 |
-+ mov v8.16b, v12.16b |
-+ smlsl v12.4s, ROW5R.4h, XFIX_2_562915447 |
-+ smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L.4h <-> ROW3R.4h */ |
-+ shl v6.4s, v6.4s, #13 |
-+ smlsl v8.4s, ROW5L.4h, XFIX_0_899976223 /* ROW5L.4h <-> ROW1R.4h */ |
-+ add v2.4s, v6.4s, v4.4s |
-+ mov v10.16b, v14.16b |
-+ add v2.4s, v2.4s, v12.4s |
-+ smlsl v14.4s, ROW7R.4h, XFIX_0_899976223 |
-+ smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L.4h <-> ROW1R.4h */ |
-+ shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447 |
-+ smlsl v10.4s, ROW7L.4h, XFIX_2_562915447 /* ROW7L.4h <-> ROW3R.4h */ |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L.4h <-> ROW2R.4h */ |
-+ smlal v12.4s, ROW6R.4h, XFIX_0_541196100 |
-+ sub v6.4s, v6.4s, v4.4s |
-+ shrn ROW6R.4h, v2.4s, #16 |
-+ add v2.4s, v6.4s, v10.4s |
-+ sub v6.4s, v6.4s, v10.4s |
-+ saddl v10.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */ |
-+ shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ |
-+ shrn ROW5R.4h, v6.4s, #16 |
-+ shl v10.4s, v10.4s, #13 |
-+ smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223 |
-+ add v4.4s, v10.4s, v12.4s |
-+ sub v2.4s, v10.4s, v12.4s |
-+ add v12.4s, v4.4s, v14.4s |
-+ sub v4.4s, v4.4s, v14.4s |
-+ add v10.4s, v2.4s, v8.4s |
-+ sub v6.4s, v2.4s, v8.4s |
-+ shrn ROW7R.4h, v4.4s, #16 |
-+ shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ |
-+ shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ |
-+ shrn ROW4R.4h, v6.4s, #16 |
-+ |
-+2: /* Descale to 8-bit and range limit */ |
-+ ins v16.2d[1], v17.2d[0] |
-+ ins v18.2d[1], v19.2d[0] |
-+ ins v20.2d[1], v21.2d[0] |
-+ ins v22.2d[1], v23.2d[0] |
-+ sqrshrn v16.8b, v16.8h, #2 |
-+ sqrshrn2 v16.16b, v18.8h, #2 |
-+ sqrshrn v18.8b, v20.8h, #2 |
-+ sqrshrn2 v18.16b, v22.8h, #2 |
-+ |
-+ /* vpop {v8.4h - d15.4h} */ /* restore NEON registers */ |
-+ ld1 {v8.4h - v11.4h}, [sp], 32 |
-+ ld1 {v12.4h - v15.4h}, [sp], 32 |
-+ ins v24.2d[1], v25.2d[0] |
-+ |
-+ sqrshrn v20.8b, v24.8h, #2 |
-+ /* Transpose the final 8-bit samples and do signed->unsigned conversion */ |
-+ /* trn1 v16.8h, v16.8h, v18.8h */ |
-+ transpose v16, v18, v3, .16b, .8h |
-+ ins v26.2d[1], v27.2d[0] |
-+ ins v28.2d[1], v29.2d[0] |
-+ ins v30.2d[1], v31.2d[0] |
-+ sqrshrn2 v20.16b, v26.8h, #2 |
-+ sqrshrn v22.8b, v28.8h, #2 |
-+ movi v0.16b, #(CENTERJSAMPLE) |
-+ sqrshrn2 v22.16b, v30.8h, #2 |
-+ transpose_single v16, v17, v3, .2d, .8b |
-+ transpose_single v18, v19, v3, .2d, .8b |
-+ add v16.8b, v16.8b, v0.8b |
-+ add v17.8b, v17.8b, v0.8b |
-+ add v18.8b, v18.8b, v0.8b |
-+ add v19.8b, v19.8b, v0.8b |
-+ transpose v20, v22, v3, .16b, .8h |
-+ /* Store results to the output buffer */ |
-+ ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
-+ add TMP1, TMP1, OUTPUT_COL |
-+ add TMP2, TMP2, OUTPUT_COL |
-+ st1 {v16.8b}, [TMP1] |
-+ transpose_single v20, v21, v3, .2d, .8b |
-+ st1 {v17.8b}, [TMP2] |
-+ ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
-+ add TMP1, TMP1, OUTPUT_COL |
-+ add TMP2, TMP2, OUTPUT_COL |
-+ st1 {v18.8b}, [TMP1] |
-+ add v20.8b, v20.8b, v0.8b |
-+ add v21.8b, v21.8b, v0.8b |
-+ st1 {v19.8b}, [TMP2] |
-+ ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
-+ ldp TMP3, TMP4, [OUTPUT_BUF] |
-+ add TMP1, TMP1, OUTPUT_COL |
-+ add TMP2, TMP2, OUTPUT_COL |
-+ add TMP3, TMP3, OUTPUT_COL |
-+ add TMP4, TMP4, OUTPUT_COL |
-+ transpose_single v22, v23, v3, .2d, .8b |
-+ st1 {v20.8b}, [TMP1] |
-+ add v22.8b, v22.8b, v0.8b |
-+ add v23.8b, v23.8b, v0.8b |
-+ st1 {v21.8b}, [TMP2] |
-+ st1 {v22.8b}, [TMP3] |
-+ st1 {v23.8b}, [TMP4] |
-+ ldr x15, [sp], 16 |
-+ ld1 {v0.8b - v3.8b}, [sp], 32 |
-+ ld1 {v4.8b - v7.8b}, [sp], 32 |
-+ ld1 {v8.8b - v11.8b}, [sp], 32 |
-+ ld1 {v12.8b - v15.8b}, [sp], 32 |
-+ ld1 {v16.8b - v19.8b}, [sp], 32 |
-+ ld1 {v20.8b - v23.8b}, [sp], 32 |
-+ ld1 {v24.8b - v27.8b}, [sp], 32 |
-+ ld1 {v28.8b - v31.8b}, [sp], 32 |
-+ blr x30 |
-+ |
-+3: /* Left 4x8 half is done, right 4x8 half contains mostly zeros */ |
-+ |
-+ /* Transpose left 4x8 half */ |
-+ transpose ROW6L, ROW7L, v3, .16b, .4h |
-+ transpose ROW2L, ROW3L, v3, .16b, .4h |
-+ transpose ROW0L, ROW1L, v3, .16b, .4h |
-+ transpose ROW4L, ROW5L, v3, .16b, .4h |
-+ shl ROW0R.4h, ROW0R.4h, #2 /* PASS1_BITS */ |
-+ transpose ROW1L, ROW3L, v3, .16b, .2s |
-+ transpose ROW4L, ROW6L, v3, .16b, .2s |
-+ transpose ROW0L, ROW2L, v3, .16b, .2s |
-+ transpose ROW5L, ROW7L, v3, .16b, .2s |
-+ cmp x0, #0 |
-+ beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */ |
-+ |
-+ /* Only row 0 is non-zero for the right 4x8 half */ |
-+ dup ROW1R.4h, ROW0R.4h[1] |
-+ dup ROW2R.4h, ROW0R.4h[2] |
-+ dup ROW3R.4h, ROW0R.4h[3] |
-+ dup ROW4R.4h, ROW0R.4h[0] |
-+ dup ROW5R.4h, ROW0R.4h[1] |
-+ dup ROW6R.4h, ROW0R.4h[2] |
-+ dup ROW7R.4h, ROW0R.4h[3] |
-+ dup ROW0R.4h, ROW0R.4h[0] |
-+ b 1b /* Go to 'normal' second pass */ |
-+ |
-+4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */ |
-+ ld1 {v2.4h}, [x15] /* reload constants */ |
-+ smull v12.4s, ROW1L.4h, XFIX_1_175875602 |
-+ smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560 |
-+ smull v14.4s, ROW3L.4h, XFIX_1_175875602 |
-+ smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644 |
-+ smull v4.4s, ROW2L.4h, XFIX_0_541196100 |
-+ sshll v6.4s, ROW0L.4h, #13 |
-+ mov v8.16b, v12.16b |
-+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447 |
-+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223 |
-+ add v2.4s, v6.4s, v4.4s |
-+ mov v10.16b, v14.16b |
-+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223 |
-+ add v2.4s, v2.4s, v12.4s |
-+ add v12.4s, v12.4s, v12.4s |
-+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447 |
-+ shrn ROW1L.4h, v2.4s, #16 |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865 |
-+ sub v6.4s, v6.4s, v4.4s |
-+ shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ |
-+ add v2.4s, v6.4s, v10.4s |
-+ sub v6.4s, v6.4s, v10.4s |
-+ sshll v10.4s, ROW0L.4h, #13 |
-+ shrn ROW2L.4h, v2.4s, #16 |
-+ shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ |
-+ add v4.4s, v10.4s, v12.4s |
-+ sub v2.4s, v10.4s, v12.4s |
-+ add v12.4s, v4.4s, v14.4s |
-+ sub v4.4s, v4.4s, v14.4s |
-+ add v10.4s, v2.4s, v8.4s |
-+ sub v6.4s, v2.4s, v8.4s |
-+ shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ |
-+ shrn ROW3L.4h, v10.4s, #16 |
-+ shrn ROW0L.4h, v12.4s, #16 |
-+ shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ |
-+ /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */ |
-+ ld1 {v2.4h}, [x15] /* reload constants */ |
-+ smull v12.4s, ROW5L.4h, XFIX_1_175875602 |
-+ smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 |
-+ smull v14.4s, ROW7L.4h, XFIX_1_175875602 |
-+ smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 |
-+ smull v4.4s, ROW6L.4h, XFIX_0_541196100 |
-+ sshll v6.4s, ROW4L.4h, #13 |
-+ mov v8.16b, v12.16b |
-+ smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 |
-+ smlsl v8.4s, ROW5L.4h, XFIX_0_899976223 |
-+ add v2.4s, v6.4s, v4.4s |
-+ mov v10.16b, v14.16b |
-+ smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 |
-+ add v2.4s, v2.4s, v12.4s |
-+ add v12.4s, v12.4s, v12.4s |
-+ smlsl v10.4s, ROW7L.4h, XFIX_2_562915447 |
-+ shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */ |
-+ sub v2.4s, v2.4s, v12.4s |
-+ smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 |
-+ sub v6.4s, v6.4s, v4.4s |
-+ shrn ROW6R.4h, v2.4s, #16 |
-+ add v2.4s, v6.4s, v10.4s |
-+ sub v6.4s, v6.4s, v10.4s |
-+ sshll v10.4s, ROW4L.4h, #13 |
-+ shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */ |
-+ shrn ROW5R.4h, v6.4s, #16 |
-+ add v4.4s, v10.4s, v12.4s |
-+ sub v2.4s, v10.4s, v12.4s |
-+ add v12.4s, v4.4s, v14.4s |
-+ sub v4.4s, v4.4s, v14.4s |
-+ add v10.4s, v2.4s, v8.4s |
-+ sub v6.4s, v2.4s, v8.4s |
-+ shrn ROW7R.4h, v4.4s, #16 |
-+ shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */ |
-+ shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */ |
-+ shrn ROW4R.4h, v6.4s, #16 |
-+ b 2b /* Go to epilogue */ |
-+ |
-+ .unreq DCT_TABLE |
-+ .unreq COEF_BLOCK |
-+ .unreq OUTPUT_BUF |
-+ .unreq OUTPUT_COL |
-+ .unreq TMP1 |
-+ .unreq TMP2 |
-+ .unreq TMP3 |
-+ .unreq TMP4 |
-+ |
-+ .unreq ROW0L |
-+ .unreq ROW0R |
-+ .unreq ROW1L |
-+ .unreq ROW1R |
-+ .unreq ROW2L |
-+ .unreq ROW2R |
-+ .unreq ROW3L |
-+ .unreq ROW3R |
-+ .unreq ROW4L |
-+ .unreq ROW4R |
-+ .unreq ROW5L |
-+ .unreq ROW5R |
-+ .unreq ROW6L |
-+ .unreq ROW6R |
-+ .unreq ROW7L |
-+ .unreq ROW7R |
-+ |
-+ |
-+/*****************************************************************************/ |
-+ |
-+/* |
-+ * jsimd_idct_ifast_neon |
++DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, |
++ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, |
++ int width, int pitch, int height, int pixelFormat, int flags); |
+ |
+- [INPUT] j = instance handle previously returned from a call to |
+- tjInitDecompress() |
+- [INPUT] srcbuf = pointer to a user-allocated buffer containing the JPEG image |
+- to decompress |
+- [INPUT] size = size of the JPEG image buffer (in bytes) |
+- [OUTPUT] width = width (in pixels) of the JPEG image |
+- [OUTPUT] height = height (in pixels) of the JPEG image |
+ |
+- RETURNS: 0 on success, -1 on error |
+-*/ |
+-DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle j, |
+- unsigned char *srcbuf, unsigned long size, |
+- int *width, int *height); |
++/** |
++ * Decompress a JPEG image to a YUV planar image. This function performs JPEG |
++ * decompression but leaves out the color conversion step, so a planar YUV |
++ * image is generated instead of an RGB image. The padding of the planes in |
++ * this image is the same as in the images generated by #tjEncodeYUV2(). Note |
++ * that, if the width or height of the image is not an exact multiple of the MCU |
++ * block size (see #tjMCUWidth and #tjMCUHeight), then an intermediate buffer |
++ * copy will be performed within TurboJPEG. |
++ * <p> |
++ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the |
++ * convention of the digital video community, the TurboJPEG API uses "YUV" to |
++ * refer to an image format consisting of Y, Cb, and Cr image planes. |
+ * |
-+ * This function contains a fast, not so accurate integer implementation of |
-+ * the inverse DCT (Discrete Cosine Transform). It uses the same calculations |
-+ * and produces exactly the same output as IJG's original 'jpeg_idct_ifast' |
-+ * function from jidctfst.c |
++ * @param handle a handle to a TurboJPEG decompressor or transformer instance |
++ * @param jpegBuf pointer to a buffer containing the JPEG image to decompress |
++ * @param jpegSize size of the JPEG image (in bytes) |
++ * @param dstBuf pointer to an image buffer that will receive the YUV image. |
++ * Use #tjBufSizeYUV() to determine the appropriate size for this buffer |
++ * based on the image width, height, and level of subsampling. |
++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP |
++ * "flags". |
+ * |
-+ * Normally 1-D AAN DCT needs 5 multiplications and 29 additions. |
-+ * But in ARM NEON case some extra additions are required because VQDMULH |
-+ * instruction can't handle the constants larger than 1. So the expressions |
-+ * like "x * 1.082392200" have to be converted to "x * 0.082392200 + x", |
-+ * which introduces an extra addition. Overall, there are 6 extra additions |
-+ * per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions. |
++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) |
+ */ |
-+ |
-+#define XFIX_1_082392200 v0.4h[0] |
-+#define XFIX_1_414213562 v0.4h[1] |
-+#define XFIX_1_847759065 v0.4h[2] |
-+#define XFIX_2_613125930 v0.4h[3] |
-+ |
-+.balign 16 |
-+jsimd_idct_ifast_neon_consts: |
-+ .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */ |
-+ .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */ |
-+ .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */ |
-+ .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */ |
-+ |
-+asm_function jsimd_idct_ifast_neon |
-+ |
-+ DCT_TABLE .req x0 |
-+ COEF_BLOCK .req x1 |
-+ OUTPUT_BUF .req x2 |
-+ OUTPUT_COL .req x3 |
-+ TMP1 .req x0 |
-+ TMP2 .req x1 |
-+ TMP3 .req x2 |
-+ TMP4 .req x22 |
-+ TMP5 .req x23 |
-+ |
-+ /* Load and dequantize coefficients into NEON registers |
-+ * with the following allocation: |
-+ * 0 1 2 3 | 4 5 6 7 |
-+ * ---------+-------- |
-+ * 0 | d16 | d17 ( v8.8h ) |
-+ * 1 | d18 | d19 ( v9.8h ) |
-+ * 2 | d20 | d21 ( v10.8h ) |
-+ * 3 | d22 | d23 ( v11.8h ) |
-+ * 4 | d24 | d25 ( v12.8h ) |
-+ * 5 | d26 | d27 ( v13.8h ) |
-+ * 6 | d28 | d29 ( v14.8h ) |
-+ * 7 | d30 | d31 ( v15.8h ) |
-+ */ |
-+ /* Save NEON registers used in fast IDCT */ |
-+ sub sp, sp, #176 |
-+ stp x22, x23, [sp], 16 |
-+ adr x23, jsimd_idct_ifast_neon_consts |
-+ st1 {v0.8b - v3.8b}, [sp], 32 |
-+ st1 {v4.8b - v7.8b}, [sp], 32 |
-+ st1 {v8.8b - v11.8b}, [sp], 32 |
-+ st1 {v12.8b - v15.8b}, [sp], 32 |
-+ st1 {v16.8b - v19.8b}, [sp], 32 |
-+ ld1 {v8.8h, v9.8h}, [COEF_BLOCK], 32 |
-+ ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 |
-+ ld1 {v10.8h, v11.8h}, [COEF_BLOCK], 32 |
-+ mul v8.8h, v8.8h, v0.8h |
-+ ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 |
-+ mul v9.8h, v9.8h, v1.8h |
-+ ld1 {v12.8h, v13.8h}, [COEF_BLOCK], 32 |
-+ mul v10.8h, v10.8h, v2.8h |
-+ ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32 |
-+ mul v11.8h, v11.8h, v3.8h |
-+ ld1 {v14.8h, v15.8h}, [COEF_BLOCK], 32 |
-+ mul v12.8h, v12.8h, v0.8h |
-+ ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32 |
-+ mul v14.8h, v14.8h, v2.8h |
-+ mul v13.8h, v13.8h, v1.8h |
-+ ld1 {v0.4h}, [x23] /* load constants */ |
-+ mul v15.8h, v15.8h, v3.8h |
-+ |
-+ /* 1-D IDCT, pass 1 */ |
-+ sub v2.8h, v10.8h, v14.8h |
-+ add v14.8h, v10.8h, v14.8h |
-+ sub v1.8h, v11.8h, v13.8h |
-+ add v13.8h, v11.8h, v13.8h |
-+ sub v5.8h, v9.8h, v15.8h |
-+ add v15.8h, v9.8h, v15.8h |
-+ sqdmulh v4.8h, v2.8h, XFIX_1_414213562 |
-+ sqdmulh v6.8h, v1.8h, XFIX_2_613125930 |
-+ add v3.8h, v1.8h, v1.8h |
-+ sub v1.8h, v5.8h, v1.8h |
-+ add v10.8h, v2.8h, v4.8h |
-+ sqdmulh v4.8h, v1.8h, XFIX_1_847759065 |
-+ sub v2.8h, v15.8h, v13.8h |
-+ add v3.8h, v3.8h, v6.8h |
-+ sqdmulh v6.8h, v2.8h, XFIX_1_414213562 |
-+ add v1.8h, v1.8h, v4.8h |
-+ sqdmulh v4.8h, v5.8h, XFIX_1_082392200 |
-+ sub v10.8h, v10.8h, v14.8h |
-+ add v2.8h, v2.8h, v6.8h |
-+ sub v6.8h, v8.8h, v12.8h |
-+ add v12.8h, v8.8h, v12.8h |
-+ add v9.8h, v5.8h, v4.8h |
-+ add v5.8h, v6.8h, v10.8h |
-+ sub v10.8h, v6.8h, v10.8h |
-+ add v6.8h, v15.8h, v13.8h |
-+ add v8.8h, v12.8h, v14.8h |
-+ sub v3.8h, v6.8h, v3.8h |
-+ sub v12.8h, v12.8h, v14.8h |
-+ sub v3.8h, v3.8h, v1.8h |
-+ sub v1.8h, v9.8h, v1.8h |
-+ add v2.8h, v3.8h, v2.8h |
-+ sub v15.8h, v8.8h, v6.8h |
-+ add v1.8h, v1.8h, v2.8h |
-+ add v8.8h, v8.8h, v6.8h |
-+ add v14.8h, v5.8h, v3.8h |
-+ sub v9.8h, v5.8h, v3.8h |
-+ sub v13.8h, v10.8h, v2.8h |
-+ add v10.8h, v10.8h, v2.8h |
-+ /* Transpose q8-q9 */ |
-+ mov v18.16b, v8.16b |
-+ trn1 v8.8h, v8.8h, v9.8h |
-+ trn2 v9.8h, v18.8h, v9.8h |
-+ sub v11.8h, v12.8h, v1.8h |
-+ /* Transpose q14-q15 */ |
-+ mov v18.16b, v14.16b |
-+ trn1 v14.8h, v14.8h, v15.8h |
-+ trn2 v15.8h, v18.8h, v15.8h |
-+ add v12.8h, v12.8h, v1.8h |
-+ /* Transpose q10-q11 */ |
-+ mov v18.16b, v10.16b |
-+ trn1 v10.8h, v10.8h, v11.8h |
-+ trn2 v11.8h, v18.8h, v11.8h |
-+ /* Transpose q12-q13 */ |
-+ mov v18.16b, v12.16b |
-+ trn1 v12.8h, v12.8h, v13.8h |
-+ trn2 v13.8h, v18.8h, v13.8h |
-+ /* Transpose q9-q11 */ |
-+ mov v18.16b, v9.16b |
-+ trn1 v9.4s, v9.4s, v11.4s |
-+ trn2 v11.4s, v18.4s, v11.4s |
-+ /* Transpose q12-q14 */ |
-+ mov v18.16b, v12.16b |
-+ trn1 v12.4s, v12.4s, v14.4s |
-+ trn2 v14.4s, v18.4s, v14.4s |
-+ /* Transpose q8-q10 */ |
-+ mov v18.16b, v8.16b |
-+ trn1 v8.4s, v8.4s, v10.4s |
-+ trn2 v10.4s, v18.4s, v10.4s |
-+ /* Transpose q13-q15 */ |
-+ mov v18.16b, v13.16b |
-+ trn1 v13.4s, v13.4s, v15.4s |
-+ trn2 v15.4s, v18.4s, v15.4s |
-+ /* vswp v14.4h, v10-MSB.4h */ |
-+ umov x22, v14.d[0] |
-+ ins v14.2d[0], v10.2d[1] |
-+ ins v10.2d[1], x22 |
-+ /* vswp v13.4h, v9MSB.4h */ |
-+ |
-+ umov x22, v13.d[0] |
-+ ins v13.2d[0], v9.2d[1] |
-+ ins v9.2d[1], x22 |
-+ /* 1-D IDCT, pass 2 */ |
-+ sub v2.8h, v10.8h, v14.8h |
-+ /* vswp v15.4h, v11MSB.4h */ |
-+ umov x22, v15.d[0] |
-+ ins v15.2d[0], v11.2d[1] |
-+ ins v11.2d[1], x22 |
-+ add v14.8h, v10.8h, v14.8h |
-+ /* vswp v12.4h, v8-MSB.4h */ |
-+ umov x22, v12.d[0] |
-+ ins v12.2d[0], v8.2d[1] |
-+ ins v8.2d[1], x22 |
-+ sub v1.8h, v11.8h, v13.8h |
-+ add v13.8h, v11.8h, v13.8h |
-+ sub v5.8h, v9.8h, v15.8h |
-+ add v15.8h, v9.8h, v15.8h |
-+ sqdmulh v4.8h, v2.8h, XFIX_1_414213562 |
-+ sqdmulh v6.8h, v1.8h, XFIX_2_613125930 |
-+ add v3.8h, v1.8h, v1.8h |
-+ sub v1.8h, v5.8h, v1.8h |
-+ add v10.8h, v2.8h, v4.8h |
-+ sqdmulh v4.8h, v1.8h, XFIX_1_847759065 |
-+ sub v2.8h, v15.8h, v13.8h |
-+ add v3.8h, v3.8h, v6.8h |
-+ sqdmulh v6.8h, v2.8h, XFIX_1_414213562 |
-+ add v1.8h, v1.8h, v4.8h |
-+ sqdmulh v4.8h, v5.8h, XFIX_1_082392200 |
-+ sub v10.8h, v10.8h, v14.8h |
-+ add v2.8h, v2.8h, v6.8h |
-+ sub v6.8h, v8.8h, v12.8h |
-+ add v12.8h, v8.8h, v12.8h |
-+ add v9.8h, v5.8h, v4.8h |
-+ add v5.8h, v6.8h, v10.8h |
-+ sub v10.8h, v6.8h, v10.8h |
-+ add v6.8h, v15.8h, v13.8h |
-+ add v8.8h, v12.8h, v14.8h |
-+ sub v3.8h, v6.8h, v3.8h |
-+ sub v12.8h, v12.8h, v14.8h |
-+ sub v3.8h, v3.8h, v1.8h |
-+ sub v1.8h, v9.8h, v1.8h |
-+ add v2.8h, v3.8h, v2.8h |
-+ sub v15.8h, v8.8h, v6.8h |
-+ add v1.8h, v1.8h, v2.8h |
-+ add v8.8h, v8.8h, v6.8h |
-+ add v14.8h, v5.8h, v3.8h |
-+ sub v9.8h, v5.8h, v3.8h |
-+ sub v13.8h, v10.8h, v2.8h |
-+ add v10.8h, v10.8h, v2.8h |
-+ sub v11.8h, v12.8h, v1.8h |
-+ add v12.8h, v12.8h, v1.8h |
-+ /* Descale to 8-bit and range limit */ |
-+ movi v0.16b, #0x80 |
-+ sqshrn v8.8b, v8.8h, #5 |
-+ sqshrn2 v8.16b, v9.8h, #5 |
-+ sqshrn v9.8b, v10.8h, #5 |
-+ sqshrn2 v9.16b, v11.8h, #5 |
-+ sqshrn v10.8b, v12.8h, #5 |
-+ sqshrn2 v10.16b, v13.8h, #5 |
-+ sqshrn v11.8b, v14.8h, #5 |
-+ sqshrn2 v11.16b, v15.8h, #5 |
-+ add v8.16b, v8.16b, v0.16b |
-+ add v9.16b, v9.16b, v0.16b |
-+ add v10.16b, v10.16b, v0.16b |
-+ add v11.16b, v11.16b, v0.16b |
-+ /* Transpose the final 8-bit samples */ |
-+ /* Transpose q8-q9 */ |
-+ mov v18.16b, v8.16b |
-+ trn1 v8.8h, v8.8h, v9.8h |
-+ trn2 v9.8h, v18.8h, v9.8h |
-+ /* Transpose q10-q11 */ |
-+ mov v18.16b, v10.16b |
-+ trn1 v10.8h, v10.8h, v11.8h |
-+ trn2 v11.8h, v18.8h, v11.8h |
-+ /* Transpose q8-q10 */ |
-+ mov v18.16b, v8.16b |
-+ trn1 v8.4s, v8.4s, v10.4s |
-+ trn2 v10.4s, v18.4s, v10.4s |
-+ /* Transpose q9-q11 */ |
-+ mov v18.16b, v9.16b |
-+ trn1 v9.4s, v9.4s, v11.4s |
-+ trn2 v11.4s, v18.4s, v11.4s |
-+ /* make copy */ |
-+ ins v17.2d[0], v8.2d[1] |
-+ /* Transpose d16-d17-msb */ |
-+ mov v18.16b, v8.16b |
-+ trn1 v8.8b, v8.8b, v17.8b |
-+ trn2 v17.8b, v18.8b, v17.8b |
-+ /* make copy */ |
-+ ins v19.2d[0], v9.2d[1] |
-+ mov v18.16b, v9.16b |
-+ trn1 v9.8b, v9.8b, v19.8b |
-+ trn2 v19.8b, v18.8b, v19.8b |
-+ /* Store results to the output buffer */ |
-+ ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
-+ add TMP1, TMP1, OUTPUT_COL |
-+ add TMP2, TMP2, OUTPUT_COL |
-+ st1 {v8.8b}, [TMP1] |
-+ st1 {v17.8b}, [TMP2] |
-+ ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
-+ add TMP1, TMP1, OUTPUT_COL |
-+ add TMP2, TMP2, OUTPUT_COL |
-+ st1 {v9.8b}, [TMP1] |
-+ /* make copy */ |
-+ ins v7.2d[0], v10.2d[1] |
-+ mov v18.16b, v10.16b |
-+ trn1 v10.8b, v10.8b, v7.8b |
-+ trn2 v7.8b, v18.8b, v7.8b |
-+ st1 {v19.8b}, [TMP2] |
-+ ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
-+ ldp TMP4, TMP5, [OUTPUT_BUF], 16 |
-+ add TMP1, TMP1, OUTPUT_COL |
-+ add TMP2, TMP2, OUTPUT_COL |
-+ add TMP4, TMP4, OUTPUT_COL |
-+ add TMP5, TMP5, OUTPUT_COL |
-+ st1 {v10.8b}, [TMP1] |
-+ /* make copy */ |
-+ ins v16.2d[0], v11.2d[1] |
-+ mov v18.16b, v11.16b |
-+ trn1 v11.8b, v11.8b, v16.8b |
-+ trn2 v16.8b, v18.8b, v16.8b |
-+ st1 {v7.8b}, [TMP2] |
-+ st1 {v11.8b}, [TMP4] |
-+ st1 {v16.8b}, [TMP5] |
-+ sub sp, sp, #176 |
-+ ldp x22, x23, [sp], 16 |
-+ ld1 {v0.8b - v3.8b}, [sp], 32 |
-+ ld1 {v4.8b - v7.8b}, [sp], 32 |
-+ ld1 {v8.8b - v11.8b}, [sp], 32 |
-+ ld1 {v12.8b - v15.8b}, [sp], 32 |
-+ ld1 {v16.8b - v19.8b}, [sp], 32 |
-+ blr x30 |
-+ |
-+ .unreq DCT_TABLE |
-+ .unreq COEF_BLOCK |
-+ .unreq OUTPUT_BUF |
-+ .unreq OUTPUT_COL |
-+ .unreq TMP1 |
-+ .unreq TMP2 |
-+ .unreq TMP3 |
-+ .unreq TMP4 |
-+ |
-+ |
-+/*****************************************************************************/ |
-+ |
-+/* |
-+ * jsimd_idct_4x4_neon |
++DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle, |
++ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, |
++ int flags); |
+ |
+ |
+-/* |
+- int tjDecompress(tjhandle j, |
+- unsigned char *srcbuf, unsigned long size, |
+- unsigned char *dstbuf, int width, int pitch, int height, int pixelsize, |
+- int flags) |
++/** |
++ * Create a new TurboJPEG transformer instance. |
+ * |
-+ * This function contains inverse-DCT code for getting reduced-size |
-+ * 4x4 pixels output from an 8x8 DCT block. It uses the same calculations |
-+ * and produces exactly the same output as IJG's original 'jpeg_idct_4x4' |
-+ * function from jpeg-6b (jidctred.c). |
++ * @return a handle to the newly-created instance, or NULL if an error |
++ * occurred (see #tjGetErrorStr().) |
++ */ |
++DLLEXPORT tjhandle DLLCALL tjInitTransform(void); |
+ |
+- [INPUT] j = instance handle previously returned from a call to |
+- tjInitDecompress() |
+- [INPUT] srcbuf = pointer to a user-allocated buffer containing the JPEG image |
+- to decompress |
+- [INPUT] size = size of the JPEG image buffer (in bytes) |
+- [INPUT] dstbuf = pointer to user-allocated image buffer which will receive |
+- the bitmap image. This buffer should normally be pitch*height |
+- bytes in size, although this pointer may also be used to decompress into |
+- a specific region of a larger buffer. |
+- [INPUT] width = width (in pixels) of the destination image |
+- [INPUT] pitch = bytes per line of the destination image (width*pixelsize if the |
+- bitmap is unpadded, else TJPAD(width*pixelsize) if each line of the bitmap |
+- is padded to the nearest 32-bit boundary, such as is the case for Windows |
+- bitmaps. You can also be clever and use this parameter to skip lines, etc., |
+- as long as the pitch is greater than 0.) |
+- [INPUT] height = height (in pixels) of the destination image |
+- [INPUT] pixelsize = size (in bytes) of each pixel in the destination image |
+- RGBA/RGBx and BGRA/BGRx: 4, RGB and BGR: 3 |
+- [INPUT] flags = the bitwise OR of one or more of the following |
+ |
+- TJ_BGR: The components of each pixel in the destination image should be |
+- written in B,G,R order, not R,G,B |
+- TJ_BOTTOMUP: The destination image should be stored in bottom-up |
+- (Windows) order, not top-down |
+- TJ_FORCEMMX: Valid only for the Intel Performance Primitives implementation |
+- of this codec-- force IPP to use MMX code (bypass CPU auto-detection) |
+- TJ_FORCESSE: Valid only for the Intel Performance Primitives implementation |
+- of this codec-- force IPP to use SSE code (bypass CPU auto-detection) |
+- TJ_FORCESSE2: Valid only for the Intel Performance Primitives implementation |
+- of this codec-- force IPP to use SSE2 code (bypass CPU auto-detection) |
++/** |
++ * Losslessly transform a JPEG image into another JPEG image. Lossless |
++ * transforms work by moving the raw coefficients from one JPEG image structure |
++ * to another without altering the values of the coefficients. While this is |
++ * typically faster than decompressing the image, transforming it, and |
++ * re-compressing it, lossless transforms are not free. Each lossless |
++ * transform requires reading and performing Huffman decoding on all of the |
++ * coefficients in the source image, regardless of the size of the destination |
++ * image. Thus, this function provides a means of generating multiple |
++ * transformed images from the same source or applying multiple |
++ * transformations simultaneously, in order to eliminate the need to read the |
++ * source coefficients multiple times. |
+ * |
-+ * NOTE: jpeg-8 has an improved implementation of 4x4 inverse-DCT, which |
-+ * requires much less arithmetic operations and hence should be faster. |
-+ * The primary purpose of this particular NEON optimized function is |
-+ * bit exact compatibility with jpeg-6b. |
++ * @param handle a handle to a TurboJPEG transformer instance |
++ * @param jpegBuf pointer to a buffer containing the JPEG image to transform |
++ * @param jpegSize size of the JPEG image (in bytes) |
++ * @param n the number of transformed JPEG images to generate |
++ * @param dstBufs pointer to an array of n image buffers. <tt>dstBufs[i]</tt> |
++ * will receive a JPEG image that has been transformed using the |
++ * parameters in <tt>transforms[i]</tt>. TurboJPEG has the ability to |
++ * reallocate the JPEG buffer to accommodate the size of the JPEG image. |
++ * Thus, you can choose to: |
++ * -# pre-allocate the JPEG buffer with an arbitrary size using |
++ * #tjAlloc() and let TurboJPEG grow the buffer as needed, |
++ * -# set <tt>dstBufs[i]</tt> to NULL to tell TurboJPEG to allocate the |
++ * buffer for you, or |
++ * -# pre-allocate the buffer to a "worst case" size determined by |
++ * calling #tjBufSize() with the transformed or cropped width and |
++ * height. This should ensure that the buffer never has to be |
++ * re-allocated (setting #TJFLAG_NOREALLOC guarantees this.) |
++ * . |
++ * If you choose option 1, <tt>dstSizes[i]</tt> should be set to |
++ * the size of your pre-allocated buffer. In any case, unless you have |
++ * set #TJFLAG_NOREALLOC, you should always check <tt>dstBufs[i]</tt> |
++ * upon return from this function, as it may have changed. |
++ * @param dstSizes pointer to an array of n unsigned long variables that will |
++ * receive the actual sizes (in bytes) of each transformed JPEG image. |
++ * If <tt>dstBufs[i]</tt> points to a pre-allocated buffer, then |
++ * <tt>dstSizes[i]</tt> should be set to the size of the buffer. Upon |
++ * return, <tt>dstSizes[i]</tt> will contain the size of the JPEG image |
++ * (in bytes.) |
++ * @param transforms pointer to an array of n #tjtransform structures, each of |
++ * which specifies the transform parameters and/or cropping region for |
++ * the corresponding transformed output image. |
++ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP |
++ * "flags". |
+ * |
-+ * TODO: a bit better instructions scheduling can be achieved by expanding |
-+ * idct_helper/transpose_4x4 macros and reordering instructions, |
-+ * but readability will suffer somewhat. |
++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) |
+ */ |
-+ |
-+#define CONST_BITS 13 |
-+ |
-+#define FIX_0_211164243 (1730) /* FIX(0.211164243) */ |
-+#define FIX_0_509795579 (4176) /* FIX(0.509795579) */ |
-+#define FIX_0_601344887 (4926) /* FIX(0.601344887) */ |
-+#define FIX_0_720959822 (5906) /* FIX(0.720959822) */ |
-+#define FIX_0_765366865 (6270) /* FIX(0.765366865) */ |
-+#define FIX_0_850430095 (6967) /* FIX(0.850430095) */ |
-+#define FIX_0_899976223 (7373) /* FIX(0.899976223) */ |
-+#define FIX_1_061594337 (8697) /* FIX(1.061594337) */ |
-+#define FIX_1_272758580 (10426) /* FIX(1.272758580) */ |
-+#define FIX_1_451774981 (11893) /* FIX(1.451774981) */ |
-+#define FIX_1_847759065 (15137) /* FIX(1.847759065) */ |
-+#define FIX_2_172734803 (17799) /* FIX(2.172734803) */ |
-+#define FIX_2_562915447 (20995) /* FIX(2.562915447) */ |
-+#define FIX_3_624509785 (29692) /* FIX(3.624509785) */ |
-+ |
-+.balign 16 |
-+jsimd_idct_4x4_neon_consts: |
-+ .short FIX_1_847759065 /* v0.4h[0] */ |
-+ .short -FIX_0_765366865 /* v0.4h[1] */ |
-+ .short -FIX_0_211164243 /* v0.4h[2] */ |
-+ .short FIX_1_451774981 /* v0.4h[3] */ |
-+ .short -FIX_2_172734803 /* d1[0] */ |
-+ .short FIX_1_061594337 /* d1[1] */ |
-+ .short -FIX_0_509795579 /* d1[2] */ |
-+ .short -FIX_0_601344887 /* d1[3] */ |
-+ .short FIX_0_899976223 /* v2.4h[0] */ |
-+ .short FIX_2_562915447 /* v2.4h[1] */ |
-+ .short 1 << (CONST_BITS+1) /* v2.4h[2] */ |
-+ .short 0 /* v2.4h[3] */ |
-+ |
-+.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29 |
-+ smull v28.4s, \x4, v2.4h[2] |
-+ smlal v28.4s, \x8, v0.4h[0] |
-+ smlal v28.4s, \x14, v0.4h[1] |
-+ |
-+ smull v26.4s, \x16, v1.4h[2] |
-+ smlal v26.4s, \x12, v1.4h[3] |
-+ smlal v26.4s, \x10, v2.4h[0] |
-+ smlal v26.4s, \x6, v2.4h[1] |
-+ |
-+ smull v30.4s, \x4, v2.4h[2] |
-+ smlsl v30.4s, \x8, v0.4h[0] |
-+ smlsl v30.4s, \x14, v0.4h[1] |
-+ |
-+ smull v24.4s, \x16, v0.4h[2] |
-+ smlal v24.4s, \x12, v0.4h[3] |
-+ smlal v24.4s, \x10, v1.4h[0] |
-+ smlal v24.4s, \x6, v1.4h[1] |
-+ |
-+ add v20.4s, v28.4s, v26.4s |
-+ sub v28.4s, v28.4s, v26.4s |
-+ |
-+.if \shift > 16 |
-+ srshr v20.4s, v20.4s, #\shift |
-+ srshr v28.4s, v28.4s, #\shift |
-+ xtn \y26, v20.4s |
-+ xtn \y29, v28.4s |
-+.else |
-+ rshrn \y26, v20.4s, #\shift |
-+ rshrn \y29, v28.4s, #\shift |
-+.endif |
-+ |
-+ add v20.4s, v30.4s, v24.4s |
-+ sub v30.4s, v30.4s, v24.4s |
-+ |
-+.if \shift > 16 |
-+ srshr v20.4s, v20.4s, #\shift |
-+ srshr v30.4s, v30.4s, #\shift |
-+ xtn \y27, v20.4s |
-+ xtn \y28, v30.4s |
-+.else |
-+ rshrn \y27, v20.4s, #\shift |
-+ rshrn \y28, v30.4s, #\shift |
-+.endif |
-+ |
-+.endm |
-+ |
-+asm_function jsimd_idct_4x4_neon |
-+ |
-+ DCT_TABLE .req x0 |
-+ COEF_BLOCK .req x1 |
-+ OUTPUT_BUF .req x2 |
-+ OUTPUT_COL .req x3 |
-+ TMP1 .req x0 |
-+ TMP2 .req x1 |
-+ TMP3 .req x2 |
-+ TMP4 .req x15 |
-+ |
-+ /* Save all used NEON registers */ |
-+ sub sp, sp, 272 |
-+ str x15, [sp], 16 |
-+ /* Load constants (v3.4h is just used for padding) */ |
-+ adr TMP4, jsimd_idct_4x4_neon_consts |
-+ st1 {v0.8b - v3.8b}, [sp], 32 |
-+ st1 {v4.8b - v7.8b}, [sp], 32 |
-+ st1 {v8.8b - v11.8b}, [sp], 32 |
-+ st1 {v12.8b - v15.8b}, [sp], 32 |
-+ st1 {v16.8b - v19.8b}, [sp], 32 |
-+ st1 {v20.8b - v23.8b}, [sp], 32 |
-+ st1 {v24.8b - v27.8b}, [sp], 32 |
-+ st1 {v28.8b - v31.8b}, [sp], 32 |
-+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4] |
-+ |
-+ /* Load all COEF_BLOCK into NEON registers with the following allocation: |
-+ * 0 1 2 3 | 4 5 6 7 |
-+ * ---------+-------- |
-+ * 0 | v4.4h | v5.4h |
-+ * 1 | v6.4h | v7.4h |
-+ * 2 | v8.4h | v9.4h |
-+ * 3 | v10.4h | v11.4h |
-+ * 4 | - | - |
-+ * 5 | v12.4h | v13.4h |
-+ * 6 | v14.4h | v15.4h |
-+ * 7 | v16.4h | v17.4h |
-+ */ |
-+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 |
-+ ld1 {v8.4h, v9.4h, v10.4h, v11.4h}, [COEF_BLOCK], 32 |
-+ add COEF_BLOCK, COEF_BLOCK, #16 |
-+ ld1 {v12.4h, v13.4h, v14.4h, v15.4h}, [COEF_BLOCK], 32 |
-+ ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 |
-+ /* dequantize */ |
-+ ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 |
-+ mul v4.4h, v4.4h, v18.4h |
-+ mul v5.4h, v5.4h, v19.4h |
-+ ins v4.2d[1], v5.2d[0] /* 128 bit q4 */ |
-+ ld1 {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32 |
-+ mul v6.4h, v6.4h, v20.4h |
-+ mul v7.4h, v7.4h, v21.4h |
-+ ins v6.2d[1], v7.2d[0] /* 128 bit q6 */ |
-+ mul v8.4h, v8.4h, v22.4h |
-+ mul v9.4h, v9.4h, v23.4h |
-+ ins v8.2d[1], v9.2d[0] /* 128 bit q8 */ |
-+ add DCT_TABLE, DCT_TABLE, #16 |
-+ ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32 |
-+ mul v10.4h, v10.4h, v24.4h |
-+ mul v11.4h, v11.4h, v25.4h |
-+ ins v10.2d[1], v11.2d[0] /* 128 bit q10 */ |
-+ mul v12.4h, v12.4h, v26.4h |
-+ mul v13.4h, v13.4h, v27.4h |
-+ ins v12.2d[1], v13.2d[0] /* 128 bit q12 */ |
-+ ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 |
-+ mul v14.4h, v14.4h, v28.4h |
-+ mul v15.4h, v15.4h, v29.4h |
-+ ins v14.2d[1], v15.2d[0] /* 128 bit q14 */ |
-+ mul v16.4h, v16.4h, v30.4h |
-+ mul v17.4h, v17.4h, v31.4h |
-+ ins v16.2d[1], v17.2d[0] /* 128 bit q16 */ |
-+ |
-+ /* Pass 1 */ |
-+ idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, v4.4h, v6.4h, v8.4h, v10.4h |
-+ transpose_4x4 v4, v6, v8, v10, v3 |
-+ ins v10.2d[1], v11.2d[0] |
-+ idct_helper v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, v5.4h, v7.4h, v9.4h, v11.4h |
-+ transpose_4x4 v5, v7, v9, v11, v3 |
-+ ins v10.2d[1], v11.2d[0] |
-+ /* Pass 2 */ |
-+ idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, v26.4h, v27.4h, v28.4h, v29.4h |
-+ transpose_4x4 v26, v27, v28, v29, v3 |
-+ |
-+ /* Range limit */ |
-+ movi v30.8h, #0x80 |
-+ ins v26.2d[1], v27.2d[0] |
-+ ins v28.2d[1], v29.2d[0] |
-+ add v26.8h, v26.8h, v30.8h |
-+ add v28.8h, v28.8h, v30.8h |
-+ sqxtun v26.8b, v26.8h |
-+ sqxtun v27.8b, v28.8h |
-+ |
-+ /* Store results to the output buffer */ |
-+ ldp TMP1, TMP2, [OUTPUT_BUF], 16 |
-+ ldp TMP3, TMP4, [OUTPUT_BUF] |
-+ add TMP1, TMP1, OUTPUT_COL |
-+ add TMP2, TMP2, OUTPUT_COL |
-+ add TMP3, TMP3, OUTPUT_COL |
-+ add TMP4, TMP4, OUTPUT_COL |
-+ |
-+#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT |
-+ /* We can use much less instructions on little endian systems if the |
-+ * OS kernel is not configured to trap unaligned memory accesses |
-+ */ |
-+ st1 {v26.s}[0], [TMP1], 4 |
-+ st1 {v27.s}[0], [TMP3], 4 |
-+ st1 {v26.s}[1], [TMP2], 4 |
-+ st1 {v27.s}[1], [TMP4], 4 |
-+#else |
-+ st1 {v26.b}[0], [TMP1], 1 |
-+ st1 {v27.b}[0], [TMP3], 1 |
-+ st1 {v26.b}[1], [TMP1], 1 |
-+ st1 {v27.b}[1], [TMP3], 1 |
-+ st1 {v26.b}[2], [TMP1], 1 |
-+ st1 {v27.b}[2], [TMP3], 1 |
-+ st1 {v26.b}[3], [TMP1], 1 |
-+ st1 {v27.b}[3], [TMP3], 1 |
-+ |
-+ st1 {v26.b}[4], [TMP2], 1 |
-+ st1 {v27.b}[4], [TMP4], 1 |
-+ st1 {v26.b}[5], [TMP2], 1 |
-+ st1 {v27.b}[5], [TMP4], 1 |
-+ st1 {v26.b}[6], [TMP2], 1 |
-+ st1 {v27.b}[6], [TMP4], 1 |
-+ st1 {v26.b}[7], [TMP2], 1 |
-+ st1 {v27.b}[7], [TMP4], 1 |
-+#endif |
-+ |
-+ /* vpop {v8.4h - v15.4h} ;not available */ |
-+ sub sp, sp, #272 |
-+ ldr x15, [sp], 16 |
-+ ld1 {v0.8b - v3.8b}, [sp], 32 |
-+ ld1 {v4.8b - v7.8b}, [sp], 32 |
-+ ld1 {v8.8b - v11.8b}, [sp], 32 |
-+ ld1 {v12.8b - v15.8b}, [sp], 32 |
-+ ld1 {v16.8b - v19.8b}, [sp], 32 |
-+ ld1 {v20.8b - v23.8b}, [sp], 32 |
-+ ld1 {v24.8b - v27.8b}, [sp], 32 |
-+ ld1 {v28.8b - v31.8b}, [sp], 32 |
-+ blr x30 |
-+ |
-+ .unreq DCT_TABLE |
-+ .unreq COEF_BLOCK |
-+ .unreq OUTPUT_BUF |
-+ .unreq OUTPUT_COL |
-+ .unreq TMP1 |
-+ .unreq TMP2 |
-+ .unreq TMP3 |
-+ .unreq TMP4 |
-+ |
-+.purgem idct_helper |
-+ |
-+ |
-+/*****************************************************************************/ |
-+ |
-+/* |
-+ * jsimd_idct_2x2_neon |
++DLLEXPORT int DLLCALL tjTransform(tjhandle handle, unsigned char *jpegBuf, |
++ unsigned long jpegSize, int n, unsigned char **dstBufs, |
++ unsigned long *dstSizes, tjtransform *transforms, int flags); |
+ |
+- RETURNS: 0 on success, -1 on error |
+-*/ |
+-DLLEXPORT int DLLCALL tjDecompress(tjhandle j, |
+- unsigned char *srcbuf, unsigned long size, |
+- unsigned char *dstbuf, int width, int pitch, int height, int pixelsize, |
+- int flags); |
+ |
++/** |
++ * Destroy a TurboJPEG compressor, decompressor, or transformer instance. |
+ * |
-+ * This function contains inverse-DCT code for getting reduced-size |
-+ * 2x2 pixels output from an 8x8 DCT block. It uses the same calculations |
-+ * and produces exactly the same output as IJG's original 'jpeg_idct_2x2' |
-+ * function from jpeg-6b (jidctred.c). |
++ * @param handle a handle to a TurboJPEG compressor, decompressor or |
++ * transformer instance |
+ * |
-+ * NOTE: jpeg-8 has an improved implementation of 2x2 inverse-DCT, which |
-+ * requires much less arithmetic operations and hence should be faster. |
-+ * The primary purpose of this particular NEON optimized function is |
-+ * bit exact compatibility with jpeg-6b. |
++ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().) |
+ */ |
++DLLEXPORT int DLLCALL tjDestroy(tjhandle handle); |
+ |
+-/* |
+- int tjDestroy(tjhandle h) |
+ |
+- Frees structures associated with a compression or decompression instance |
+- |
+- [INPUT] h = instance handle (returned from a previous call to |
+- tjInitCompress() or tjInitDecompress() |
++/** |
++ * Allocate an image buffer for use with TurboJPEG. You should always use |
++ * this function to allocate the JPEG destination buffer(s) for #tjCompress2() |
++ * and #tjTransform() unless you are disabling automatic buffer |
++ * (re)allocation (by setting #TJFLAG_NOREALLOC.) |
++ * |
++ * @param bytes the number of bytes to allocate |
++ * |
++ * @return a pointer to a newly-allocated buffer with the specified number of |
++ * bytes |
++ * |
++ * @sa tjFree() |
++ */ |
++DLLEXPORT unsigned char* DLLCALL tjAlloc(int bytes); |
+ |
+- RETURNS: 0 on success, -1 on error |
+-*/ |
+-DLLEXPORT int DLLCALL tjDestroy(tjhandle h); |
+ |
++/** |
++ * Free an image buffer previously allocated by TurboJPEG. You should always |
++ * use this function to free JPEG destination buffer(s) that were automatically |
++ * (re)allocated by #tjCompress2() or #tjTransform() or that were manually |
++ * allocated using #tjAlloc(). |
++ * |
++ * @param buffer address of the buffer to free |
++ * |
++ * @sa tjAlloc() |
++ */ |
++DLLEXPORT void DLLCALL tjFree(unsigned char *buffer); |
+ |
+-/* |
+- char *tjGetErrorStr(void) |
+- |
+- Returns a descriptive error message explaining why the last command failed |
+-*/ |
+ |
-+.balign 8 |
-+jsimd_idct_2x2_neon_consts: |
-+ .short -FIX_0_720959822 /* v14[0] */ |
-+ .short FIX_0_850430095 /* v14[1] */ |
-+ .short -FIX_1_272758580 /* v14[2] */ |
-+ .short FIX_3_624509785 /* v14[3] */ |
-+ |
-+.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27 |
-+ sshll v15.4s, \x4, #15 |
-+ smull v26.4s, \x6, v14.4h[3] |
-+ smlal v26.4s, \x10, v14.4h[2] |
-+ smlal v26.4s, \x12, v14.4h[1] |
-+ smlal v26.4s, \x16, v14.4h[0] |
-+ |
-+ add v20.4s, v15.4s, v26.4s |
-+ sub v15.4s, v15.4s, v26.4s |
-+ |
-+.if \shift > 16 |
-+ srshr v20.4s, v20.4s, #\shift |
-+ srshr v15.4s, v15.4s, #\shift |
-+ xtn \y26, v20.4s |
-+ xtn \y27, v15.4s |
-+.else |
-+ rshrn \y26, v20.4s, #\shift |
-+ rshrn \y27, v15.4s, #\shift |
-+.endif |
-+ |
-+.endm |
-+ |
-+asm_function jsimd_idct_2x2_neon |
-+ |
-+ DCT_TABLE .req x0 |
-+ COEF_BLOCK .req x1 |
-+ OUTPUT_BUF .req x2 |
-+ OUTPUT_COL .req x3 |
-+ TMP1 .req x0 |
-+ TMP2 .req x15 |
-+ |
-+ /* vpush {v8.4h - v15.4h} ; not available */ |
-+ sub sp, sp, 208 |
-+ str x15, [sp], 16 |
-+ |
-+ /* Load constants */ |
-+ adr TMP2, jsimd_idct_2x2_neon_consts |
-+ st1 {v4.8b - v7.8b}, [sp], 32 |
-+ st1 {v8.8b - v11.8b}, [sp], 32 |
-+ st1 {v12.8b - v15.8b}, [sp], 32 |
-+ st1 {v16.8b - v19.8b}, [sp], 32 |
-+ st1 {v21.8b - v22.8b}, [sp], 16 |
-+ st1 {v24.8b - v27.8b}, [sp], 32 |
-+ st1 {v30.8b - v31.8b}, [sp], 16 |
-+ ld1 {v14.4h}, [TMP2] |
-+ |
-+ /* Load all COEF_BLOCK into NEON registers with the following allocation: |
-+ * 0 1 2 3 | 4 5 6 7 |
-+ * ---------+-------- |
-+ * 0 | v4.4h | v5.4h |
-+ * 1 | v6.4h | v7.4h |
-+ * 2 | - | - |
-+ * 3 | v10.4h | v11.4h |
-+ * 4 | - | - |
-+ * 5 | v12.4h | v13.4h |
-+ * 6 | - | - |
-+ * 7 | v16.4h | v17.4h |
-+ */ |
-+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32 |
-+ add COEF_BLOCK, COEF_BLOCK, #16 |
-+ ld1 {v10.4h, v11.4h}, [COEF_BLOCK], 16 |
-+ add COEF_BLOCK, COEF_BLOCK, #16 |
-+ ld1 {v12.4h, v13.4h}, [COEF_BLOCK], 16 |
-+ add COEF_BLOCK, COEF_BLOCK, #16 |
-+ ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16 |
-+ /* Dequantize */ |
-+ ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32 |
-+ mul v4.4h, v4.4h, v18.4h |
-+ mul v5.4h, v5.4h, v19.4h |
-+ ins v4.2d[1], v5.2d[0] |
-+ mul v6.4h, v6.4h, v20.4h |
-+ mul v7.4h, v7.4h, v21.4h |
-+ ins v6.2d[1], v7.2d[0] |
-+ add DCT_TABLE, DCT_TABLE, #16 |
-+ ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16 |
-+ mul v10.4h, v10.4h, v24.4h |
-+ mul v11.4h, v11.4h, v25.4h |
-+ ins v10.2d[1], v11.2d[0] |
-+ add DCT_TABLE, DCT_TABLE, #16 |
-+ ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16 |
-+ mul v12.4h, v12.4h, v26.4h |
-+ mul v13.4h, v13.4h, v27.4h |
-+ ins v12.2d[1], v13.2d[0] |
-+ add DCT_TABLE, DCT_TABLE, #16 |
-+ ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 |
-+ mul v16.4h, v16.4h, v30.4h |
-+ mul v17.4h, v17.4h, v31.4h |
-+ ins v16.2d[1], v17.2d[0] |
-+ |
-+ /* Pass 1 */ |
-+#if 0 |
-+ idct_helper v4.4h, v6.4h, v10.4h, v12.4h, v16.4h, 13, v4.4h, v6.4h |
-+ transpose_4x4 v4.4h, v6.4h, v8.4h, v10.4h |
-+ idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h |
-+ transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h |
-+#else |
-+ smull v26.4s, v6.4h, v14.4h[3] |
-+ smlal v26.4s, v10.4h, v14.4h[2] |
-+ smlal v26.4s, v12.4h, v14.4h[1] |
-+ smlal v26.4s, v16.4h, v14.4h[0] |
-+ smull v24.4s, v7.4h, v14.4h[3] |
-+ smlal v24.4s, v11.4h, v14.4h[2] |
-+ smlal v24.4s, v13.4h, v14.4h[1] |
-+ smlal v24.4s, v17.4h, v14.4h[0] |
-+ sshll v15.4s, v4.4h, #15 |
-+ sshll v30.4s, v5.4h, #15 |
-+ add v20.4s, v15.4s, v26.4s |
-+ sub v15.4s, v15.4s, v26.4s |
-+ rshrn v4.4h, v20.4s, #13 |
-+ rshrn v6.4h, v15.4s, #13 |
-+ add v20.4s, v30.4s, v24.4s |
-+ sub v15.4s, v30.4s, v24.4s |
-+ rshrn v5.4h, v20.4s, #13 |
-+ rshrn v7.4h, v15.4s, #13 |
-+ ins v4.2d[1], v5.2d[0] |
-+ ins v6.2d[1], v7.2d[0] |
-+ transpose v4, v6, v3, .16b, .8h |
-+ transpose v6, v10, v3, .16b, .4s |
-+ ins v11.2d[0], v10.2d[1] |
-+ ins v7.2d[0], v6.2d[1] |
-+#endif |
-+ |
-+ /* Pass 2 */ |
-+ idct_helper v4.4h, v6.4h, v10.4h, v7.4h, v11.4h, 20, v26.4h, v27.4h |
-+ |
-+ /* Range limit */ |
-+ movi v30.8h, #0x80 |
-+ ins v26.2d[1], v27.2d[0] |
-+ add v26.8h, v26.8h, v30.8h |
-+ sqxtun v30.8b, v26.8h |
-+ ins v26.2d[0], v30.2d[0] |
-+ sqxtun v27.8b, v26.8h |
-+ |
-+ /* Store results to the output buffer */ |
-+ ldp TMP1, TMP2, [OUTPUT_BUF] |
-+ add TMP1, TMP1, OUTPUT_COL |
-+ add TMP2, TMP2, OUTPUT_COL |
-+ |
-+ st1 {v26.b}[0], [TMP1], 1 |
-+ st1 {v27.b}[4], [TMP1], 1 |
-+ st1 {v26.b}[1], [TMP2], 1 |
-+ st1 {v27.b}[5], [TMP2], 1 |
-+ |
-+ sub sp, sp, #208 |
-+ ldr x15, [sp], 16 |
-+ ld1 {v4.8b - v7.8b}, [sp], 32 |
-+ ld1 {v8.8b - v11.8b}, [sp], 32 |
-+ ld1 {v12.8b - v15.8b}, [sp], 32 |
-+ ld1 {v16.8b - v19.8b}, [sp], 32 |
-+ ld1 {v21.8b - v22.8b}, [sp], 16 |
-+ ld1 {v24.8b - v27.8b}, [sp], 32 |
-+ ld1 {v30.8b - v31.8b}, [sp], 16 |
-+ blr x30 |
-+ |
-+ .unreq DCT_TABLE |
-+ .unreq COEF_BLOCK |
-+ .unreq OUTPUT_BUF |
-+ .unreq OUTPUT_COL |
-+ .unreq TMP1 |
-+ .unreq TMP2 |
-+ |
-+.purgem idct_helper |
-+ |
-+ |
-+/*****************************************************************************/ |
-+ |
-+/* |
-+ * jsimd_ycc_extrgb_convert_neon |
-+ * jsimd_ycc_extbgr_convert_neon |
-+ * jsimd_ycc_extrgbx_convert_neon |
-+ * jsimd_ycc_extbgrx_convert_neon |
-+ * jsimd_ycc_extxbgr_convert_neon |
-+ * jsimd_ycc_extxrgb_convert_neon |
++/** |
++ * Returns a descriptive error message explaining why the last command failed. |
+ * |
-+ * Colorspace conversion YCbCr -> RGB |
++ * @return a descriptive error message explaining why the last command failed. |
+ */ |
+ DLLEXPORT char* DLLCALL tjGetErrorStr(void); |
+ |
+ |
++/* Backward compatibility functions and macros (nothing to see here) */ |
++#define NUMSUBOPT TJ_NUMSAMP |
++#define TJ_444 TJSAMP_444 |
++#define TJ_422 TJSAMP_422 |
++#define TJ_420 TJSAMP_420 |
++#define TJ_411 TJSAMP_420 |
++#define TJ_GRAYSCALE TJSAMP_GRAY |
+ |
-+.macro do_load size |
-+ .if \size == 8 |
-+ ld1 {v4.8b}, [U], 8 |
-+ ld1 {v5.8b}, [V], 8 |
-+ ld1 {v0.8b}, [Y], 8 |
-+ prfm PLDL1KEEP, [U, #64] |
-+ prfm PLDL1KEEP, [V, #64] |
-+ prfm PLDL1KEEP, [Y, #64] |
-+ .elseif \size == 4 |
-+ ld1 {v4.b}[0], [U], 1 |
-+ ld1 {v4.b}[1], [U], 1 |
-+ ld1 {v4.b}[2], [U], 1 |
-+ ld1 {v4.b}[3], [U], 1 |
-+ ld1 {v5.b}[0], [V], 1 |
-+ ld1 {v5.b}[1], [V], 1 |
-+ ld1 {v5.b}[2], [V], 1 |
-+ ld1 {v5.b}[3], [V], 1 |
-+ ld1 {v0.b}[0], [Y], 1 |
-+ ld1 {v0.b}[1], [Y], 1 |
-+ ld1 {v0.b}[2], [Y], 1 |
-+ ld1 {v0.b}[3], [Y], 1 |
-+ .elseif \size == 2 |
-+ ld1 {v4.b}[4], [U], 1 |
-+ ld1 {v4.b}[5], [U], 1 |
-+ ld1 {v5.b}[4], [V], 1 |
-+ ld1 {v5.b}[5], [V], 1 |
-+ ld1 {v0.b}[4], [Y], 1 |
-+ ld1 {v0.b}[5], [Y], 1 |
-+ .elseif \size == 1 |
-+ ld1 {v4.b}[6], [U], 1 |
-+ ld1 {v5.b}[6], [V], 1 |
-+ ld1 {v0.b}[6], [Y], 1 |
-+ .else |
-+ .error unsupported macroblock size |
-+ .endif |
-+.endm |
-+ |
-+.macro do_store bpp, size |
-+ .if \bpp == 24 |
-+ .if \size == 8 |
-+ st3 {v10.8b, v11.8b, v12.8b}, [RGB], 24 |
-+ .elseif \size == 4 |
-+ st3 {v10.b, v11.b, v12.b}[0], [RGB], 3 |
-+ st3 {v10.b, v11.b, v12.b}[1], [RGB], 3 |
-+ st3 {v10.b, v11.b, v12.b}[2], [RGB], 3 |
-+ st3 {v10.b, v11.b, v12.b}[3], [RGB], 3 |
-+ .elseif \size == 2 |
-+ st3 {v10.b, v11.b, v12.b}[4], [RGB], 3 |
-+ st3 {v10.b, v11.b, v12.b}[5], [RGB], 3 |
-+ .elseif \size == 1 |
-+ st3 {v10.b, v11.b, v12.b}[6], [RGB], 3 |
-+ .else |
-+ .error unsupported macroblock size |
-+ .endif |
-+ .elseif \bpp == 32 |
-+ .if \size == 8 |
-+ st4 {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], 32 |
-+ .elseif \size == 4 |
-+ st4 {v10.b, v11.b, v12.b, v13.b}[0], [RGB], 4 |
-+ st4 {v10.b, v11.b, v12.b, v13.b}[1], [RGB], 4 |
-+ st4 {v10.b, v11.b, v12.b, v13.b}[2], [RGB], 4 |
-+ st4 {v10.b, v11.b, v12.b, v13.b}[3], [RGB], 4 |
-+ .elseif \size == 2 |
-+ st4 {v10.b, v11.b, v12.b, v13.b}[4], [RGB], 4 |
-+ st4 {v10.b, v11.b, v12.b, v13.b}[5], [RGB], 4 |
-+ .elseif \size == 1 |
-+ st4 {v10.b, v11.b, v12.b, v13.b}[6], [RGB], 4 |
-+ .else |
-+ .error unsupported macroblock size |
-+ .endif |
-+ .elseif \bpp==16 |
-+ .if \size == 8 |
-+ st1 {v25.8h}, [RGB],16 |
-+ .elseif \size == 4 |
-+ st1 {v25.4h}, [RGB],8 |
-+ .elseif \size == 2 |
-+ st1 {v25.h}[4], [RGB],2 |
-+ st1 {v25.h}[5], [RGB],2 |
-+ .elseif \size == 1 |
-+ st1 {v25.h}[6], [RGB],2 |
-+ .else |
-+ .error unsupported macroblock size |
-+ .endif |
-+ .else |
-+ .error unsupported bpp |
-+ .endif |
-+.endm |
-+ |
-+.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, g_offs, gsize, b_offs, bsize, defsize |
++#define TJ_BGR 1 |
++#define TJ_BOTTOMUP TJFLAG_BOTTOMUP |
++#define TJ_FORCEMMX TJFLAG_FORCEMMX |
++#define TJ_FORCESSE TJFLAG_FORCESSE |
++#define TJ_FORCESSE2 TJFLAG_FORCESSE2 |
++#define TJ_ALPHAFIRST 64 |
++#define TJ_FORCESSE3 TJFLAG_FORCESSE3 |
++#define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE |
++#define TJ_YUV 512 |
+ |
-+/* |
-+ * 2-stage pipelined YCbCr->RGB conversion |
-+ */ |
++DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height); |
++ |
++DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height, |
++ int jpegSubsamp); |
+ |
-+.macro do_yuv_to_rgb_stage1 |
-+ uaddw v6.8h, v2.8h, v4.8b /* q3 = u - 128 */ |
-+ uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ |
-+ smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */ |
-+ smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */ |
-+ smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */ |
-+ smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */ |
-+ smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */ |
-+ smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */ |
-+ smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */ |
-+ smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */ |
-+.endm |
-+ |
-+.macro do_yuv_to_rgb_stage2 |
-+ rshrn v20.4h, v20.4s, #15 |
-+ rshrn2 v20.8h, v22.4s, #15 |
-+ rshrn v24.4h, v24.4s, #14 |
-+ rshrn2 v24.8h, v26.4s, #14 |
-+ rshrn v28.4h, v28.4s, #14 |
-+ rshrn2 v28.8h, v30.4s, #14 |
-+ uaddw v20.8h, v20.8h, v0.8b |
-+ uaddw v24.8h, v24.8h, v0.8b |
-+ uaddw v28.8h, v28.8h, v0.8b |
-+.if \bpp != 16 |
-+ sqxtun v1\g_offs\defsize, v20.8h |
-+ sqxtun v1\r_offs\defsize, v24.8h |
-+ sqxtun v1\b_offs\defsize, v28.8h |
-+.else |
-+ sqshlu v21.8h, v20.8h, #8 |
-+ sqshlu v25.8h, v24.8h, #8 |
-+ sqshlu v29.8h, v28.8h, #8 |
-+ sri v25.8h, v21.8h, #5 |
-+ sri v25.8h, v29.8h, #11 |
-+.endif |
-+ |
-+.endm |
-+ |
-+.macro do_yuv_to_rgb_stage2_store_load_stage1 |
-+ rshrn v20.4h, v20.4s, #15 |
-+ rshrn v24.4h, v24.4s, #14 |
-+ rshrn v28.4h, v28.4s, #14 |
-+ ld1 {v4.8b}, [U], 8 |
-+ rshrn2 v20.8h, v22.4s, #15 |
-+ rshrn2 v24.8h, v26.4s, #14 |
-+ rshrn2 v28.8h, v30.4s, #14 |
-+ ld1 {v5.8b}, [V], 8 |
-+ uaddw v20.8h, v20.8h, v0.8b |
-+ uaddw v24.8h, v24.8h, v0.8b |
-+ uaddw v28.8h, v28.8h, v0.8b |
-+.if \bpp != 16 /**************** rgb24/rgb32 *********************************/ |
-+ sqxtun v1\g_offs\defsize, v20.8h |
-+ ld1 {v0.8b}, [Y], 8 |
-+ sqxtun v1\r_offs\defsize, v24.8h |
-+ prfm PLDL1KEEP, [U, #64] |
-+ prfm PLDL1KEEP, [V, #64] |
-+ prfm PLDL1KEEP, [Y, #64] |
-+ sqxtun v1\b_offs\defsize, v28.8h |
-+ uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */ |
-+ uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ |
-+ smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */ |
-+ smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */ |
-+ smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */ |
-+ smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */ |
-+ smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */ |
-+ smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */ |
-+.else /**************************** rgb565 ***********************************/ |
-+ sqshlu v21.8h, v20.8h, #8 |
-+ sqshlu v25.8h, v24.8h, #8 |
-+ sqshlu v29.8h, v28.8h, #8 |
-+ uaddw v6.8h, v2.8h, v4.8b /* v6.16b = u - 128 */ |
-+ uaddw v8.8h, v2.8h, v5.8b /* q2 = v - 128 */ |
-+ ld1 {v0.8b}, [Y], 8 |
-+ smull v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */ |
-+ smlal v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */ |
-+ smull2 v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */ |
-+ smlal2 v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */ |
-+ sri v25.8h, v21.8h, #5 |
-+ smull v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */ |
-+ smull2 v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */ |
-+ prfm PLDL1KEEP, [U, #64] |
-+ prfm PLDL1KEEP, [V, #64] |
-+ prfm PLDL1KEEP, [Y, #64] |
-+ sri v25.8h, v29.8h, #11 |
-+.endif |
-+ do_store \bpp, 8 |
-+ smull v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */ |
-+ smull2 v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */ |
-+.endm |
-+ |
-+.macro do_yuv_to_rgb |
-+ do_yuv_to_rgb_stage1 |
-+ do_yuv_to_rgb_stage2 |
-+.endm |
-+ |
-+/* Apple gas crashes on adrl, work around that by using adr. |
-+ * But this requires a copy of these constants for each function. |
++DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf, |
++ int width, int pitch, int height, int pixelSize, unsigned char *dstBuf, |
++ unsigned long *compressedSize, int jpegSubsamp, int jpegQual, int flags); |
++ |
++DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, |
++ unsigned char *srcBuf, int width, int pitch, int height, int pixelSize, |
++ unsigned char *dstBuf, int subsamp, int flags); |
++ |
++DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle, |
++ unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height); |
++ |
++DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, |
++ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, |
++ int width, int pitch, int height, int pixelSize, int flags); |
++ |
++ |
++/** |
++ * @} |
+ */ |
+ |
-+.balign 16 |
-+jsimd_ycc_\colorid\()_neon_consts: |
-+ .short 0, 0, 0, 0 |
-+ .short 22971, -11277, -23401, 29033 |
-+ .short -128, -128, -128, -128 |
-+ .short -128, -128, -128, -128 |
-+ |
-+asm_function jsimd_ycc_\colorid\()_convert_neon |
-+ OUTPUT_WIDTH .req x0 |
-+ INPUT_BUF .req x1 |
-+ INPUT_ROW .req x2 |
-+ OUTPUT_BUF .req x3 |
-+ NUM_ROWS .req x4 |
-+ |
-+ INPUT_BUF0 .req x5 |
-+ INPUT_BUF1 .req x6 |
-+ INPUT_BUF2 .req INPUT_BUF |
-+ |
-+ RGB .req x7 |
-+ Y .req x8 |
-+ U .req x9 |
-+ V .req x10 |
-+ N .req x15 |
-+ |
-+ sub sp, sp, 336 |
-+ str x15, [sp], 16 |
-+ /* Load constants to d1, d2, d3 (v0.4h is just used for padding) */ |
-+ adr x15, jsimd_ycc_\colorid\()_neon_consts |
-+ /* Save NEON registers */ |
-+ st1 {v0.8b - v3.8b}, [sp], 32 |
-+ st1 {v4.8b - v7.8b}, [sp], 32 |
-+ st1 {v8.8b - v11.8b}, [sp], 32 |
-+ st1 {v12.8b - v15.8b}, [sp], 32 |
-+ st1 {v16.8b - v19.8b}, [sp], 32 |
-+ st1 {v20.8b - v23.8b}, [sp], 32 |
-+ st1 {v24.8b - v27.8b}, [sp], 32 |
-+ st1 {v28.8b - v31.8b}, [sp], 32 |
-+ ld1 {v0.4h, v1.4h}, [x15], 16 |
-+ ld1 {v2.8h}, [x15] |
-+ |
-+ /* Save ARM registers and handle input arguments */ |
-+ /* push {x4, x5, x6, x7, x8, x9, x10, x30} */ |
-+ stp x4, x5, [sp], 16 |
-+ stp x6, x7, [sp], 16 |
-+ stp x8, x9, [sp], 16 |
-+ stp x10, x30, [sp], 16 |
-+ ldr INPUT_BUF0, [INPUT_BUF] |
-+ ldr INPUT_BUF1, [INPUT_BUF, 8] |
-+ ldr INPUT_BUF2, [INPUT_BUF, 16] |
-+ .unreq INPUT_BUF |
-+ |
-+ /* Initially set v10, v11.4h, v12.8b, d13 to 0xFF */ |
-+ movi v10.16b, #255 |
-+ movi v13.16b, #255 |
-+ |
-+ /* Outer loop over scanlines */ |
-+ cmp NUM_ROWS, #1 |
-+ blt 9f |
-+0: |
-+ lsl x16, INPUT_ROW, #3 |
-+ ldr Y, [INPUT_BUF0, x16] |
-+ ldr U, [INPUT_BUF1, x16] |
-+ mov N, OUTPUT_WIDTH |
-+ ldr V, [INPUT_BUF2, x16] |
-+ add INPUT_ROW, INPUT_ROW, #1 |
-+ ldr RGB, [OUTPUT_BUF], #8 |
-+ |
-+ /* Inner loop over pixels */ |
-+ subs N, N, #8 |
-+ blt 3f |
-+ do_load 8 |
-+ do_yuv_to_rgb_stage1 |
-+ subs N, N, #8 |
-+ blt 2f |
-+1: |
-+ do_yuv_to_rgb_stage2_store_load_stage1 |
-+ subs N, N, #8 |
-+ bge 1b |
-+2: |
-+ do_yuv_to_rgb_stage2 |
-+ do_store \bpp, 8 |
-+ tst N, #7 |
-+ beq 8f |
-+3: |
-+ tst N, #4 |
-+ beq 3f |
-+ do_load 4 |
-+3: |
-+ tst N, #2 |
-+ beq 4f |
-+ do_load 2 |
-+4: |
-+ tst N, #1 |
-+ beq 5f |
-+ do_load 1 |
-+5: |
-+ do_yuv_to_rgb |
-+ tst N, #4 |
-+ beq 6f |
-+ do_store \bpp, 4 |
-+6: |
-+ tst N, #2 |
-+ beq 7f |
-+ do_store \bpp, 2 |
-+7: |
-+ tst N, #1 |
-+ beq 8f |
-+ do_store \bpp, 1 |
-+8: |
-+ subs NUM_ROWS, NUM_ROWS, #1 |
-+ bgt 0b |
-+9: |
-+ /* Restore all registers and return */ |
-+ sub sp, sp, #336 |
-+ ldr x15, [sp], 16 |
-+ ld1 {v0.8b - v3.8b}, [sp], 32 |
-+ ld1 {v4.8b - v7.8b}, [sp], 32 |
-+ ld1 {v8.8b - v11.8b}, [sp], 32 |
-+ ld1 {v12.8b - v15.8b}, [sp], 32 |
-+ ld1 {v16.8b - v19.8b}, [sp], 32 |
-+ ld1 {v20.8b - v23.8b}, [sp], 32 |
-+ ld1 {v24.8b - v27.8b}, [sp], 32 |
-+ ld1 {v28.8b - v31.8b}, [sp], 32 |
-+ /* pop {r4, r5, r6, r7, r8, r9, r10, pc} */ |
-+ ldp x4, x5, [sp], 16 |
-+ ldp x6, x7, [sp], 16 |
-+ ldp x8, x9, [sp], 16 |
-+ ldp x10, x30, [sp], 16 |
-+ br x30 |
-+ .unreq OUTPUT_WIDTH |
-+ .unreq INPUT_ROW |
-+ .unreq OUTPUT_BUF |
-+ .unreq NUM_ROWS |
-+ .unreq INPUT_BUF0 |
-+ .unreq INPUT_BUF1 |
-+ .unreq INPUT_BUF2 |
-+ .unreq RGB |
-+ .unreq Y |
-+ .unreq U |
-+ .unreq V |
-+ .unreq N |
-+ |
-+.purgem do_yuv_to_rgb |
-+.purgem do_yuv_to_rgb_stage1 |
-+.purgem do_yuv_to_rgb_stage2 |
-+.purgem do_yuv_to_rgb_stage2_store_load_stage1 |
-+.endm |
-+ |
-+/*--------------------------------- id ----- bpp R rsize G gsize B bsize defsize */ |
-+generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h, .8b |
-+generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b |
-+generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h, 1, .4h, 2, .4h, .8b |
-+generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h, 1, .4h, 0, .4h, .8b |
-+generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h, 2, .4h, 1, .4h, .8b |
-+generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h, 2, .4h, 3, .4h, .8b |
-+generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, .4h, 0, .4h, 0, .4h, .8b |
-+.purgem do_load |
-+.purgem do_store |
+ #ifdef __cplusplus |
+ } |
+ #endif |
++ |
++#endif |
+Index: turbojpegl.c |
+=================================================================== |
+--- turbojpegl.c (revision 829) |
++++ turbojpegl.c (working copy) |
+@@ -149,6 +149,10 @@ |
+ #error "TurboJPEG requires JPEG colorspace extensions" |
+ #endif |
+ |
++ if(flags&TJ_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); |
++ else if(flags&TJ_FORCESSE) putenv("JSIMD_FORCESSE=1"); |
++ else if(flags&TJ_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); |
++ |
+ if(setjmp(j->jerr.jb)) |
+ { // this will execute if LIBJPEG has an error |
+ if(row_pointer) free(row_pointer); |
+@@ -188,7 +192,8 @@ |
+ j->cinfo.image_height-j->cinfo.next_scanline); |
+ } |
+ jpeg_finish_compress(&j->cinfo); |
+- *size=TJBUFSIZE(j->cinfo.image_width, j->cinfo.image_height)-(j->jdms.free_in_buffer); |
++ *size=TJBUFSIZE(j->cinfo.image_width, j->cinfo.image_height) |
++ -(unsigned long)(j->jdms.free_in_buffer); |
+ |
+ if(row_pointer) free(row_pointer); |
+ return 0; |
+@@ -287,6 +292,10 @@ |
+ |
+ if(pitch==0) pitch=width*ps; |
+ |
++ if(flags&TJ_FORCEMMX) putenv("JSIMD_FORCEMMX=1"); |
++ else if(flags&TJ_FORCESSE) putenv("JSIMD_FORCESSE=1"); |
++ else if(flags&TJ_FORCESSE2) putenv("JSIMD_FORCESSE2=1"); |
++ |
+ if(setjmp(j->jerr.jb)) |
+ { // this will execute if LIBJPEG has an error |
+ if(row_pointer) free(row_pointer); |
+Index: wrppm.c |
+=================================================================== |
+--- wrppm.c (revision 829) |
++++ wrppm.c (working copy) |
+@@ -2,6 +2,7 @@ |
+ * wrppm.c |
+ * |
+ * Copyright (C) 1991-1996, Thomas G. Lane. |
++ * Modified 2009 by Guido Vollbeding. |
+ * This file is part of the Independent JPEG Group's software. |
+ * For conditions of distribution and use, see the accompanying README file. |
+ * |
+@@ -40,11 +41,11 @@ |
+ #define BYTESPERSAMPLE 1 |
+ #define PPM_MAXVAL 255 |
+ #else |
+-/* The word-per-sample format always puts the LSB first. */ |
++/* The word-per-sample format always puts the MSB first. */ |
+ #define PUTPPMSAMPLE(ptr,v) \ |
+ { register int val_ = v; \ |
++ *ptr++ = (char) ((val_ >> 8) & 0xFF); \ |
+ *ptr++ = (char) (val_ & 0xFF); \ |
+- *ptr++ = (char) ((val_ >> 8) & 0xFF); \ |
+ } |
+ #define BYTESPERSAMPLE 2 |
+ #define PPM_MAXVAL ((1<<BITS_IN_JSAMPLE)-1) |